FMS
2024.03
Flexible Modeling System
This module defines interfaces for common operations using message-passing libraries. Any type-less arguments in the documentation are MPP_TYPE_, which is defined by the pre-processor to create multiple subroutines out of one implementation for use in an interface. See the note below for more information. More...
Data Types | |
interface | array_to_char |
Takes a given integer or real array and returns it as a string. More... | |
type | clock |
a clock contains an array of event profiles for a region More... | |
type | clock_data_summary |
Summary of information from a clock run. More... | |
type | communicator |
Communication information for message passing libraries. More... | |
type | event |
Communication event profile. More... | |
interface | mpp_alltoall |
Scatter a vector across all PEs. More... | |
interface | mpp_broadcast |
Perform parallel broadcasts. More... | |
interface | mpp_chksum |
Calculate parallel checksums. More... | |
interface | mpp_error |
Error handler. More... | |
interface | mpp_gather |
Gather data sent from pelist onto the root pe. Wrapper for MPI_gather; can be used with and without indices. More... | |
interface | mpp_max |
Reduction operations. Find the max of scalar a from the PEs in pelist; the result is also automatically broadcast to all PEs. More... | |
interface | mpp_min |
Reduction operations. Find the min of scalar a from the PEs in pelist; the result is also automatically broadcast to all PEs. More... | |
interface | mpp_recv |
Receive data from another PE. More... | |
interface | mpp_scatter |
Scatter (ie - is) * (je - js) contiguous elements of array data from the designated root pe into contiguous members of array segment in each pe that is included in the pelist argument. More... | |
interface | mpp_send |
Send data to a receiving PE. More... | |
interface | mpp_sum |
Reduction operation. More... | |
interface | mpp_sum_ad |
Calculates sum of a given numerical array across pe's for adjoint domains. More... | |
interface | mpp_transmit |
Basic message-passing call. More... | |
type | mpp_type |
Data types for generalized data transfer (e.g. MPI_Type) More... | |
interface | mpp_type_create |
Create a mpp_type variable. More... | |
type | mpp_type_list |
Persistent elements for linked list interaction. More... | |
type | summary_struct |
holds name and clock data for use in mpp_util.h More... | |
Functions/Subroutines | |
subroutine | clock_init (id, name, flags, grain) |
subroutine | dump_clock_summary () |
subroutine | expand_peset () |
This routine will double the size of peset and copy the original peset data into the expanded one. The maximum size it may expand to is PESET_MAX. | |
integer function | find_bin (event_size) |
integer function | get_ascii_file_num_lines (FILENAME, LENGTH, PELIST) |
integer function, dimension(2) | get_ascii_file_num_lines_and_length (FILENAME, PELIST) |
Function to determine the maximum line length and number of lines from an ascii file. More... | |
integer function | get_unit () |
character(len=256) function | iarray_to_char (iarray) |
subroutine | increment_current_clock (event_id, bytes) |
character(len=len(cs)) function, target | lowercase (cs) |
subroutine | mpp_broadcast_2d_ (broadcast_data, length, from_pe, pelist) |
subroutine | mpp_broadcast_3d_ (broadcast_data, length, from_pe, pelist) |
subroutine | mpp_broadcast_4d_ (broadcast_data, length, from_pe, pelist) |
subroutine | mpp_broadcast_5d_ (broadcast_data, length, from_pe, pelist) |
subroutine | mpp_broadcast_char (char_data, length, from_pe, pelist) |
Broadcasts a character string from the given pe to its pelist. More... | |
subroutine | mpp_broadcast_scalar_ (broadcast_data, from_pe, pelist) |
subroutine | mpp_clock_begin (id) |
subroutine | mpp_clock_end (id) |
integer function | mpp_clock_id (name, flags, grain) |
Return an ID for a new or existing clock. | |
subroutine | mpp_clock_set_grain (grain) |
Set the level of granularity of timing measurements. More... | |
subroutine | mpp_declare_pelist (pelist, name, commID) |
Declare a pelist. More... | |
subroutine | mpp_error_basic (errortype, errormsg) |
A very basic error handler that uses ABORT and FLUSH calls; may need to use cpp to rename. | |
subroutine | mpp_error_ia (errortype, errormsg1, array, errormsg2) |
subroutine | mpp_error_is (errortype, errormsg1, mpp_ival, errormsg2) |
subroutine | mpp_error_mesg (routine, errormsg, errortype) |
overloads of mpp_error_basic; support for the error_mesg routine in FMS | |
subroutine | mpp_error_noargs () |
subroutine | mpp_error_ra (errortype, errormsg1, array, errormsg2) |
subroutine | mpp_error_rs (errortype, errormsg1, mpp_rval, errormsg2) |
integer function | mpp_error_state () |
subroutine | mpp_exit () |
Finalizes process termination. To be called at the end of a run. Certain MPI implementations (OpenMPI) will fail if this is not called before program termination. | |
subroutine | mpp_get_current_pelist (pelist, name, commID) |
character(len=len(peset(current_peset_num)%name)) function | mpp_get_current_pelist_name () |
subroutine | mpp_init (flags, localcomm, test_level, alt_input_nml_path) |
Initialize the mpp_mod module. Must be called before any usage. More... | |
subroutine | mpp_init_logfile () |
subroutine | mpp_init_warninglog () |
Opens the warning log file, called during mpp_init. | |
integer function | mpp_npes () |
Returns processor count for current pelist. More... | |
integer function | mpp_pe () |
Returns processor ID. More... | |
subroutine | mpp_record_time_end () |
subroutine | mpp_record_time_start () |
subroutine | mpp_recv_ (get_data, get_len, from_pe, block, tag, request) |
subroutine | mpp_recv_2d_ (get_data, get_len, from_pe, block, tag, request) |
subroutine | mpp_recv_3d_ (get_data, get_len, from_pe, block, tag, request) |
subroutine | mpp_recv_4d_ (get_data, get_len, from_pe, block, tag, request) |
subroutine | mpp_recv_5d_ (get_data, get_len, from_pe, block, tag, request) |
subroutine | mpp_recv_scalar_ (get_data, from_pe, glen, block, tag, request) |
integer function | mpp_root_pe () |
subroutine | mpp_send_ (put_data, put_len, to_pe, tag, request) |
subroutine | mpp_send_2d_ (put_data, put_len, to_pe, tag, request) |
subroutine | mpp_send_3d_ (put_data, put_len, to_pe, tag, request) |
subroutine | mpp_send_4d_ (put_data, put_len, to_pe, tag, request) |
subroutine | mpp_send_5d_ (put_data, put_len, to_pe, tag, request) |
subroutine | mpp_send_scalar_ (put_data, to_pe, plen, tag, request) |
subroutine | mpp_set_current_pelist (pelist, no_sync) |
Set context pelist. More... | |
subroutine | mpp_set_root_pe (num) |
subroutine | mpp_set_stack_size (n) |
Set the mpp_stack variable to be at least n LONG words long. More... | |
subroutine | mpp_set_warn_level (flag) |
subroutine | mpp_sum_2d_ (a, length, pelist) |
Sums 2d array across pes. More... | |
subroutine | mpp_sum_2d_ad_ (a, length, pelist) |
Sums 2d array across pes. More... | |
subroutine | mpp_sum_3d_ (a, length, pelist) |
Sums 3d array across pes. More... | |
subroutine | mpp_sum_3d_ad_ (a, length, pelist) |
Sums 3d array across pes. More... | |
subroutine | mpp_sum_4d_ (a, length, pelist) |
Sums 4d array across pes. More... | |
subroutine | mpp_sum_4d_ad_ (a, length, pelist) |
Sums 4d array across pes. More... | |
subroutine | mpp_sum_5d_ (a, length, pelist) |
Sums 5d array across pes. More... | |
subroutine | mpp_sum_5d_ad_ (a, length, pelist) |
Sums 5d array across pes. More... | |
subroutine | mpp_sum_scalar_ (a, pelist) |
Sums array a when only the first element is passed: this routine just converts to a call to MPP_SUM_. | |
subroutine | mpp_sum_scalar_ad_ (a, pelist) |
Sums array a when only the first element is passed: this routine just converts to a call to MPP_SUM_. | |
subroutine | mpp_sync (pelist, do_self) |
Synchronize PEs in list. | |
subroutine | mpp_sync_self (pelist, check, request, msg_size, msg_type) |
This checks whether the current PE's outstanding puts are complete; we can't use shmem_fence because we are actually waiting for a remote PE to complete its get. | |
subroutine | mpp_transmit_2d_ (put_data, put_len, to_pe, get_data, get_len, from_pe, block, tag, recv_request, send_request) |
subroutine | mpp_transmit_3d_ (put_data, put_len, to_pe, get_data, get_len, from_pe, block, tag, recv_request, send_request) |
subroutine | mpp_transmit_4d_ (put_data, put_len, to_pe, get_data, get_len, from_pe, block, tag, recv_request, send_request) |
subroutine | mpp_transmit_5d_ (put_data, put_len, to_pe, get_data, get_len, from_pe, block, tag, recv_request, send_request) |
subroutine | mpp_transmit_scalar_ (put_data, to_pe, get_data, from_pe, plen, glen, block, tag, recv_request, send_request) |
subroutine | mpp_type_free (dtype) |
Deallocates memory for mpp_type objects @TODO This should probably not take a pointer, but for now we do this. | |
character(len=256) function | rarray_to_char (rarray) |
subroutine | read_ascii_file (FILENAME, LENGTH, Content, PELIST) |
Reads any ascii file into a character array and broadcasts it to the non-root mpi-tasks. Based off READ_INPUT_NML. More... | |
subroutine | read_input_nml (pelist_name_in, alt_input_nml_path) |
Reads an existing input nml file into a character array and broadcasts it to the non-root mpi-tasks. This allows the use of reads from an internal file for namelist settings (requires 2003 compliant compiler) More... | |
integer function | stderr () |
This function returns the current standard fortran unit numbers for error messages. | |
integer function | stdin () |
This function returns the current standard fortran unit numbers for input. | |
integer function | stdlog () |
This function returns the current standard fortran unit numbers for log messages. Log messages, by convention, are written to the file logfile.out. | |
integer function | stdout () |
This function returns the current standard fortran unit numbers for output. | |
subroutine | sum_clock_data () |
character(len=len(cs)) function, target | uppercase (cs) |
integer function | warnlog () |
This function returns the unit number for the warning log if on the root pe; otherwise it returns the etc_unit value (usually /dev/null) | |
Variables | |
integer | clock0 |
measures total runtime from mpp_init to mpp_exit | |
integer | clock_grain =CLOCK_LOOP-1 |
integer | clock_num =0 |
type(summary_struct), dimension(max_clocks) | clock_summary |
type(clock), dimension(max_clocks), save | clocks |
character(len=32), parameter | configfile ='logfile' |
integer | cur_recv_request = 0 |
integer | cur_send_request = 0 |
integer | current_clock =0 |
integer | current_peset_max = 32 |
integer | current_peset_num =0 |
type(mpp_type_list) | datatypes |
logical | debug = .false. |
integer(i8_kind) | end_tick |
integer | err_unit =ERROR_UNIT |
integer | error |
integer | error_state =0 |
integer | etc_unit |
logical | etc_unit_is_stderr = .false. |
character(len=32) | etcfile ='/dev/null' |
logical | first_call_system_clock_mpi =.TRUE. |
integer | get_len_nocomm = 0 |
needed for mpp_transmit_nocomm.h | |
integer | in_unit =INPUT_UNIT |
Use the intrinsics in iso_fortran_env. | |
character(len=:), dimension(:), allocatable, target, public | input_nml_file |
integer | log_unit |
integer | max_request = 0 |
integer, parameter | max_request_min = 10000 |
integer(i8_kind) | max_ticks |
logical | module_is_initialized = .false. |
real(r8_kind) | mpi_count0 =0 |
used to prevent integer overflow | |
real(r8_kind) | mpi_tick_rate =0.d0 |
clock rate for mpi_wtick() | |
type(mpp_type), target, public | mpp_byte |
integer, parameter, public | mpp_comm_null = 67108864 |
MPP_COMM_NULL acts as an analogous mpp-macro for MPI_COMM_NULL to share with fms2_io NetCDF4 mpi-io. The default value for the no-mpi case comes from Intel MPI and MPICH. OpenMPI sets a default value of '2'. | |
integer | mpp_comm_private |
integer, parameter, public | mpp_info_null = 469762048 |
MPP_INFO_NULL acts as an analogous mpp-macro for MPI_INFO_NULL to share with fms2_io NetCDF4 mpi-io. The default value for the no-mpi case comes from Intel MPI and MPICH. OpenMPI sets a default value of '0'. | |
integer, parameter, public | mpp_init_test_clocks_init = 2 |
integer, parameter, public | mpp_init_test_datatype_list_init = 3 |
integer, parameter, public | mpp_init_test_etc_unit = 6 |
integer, parameter, public | mpp_init_test_full_init = -1 |
integer, parameter, public | mpp_init_test_init_true_only = 0 |
integer, parameter, public | mpp_init_test_logfile_init = 4 |
integer, parameter, public | mpp_init_test_peset_allocated = 1 |
integer, parameter, public | mpp_init_test_read_namelist = 5 |
integer, parameter, public | mpp_init_test_requests_allocated = 7 |
logical, public | mpp_record_timing_data =.TRUE. |
integer | mpp_stack_hwm =0 |
integer | mpp_stack_size =0 |
integer | npes =1 |
integer | num_clock_ids =0 |
integer | out_unit =OUTPUT_UNIT |
integer | pe =0 |
type(communicator), dimension(:), allocatable | peset |
Will be allocated starting from 0; index 0 is a dummy used to hold the single-PE "self" communicator. | |
integer, parameter | peset_max = 10000 |
integer | peset_num =0 |
integer, dimension(max_clocks) | previous_clock =0 |
logical | read_ascii_file_on = .FALSE. |
integer | request_multiply = 20 |
integer, dimension(:), allocatable | request_recv |
integer, dimension(:), allocatable | request_send |
integer | root_pe =0 |
integer, dimension(:), allocatable | size_recv |
integer(i8_kind) | start_tick |
integer | stdout_unit |
logical | sync_all_clocks = .false. |
integer(i8_kind) | tick |
integer(i8_kind) | tick0 =0 |
real | tick_rate |
integer(i8_kind) | ticks_per_sec |
integer, dimension(:), allocatable | type_recv |
logical | verbose =.FALSE. |
integer | warn_unit |
unit number of the warning log | |
character(len=32), parameter | warnfile ='warnfile' |
base name for warninglog (appends ".<PE>.out") | |
logical | warnings_are_fatal = .FALSE. |
integer | world_peset_num |
the world communicator | |
This module defines interfaces for common operations using message-passing libraries. Any type-less arguments in the documentation are MPP_TYPE_, which is defined by the pre-processor to create multiple subroutines out of one implementation for use in an interface. See the note below for more information.
A set of simple calls to provide a uniform interface to different message-passing libraries. It currently can be implemented either in the SGI/Cray native SHMEM library or in the MPI standard. Other libraries (e.g MPI-2, Co-Array Fortran) can be incorporated as the need arises.
The data transfer between a processor and its own memory is based on load and store operations upon memory. Shared-memory systems (including distributed shared memory systems) have a single address space and any processor can acquire any data within the memory by load and store. The situation is different for distributed parallel systems. Specialized MPP systems such as the T3E can simulate shared-memory by direct data acquisition from remote memory. But if the parallel code is distributed across a cluster, or across the Net, messages must be sent and received using the protocols for long-distance communication, such as TCP/IP. This requires a "handshaking" between nodes of the distributed system. One can think of the two different methods as involving puts or gets (e.g the SHMEM library), or in the case of negotiated communication (e.g MPI), sends and recvs.
The difference between SHMEM and MPI is that SHMEM uses one-sided communication, which can have very low-latency high-bandwidth implementations on tightly coupled systems. MPI is a standard developed for distributed computing across loosely-coupled systems, and therefore incurs a software penalty for negotiating the communication. It is however an open industry standard whereas SHMEM is a proprietary interface. Besides, the puts or gets on which it is based cannot currently be implemented in a cluster environment (there are recent announcements from Compaq that occasion hope).
The message-passing requirements of climate and weather codes can be reduced to a fairly simple minimal set, which is easily implemented in any message-passing API. mpp_mod provides this API.
Features of mpp_mod include:
This module is used to develop higher-level calls for domain decomposition (mpp_domains) and parallel I/O (FMS2 IO)
Parallel computing is initially daunting, but it soon becomes second nature, much the way many of us can now write vector code without much effort. The key insight required while reading and writing parallel code is in arriving at a mental grasp of several independent parallel execution streams through the same code (the SPMD model). Each variable you examine may have different values for each stream, the processor ID being an obvious example. Subroutines and function calls are particularly subtle, since it is not always obvious from looking at a call what synchronization between execution streams it implies. An example of erroneous code would be a global barrier call (see mpp_sync below) placed within a code block that not all PEs will execute, e.g:
if( pe.EQ.0 )call mpp_sync()
Here only PE 0 reaches the barrier, where it will wait indefinitely. While this is a particularly egregious example to illustrate the coding flaw, more subtle versions of the same are among the most common errors in parallel code.
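For contrast, a minimal sketch of a safe version of the same pattern, assuming an SPMD block executed by every PE in the current pelist (do_root_only_work is a hypothetical serial routine, not part of mpp_mod):
if( pe.EQ.0 )then
    call do_root_only_work()   ! root-only work that makes no mpp calls internally
end if
call mpp_sync()                ! outside the branch: every PE reaches the barrier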
It is therefore important to be conscious of the context of a subroutine or function call, and the implied synchronization. There are certain calls here (e.g mpp_declare_pelist, mpp_init, mpp_set_stack_size) which must be called by all PEs. There are others which are executed on a subset of PEs (here called a pelist) and which must be called by all the PEs in that pelist (e.g mpp_max, mpp_sum, mpp_sync). Still others imply no synchronization at all. I will make every effort to highlight the context of each call in the MPP modules, so that the implicit synchronization is spelt out.
For performance it is necessary to keep synchronization as limited as the algorithm being implemented will allow. For instance, a single message between two PEs should only imply synchronization across the PEs in question. A global synchronization (or barrier) is likely to be slow, and is best avoided. But codes first parallelized on a Cray T3E tend to have many global syncs, as very fast barriers were implemented there in hardware.
Another reason to use pelists is to run a single program in MPMD mode, where different PE subsets work on different portions of the code. A typical example is to assign an ocean model and atmosphere model to different PE subsets, and couple them concurrently instead of running them serially. The MPP module provides the notion of a current pelist, which is set when a group of PEs branch off into a subset. Subsequent calls that omit the pelist optional argument (seen below in many of the individual calls) assume that the implied synchronization is across the current pelist. The calls mpp_root_pe and mpp_npes also return the values appropriate to the current pelist. The mpp_set_current_pelist call is provided to set the current pelist.
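As an illustration, a sketch of the concurrent-coupling pattern just described, assuming a four-PE job; the pelist contents and the component drivers run_atmosphere and run_ocean are illustrative, not part of mpp_mod:
integer, dimension(2) :: atm_pelist, ocn_pelist
atm_pelist = (/ 0, 1 /)
ocn_pelist = (/ 2, 3 /)
call mpp_declare_pelist( atm_pelist, 'atmosphere' )   ! called by ALL PEs in the parent pelist
call mpp_declare_pelist( ocn_pelist, 'ocean' )        ! called by ALL PEs in the parent pelist
if( mpp_pe().LE.1 )then
    call mpp_set_current_pelist( atm_pelist )   ! subsequent global calls span PEs 0-1
    call run_atmosphere()
else
    call mpp_set_current_pelist( ocn_pelist )   ! subsequent global calls span PEs 2-3
    call run_ocean()
end if
call mpp_set_current_pelist()                   ! return to the "world" pelist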
A note on MPP_TYPE_: this token is resolved in the pre-processor stage to any of a variety of types. In general the MPP operations work on 4-byte and 8-byte variants of integer, real, complex, logical variables, of rank 0 to 5, leading to 48 specific module procedures under the same generic interface. Any of the variables below shown as MPP_TYPE_ is treated in this way.
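As a rough sketch of the mechanism (the macro spellings here are schematic, not the literal FMS sources; mpp_sum.inc is one of the include files named in the definitions below):
! shared implementation, written once against the macros:
!     subroutine MPP_SUM_( a, length, pelist )
!       MPP_TYPE_, intent(inout) :: a(*)
!       ...
! instantiated once per type/kind by the pre-processor, e.g.:
!     #define MPP_SUM_ mpp_sum_r8
!     #define MPP_TYPE_ real(r8_kind)
!     #include <mpp_sum.inc>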
interface mpp_mod::array_to_char |
type mpp_mod::clock |
Private Attributes | |
logical | detailed |
type(event), dimension(:), pointer | events =>NULL() |
allocated to max_event_types if needed | |
integer | grain |
integer(i8_kind) | hits |
logical | is_on |
initialized to false; set true when calling mpp_clock_begin | |
character(len=32) | name |
integer | peset_num |
logical | sync_on_begin |
integer(i8_kind) | tick |
integer(i8_kind) | total_ticks |
type mpp_mod::clock_data_summary |
type mpp_mod::communicator |
Communication information for message passing libraries.
peset holds communicators as SHMEM-compatible triads (start, log2(stride), num)
Private Attributes | |
integer | count |
integer | group |
MPI communicator and group id for this PE set. | |
integer | id |
integer, dimension(:), pointer | list =>NULL() |
integer | log2stride |
dummy variables when libMPI is defined. | |
character(len=32) | name |
integer | start |
type mpp_mod::event |
interface mpp_mod::mpp_alltoall |
Scatter a vector across all PEs.
Transpose the vector and PE index. Wrapper for the MPI_alltoall function; includes the more generic _V and _W versions for use with displacements/data types.
Generic MPP_TYPE_ implementations: mpp_alltoall_ mpp_alltoallv_ mpp_alltoallw_
interface mpp_mod::mpp_broadcast |
Perform parallel broadcasts.
The mpp_broadcast call has been added because the original syntax (using ALL_PES in mpp_transmit) did not support a broadcast across a pelist.
MPP_TYPE_ corresponds to any 4-byte and 8-byte variant of integer, real, complex, logical variables, of rank 0 or 1. A contiguous block from a multi-dimensional array may be passed by its starting address and its length, as in f77.
Global broadcasts through the ALL_PES argument to mpp_transmit are still provided for backward-compatibility.
If pelist is omitted, the context is assumed to be the current pelist. from_pe must belong to the current pelist. This call implies synchronization across the PEs in pelist, or the current pelist if pelist is absent.
Example usage:
call mpp_broadcast( data, length, from_pe, pelist )
[in,out] | data | Data to broadcast |
length | Length of data to broadcast | |
from_pe | PE to send the data from | |
pelist | List of PEs to broadcast across; if not provided, uses the current pelist |
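For instance, a minimal sketch (the field array and its producer routine are illustrative):
real :: field(1000)
if( mpp_pe().EQ.mpp_root_pe() ) call compute_field( field )   ! hypothetical: only root has valid data
call mpp_broadcast( field, size(field), mpp_root_pe() )       ! pelist omitted: current pelist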
interface mpp_mod::mpp_chksum |
Calculate parallel checksums.
mpp_chksum is a parallel checksum routine that returns an identical answer for the same array irrespective of how it has been partitioned across processors. int_kind is the KIND parameter corresponding to long integers (see discussion on OS-dependent preprocessor directives) defined in the file platform.F90. MPP_TYPE_ corresponds to any 4-byte and 8-byte variant of integer, real, complex, logical variables, of rank 0 to 5.
Integer checksums on FP data use the F90 TRANSFER() intrinsic.
This provides identical results on a single-processor job; to perform serial checksums on a single processor of a parallel job, you only need to use the optional pelist argument.
argument.
use mpp_mod
integer :: pe, chksum
real :: a(:)
pe = mpp_pe()
chksum = mpp_chksum( a, (/pe/) )
The additional functionality of mpp_chksum over serial checksums is to compute the checksum across the PEs in pelist. The answer is guaranteed to be the same for the same distributed array irrespective of how it has been partitioned.
If pelist is omitted, the context is assumed to be the current pelist. This call implies synchronization across the PEs in pelist, or the current pelist if pelist is absent.
Example usage:
mpp_chksum( var, pelist )
var | Data to calculate checksum of |
pelist | Optional list of PE's to include in checksum calculation if not using current pelist |
Generic MPP_TYPE_ implementations: mpp_chksum_ mpp_chksum_int_ mpp_chksum_int_rmask_
interface mpp_mod::mpp_error |
Error handler.
It is strongly recommended that all error exits pass through mpp_error to assure the program fails cleanly. An individual PE encountering a STOP statement, for instance, can cause the program to hang. The use of the STOP statement is strongly discouraged.
Calling mpp_error with no arguments produces an immediate error exit, i.e:
call mpp_error
call mpp_error()
are equivalent.
The argument order
call mpp_error( routine, errormsg, errortype )
is also provided to support legacy code. In this version of the call, none of the arguments may be omitted.
The behaviour of mpp_error for a WARNING can be controlled with an additional call mpp_set_warn_level.
call mpp_set_warn_level(ERROR)
causes mpp_error to treat WARNING exactly like FATAL.
call mpp_set_warn_level(WARNING)
resets to the default behaviour described above.
mpp_error also has an internal error state which maintains knowledge of whether a warning has been issued. This can be used at startup in a subroutine that checks if the model has been properly configured. You can generate a series of warnings using mpp_error, and then check at the end if any warnings have been issued using the function mpp_error_state(). If the value of this is WARNING, at least one warning has been issued, and the user can take appropriate action:
if( ... )call mpp_error( WARNING, '...' )
if( ... )call mpp_error( WARNING, '...' )
if( ... )call mpp_error( WARNING, '...' )
...
if( mpp_error_state().EQ.WARNING )call mpp_error( FATAL, '...' )
Example usage:
errortype | One of NOTE , WARNING or FATAL (these definitions are acquired by use association). NOTE writes errormsg to STDOUT . WARNING writes errormsg to STDERR . FATAL writes errormsg to STDERR , and induces a clean error exit with a call stack traceback. |
routine | Calling routine name |
errmsg | Message to output |
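A brief sketch of the three severities in use (the module name and messages are illustrative):
call mpp_error( NOTE,    'mymod: beginning phase 2' )         ! to stdout; execution continues
call mpp_error( WARNING, 'mymod: fallback path taken' )       ! to stderr; execution continues
call mpp_error( FATAL,   'mymod: inconsistent grid sizes' )   ! to stderr; clean error exit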
interface mpp_mod::mpp_gather |
Gather data sent from pelist onto the root pe. Wrapper for MPI_gather; can be used with and without indices.
sbuf | MPP_TYPE_ data buffer to send |
rbuf | MPP_TYPE_ data buffer to receive |
pelist | integer(:) optional pelist to gather from, defaults to current |
Example usage:
call mpp_gather(send_buffer, recv_buffer, pelist)
call mpp_gather(is, ie, js, je, pelist, array_seg, data, is_root_pe)
interface mpp_mod::mpp_max |
Reduction operations. Find the max of scalar a from the PEs in pelist; the result is also automatically broadcast to all PEs.
a | real or integer, of 4-byte or 8-byte kind. |
pelist | If pelist is omitted, the context is assumed to be the current pelist. This call implies synchronization across the PEs in pelist , or the current pelist if pelist is absent. |
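A minimal usage sketch, assuming a local array t already populated on each PE:
real :: t(100), local_max
local_max = maxval(t)       ! per-PE maximum
call mpp_max( local_max )   ! on return, every PE holds the maximum across the current pelist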
Private Member Functions | |
mpp_max_int4_0d | |
mpp_max_int4_1d | |
mpp_max_int8_0d | |
mpp_max_int8_1d | |
mpp_max_real4_0d | |
mpp_max_real4_1d | |
mpp_max_real8_0d | |
mpp_max_real8_1d | |
interface mpp_mod::mpp_min |
Reduction operations. Find the min of scalar a from the PEs in pelist; the result is also automatically broadcast to all PEs.
a | real or integer, of 4-byte or 8-byte kind. |
pelist | If pelist is omitted, the context is assumed to be the current pelist. This call implies synchronization across the PEs in pelist , or the current pelist if pelist is absent. |
Private Member Functions | |
mpp_min_int4_0d | |
mpp_min_int4_1d | |
mpp_min_int8_0d | |
mpp_min_int8_1d | |
mpp_min_real4_0d | |
mpp_min_real4_1d | |
mpp_min_real8_0d | |
mpp_min_real8_1d | |
interface mpp_mod::mpp_recv |
Receive data from another PE.
[out] | get_data | scalar or array to be written with the received data |
get_len | size of the data to receive into get_data | |
from_pe | PE number to receive from | |
block | true for blocking, false for non-blocking. Defaults to true | |
tag | communication tag | |
[out] | request | MPI request handle |
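A sketch of a blocking point-to-point pair between PE 0 and PE 1; the buffer and its size are illustrative, and pe is the value returned by mpp_pe():
real :: buf(100)
if( pe.EQ.0 )then
    call mpp_send( buf, 100, 1 )   ! outward transmission to PE 1
    call mpp_sync_self()           ! wait for the outstanding send to complete
else if( pe.EQ.1 )then
    call mpp_recv( buf, 100, 0 )   ! blocking (default) receive from PE 0
end if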
interface mpp_mod::mpp_scatter |
Scatter (ie - is) * (je - js) contiguous elements of array data from the designated root pe into contiguous members of array segment in each pe that is included in the pelist argument.
is,ie | integer start and end indices of the first dimension of the segment array |
js,je | integer start and end indices of the second dimension of the segment array |
pelist | integer(:) the PE list of target pes, needs to be monotonically increasing |
array_seg | MPP_TYPE_ 2D array that the data is to be copied into |
data | MPP_TYPE_ the source array |
is_root_pe | logical true if calling from root pe |
ishift | integer offsets specifying the first element in the data array |
nk | integer size of third dimension for 3D calls |
Example usage:
call mpp_scatter(is, ie, js, je, pelist, segment, data, .true.)
interface mpp_mod::mpp_send |
Send data to a receiving PE.
put_data | scalar or array to be sent to a receiving PE | |
put_len | size of data to send from put_data | |
to_pe | PE number to send to | |
block | true for blocking, false for non-blocking. Defaults to true | |
tag | communication tag | |
[out] | request | MPI request handle |
Example usage:
call mpp_send(data, ie, pe)
interface mpp_mod::mpp_sum |
Reduction operation.
MPP_TYPE_ corresponds to any 4-byte and 8-byte variant of integer, real, complex variables, of rank 0 or 1. A contiguous block from a multi-dimensional array may be passed by its starting address and its length, as in f77.
Library reduction operators are not required or guaranteed to be bit-reproducible. In any case, changing the processor count changes the data layout, and thus very likely the order of operations. For bit-reproducible sums of distributed arrays, consider using the mpp_global_sum routine provided by the mpp_domains module.
The bit_reproducible flag provided in earlier versions of this routine has been removed.
If pelist is omitted, the context is assumed to be the current pelist. This call implies synchronization across the PEs in pelist, or the current pelist if pelist is absent.
Example usage:
call mpp_sum( a, length, pelist )
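A short sketch of the scalar and array forms; local_contribution is a hypothetical per-PE function:
real :: partial, a(50)
partial = local_contribution()   ! each PE supplies its own value
call mpp_sum( partial )          ! scalar form: result on every PE in the current pelist
call mpp_sum( a, size(a) )       ! element-wise sum of a contiguous block of length size(a)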
interface mpp_mod::mpp_sum_ad |
Calculates sum of a given numerical array across pe's for adjoint domains.
interface mpp_mod::mpp_transmit |
Basic message-passing call.
MPP_TYPE_ corresponds to any 4-byte and 8-byte variant of integer, real, complex, logical variables, of rank 0 or 1. A contiguous block from a multi-dimensional array may be passed by its starting address and its length, as in f77.
mpp_transmit is currently implemented as asynchronous outward transmission and synchronous inward transmission. This follows the behaviour of shmem_put and shmem_get. In MPI, it is implemented as mpi_isend and mpi_recv. For most applications, transmissions occur in pairs, and are here accomplished in a single call.
The special PE designations NULL_PE, ANY_PE and ALL_PES are provided by use association.
NULL_PE: is used to disable one of the pair of transmissions.
ANY_PE: is used for unspecific remote destination. (Please note that put_pe=ANY_PE has no meaning in the MPI context, though it is available in the SHMEM invocation. If portability is a concern, it is best avoided.)
ALL_PES: is used for broadcast operations.
It is recommended that mpp_broadcast be used for broadcasts.
The following example illustrates the use of NULL_PE and ALL_PES:
real, dimension(n) :: a
if( pe.EQ.0 )then
    do p = 1,npes-1
        call mpp_transmit( a, n, p, a, n, NULL_PE )
    end do
else
    call mpp_transmit( a, n, NULL_PE, a, n, 0 )
end if
call mpp_transmit( a, n, ALL_PES, a, n, 0 )
The do loop and the broadcast operation above are equivalent.
Two overloaded calls mpp_send and mpp_recv have also been provided. mpp_send calls mpp_transmit with get_pe=NULL_PE. mpp_recv calls mpp_transmit with put_pe=NULL_PE. Thus the do loop above could be written more succinctly:
if( pe.EQ.0 )then
    do p = 1,npes-1
        call mpp_send( a, n, p )
    end do
else
    call mpp_recv( a, n, 0 )
end if
Example call:
call mpp_transmit( put_data, put_len, to_pe, get_data, get_len, from_pe )
type mpp_mod::mpp_type |
Private Attributes | |
integer | counter |
number of instances of this type | |
integer | etype |
elementary data type (e.g. mpi_byte) | |
integer | id |
identifier within message passing library (e.g. mpi) | |
integer | ndims |
type(mpp_type), pointer | next => null() |
type(mpp_type), pointer | prev => null() |
integer, dimension(:), allocatable | sizes |
integer, dimension(:), allocatable | starts |
integer, dimension(:), allocatable | subsizes |
interface mpp_mod::mpp_type_create |
Create a mpp_type variable.
[in] | field | A field of any numerical or logical type |
[in] | array_of_subsizes | Integer array of subsizes |
[in] | array_of_starts | Integer array of starts |
[out] | dtype_out | Output variable for created mpp_type |
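A usage sketch; the field shape and block location are illustrative, and the 0-based starts follow the MPI subarray convention, which is an assumption here:
real :: field(10,10)
type(mpp_type) :: subarray
call mpp_type_create( field, (/ 2, 3 /), (/ 1, 1 /), subarray )   ! 2x3 block starting at element (2,2)
! ... use subarray in data-transfer calls ...
call mpp_type_free( subarray )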
type mpp_mod::mpp_type_list |
type mpp_mod::summary_struct |
Private Attributes | |
type(clock_data_summary), dimension(max_event_types) | event |
character(len=16) | name |
integer function, dimension(2) get_ascii_file_num_lines_and_length | ( | character(len=*), intent(in) | FILENAME, |
integer, dimension(:), intent(in), optional | PELIST | ||
) |
Function to determine the maximum line length and number of lines from an ascii file.
[in] | filename | name of the file to be read |
[in] | pelist | optional pelist |
Definition at line 1344 of file mpp_util.inc.
subroutine mpp_broadcast_char | ( | character(len=*), dimension(:), intent(inout) | char_data, |
integer, intent(in) | length, | ||
integer, intent(in) | from_pe, | ||
integer, dimension(:), intent(in), optional | pelist | ||
) |
Broadcasts a character string from the given pe to its pelist.
[in,out] | char_data | Character string to send |
[in] | length | length of given data to broadcast |
[in] | from_pe | pe to broadcast from |
[in] | pelist | optional pelist to broadcast to |
Definition at line 324 of file mpp_comm_mpi.inc.
subroutine mpp_clock_set_grain | ( | integer, intent(in) | grain | ) |
Set the level of granularity of timing measurements.
This routine and three other routines, mpp_clock_id, mpp_clock_begin(id), and mpp_clock_end(id), may be used to time parallel code sections and extract parallel statistics. Clocks are identified by names, which should be unique in the first 32 characters. The mpp_clock_id call initializes a clock of a given name and returns an integer id. This id can be used by subsequent mpp_clock_begin and mpp_clock_end calls set around a code section to be timed. Example:
integer :: id
id = mpp_clock_id( 'Atmosphere' )
call mpp_clock_begin(id)
call atmos_model()
call mpp_clock_end(id)
Two flags may be used to alter the behaviour of mpp_clock. If the flag MPP_CLOCK_SYNC is turned on by mpp_clock_id, the clock calls mpp_sync across all the PEs in the current pelist at the top of the timed code section, but allows each PE to complete the code section (and reach mpp_clock_end) at different times. This allows us to measure load imbalance for a given code section. Statistics are written to stdout by mpp_exit.
The flag MPP_CLOCK_DETAILED may be turned on by mpp_clock_id to get detailed communication profiles. Communication events of the types SEND, RECV, BROADCAST, REDUCE and WAIT are separately measured for data volume and time. Statistics are written to stdout by mpp_exit, and individual PE info is also written to the file mpp_clock.out.#### where #### is the PE id given by mpp_pe.
The flags MPP_CLOCK_SYNC and MPP_CLOCK_DETAILED are integer parameters available by use association, and may be summed to turn them both on.
While the nesting of clocks is allowed, please note that turning on the non-optional flags on inner clocks has certain subtle issues. Turning on MPP_CLOCK_SYNC on an inner clock may distort outer clock measurements of load imbalance. Turning on MPP_CLOCK_DETAILED will stop detailed measurements on its outer clock, since only one detailed clock may be active at one time. Also, detailed clocks only time a certain number of events per clock (currently 40000) to conserve memory. If this array overflows, a warning message is printed, and subsequent events for this clock are not timed.
Timings are done using the f90 standard SYSTEM_CLOCK intrinsic.
The resolution of SYSTEM_CLOCK is often too coarse for use except across large swaths of code. On SGI systems this is transparently overloaded with a higher-resolution clock made available through a non-portable fortran interface by nsclock.c. This approach will eventually be extended to other platforms.
New behaviour added at the Havana release allows the user to embed profiling calls at varying levels of granularity all over the code, and for any particular run, set a threshold of granularity so that finer-grained clocks become dormant.
The threshold granularity is held in the private module variable clock_grain. This value may be modified by the call mpp_clock_set_grain, and affects clocks initiated by subsequent calls to mpp_clock_id. The value of clock_grain is set to an arbitrarily large number initially.
Clocks initialized by mpp_clock_id can set a new optional argument grain setting their granularity level. Clocks check this level against the current value of clock_grain, and are only triggered if they are at or below ("coarser than") the threshold. Finer-grained clocks are dormant for that run.
The following grain levels are pre-defined:
Note that subsequent changes to clock_grain do not change the status of already initiated clocks, and that if the optional grain argument is absent, the clock is always triggered. This guarantees backward compatibility.
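A sketch of the threshold mechanism; CLOCK_COMPONENT is assumed here to be one of the pre-defined coarse grain parameters, alongside CLOCK_LOOP (which appears in this module's default for clock_grain):
integer :: id_comp, id_loop
call mpp_clock_set_grain( CLOCK_COMPONENT )                      ! threshold: component level
id_comp = mpp_clock_id( 'Atmosphere', grain=CLOCK_COMPONENT )    ! at threshold: triggered
id_loop = mpp_clock_id( 'inner loop', grain=CLOCK_LOOP )         ! finer than threshold: dormant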
Definition at line 640 of file mpp_util.inc.
subroutine mpp_declare_pelist | ( | integer, dimension(:), intent(in) | pelist, |
character(len=*), intent(in), optional | name, | ||
integer, intent(out), optional | commID | ||
) |
Declare a pelist.
This call is written specifically to accommodate an MPI restriction that requires a parent communicator to create a child communicator; in other words, a pelist cannot go off and declare a communicator on its own: every PE in the parent, including those not in pelist(:), must get together for the MPI_COMM_CREATE call. The parent is typically MPI_COMM_WORLD, though it could also be a subset that includes all PEs in pelist.
This call implies synchronization across the PEs in the current pelist, of which pelist is a subset.
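A minimal sketch declaring the lower half of the current pelist as a subset; note that every PE in the parent pelist makes the call, including those outside the subset:
integer :: i
call mpp_declare_pelist( (/ (i, i=0, mpp_npes()/2-1) /), 'lower half' )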
Definition at line 460 of file mpp_util.inc.
subroutine mpp_init | ( | integer, intent(in), optional | flags, |
integer, intent(in), optional | localcomm, | ||
integer, intent(in), optional | test_level, | ||
character(len=*), intent(in), optional | alt_input_nml_path | ||
) |
Initialize the mpp_mod module. Must be called before any usage.
[in] | flags | Flags for debug output, can be MPP_VERBOSE or MPP_DEBUG |
[in] | localcomm | Id of MPI communicator used to initialize |
[in] | test_level | Used to exit initialization at certain stages before completion for testing purposes |
[in] | alt_input_nml_path | Input path for namelist |
Definition at line 33 of file mpp_comm_mpi.inc.
integer function mpp_npes |
Returns processor count for current pelist.
This returns the number of PEs in the current pelist. For a uniprocessor application, it will always return 1.
Definition at line 420 of file mpp_util.inc.
integer function mpp_pe |
Returns processor ID.
This returns the unique ID associated with a PE. This number runs between 0 and npes-1, where npes is the total processor count, returned by mpp_npes. For a uniprocessor application this will always return 0.
Definition at line 406 of file mpp_util.inc.
subroutine mpp_set_current_pelist | ( | integer, dimension(:), intent(in), optional | pelist, |
logical, intent(in), optional | no_sync | ||
) |
Set context pelist.
This call sets the value of the current pelist, which is the context for all subsequent "global" calls where the optional pelist argument is omitted. All the PEs that are to be in the current pelist must call it.
In MPI, this call may hang unless pelist has been previously declared using mpp_declare_pelist. If the argument pelist is absent, the current pelist is set to the "world" pelist, of all PEs in the job.
Definition at line 489 of file mpp_util.inc.
subroutine mpp_set_stack_size | ( | integer, intent(in) | n | ) |
Set the mpp_stack variable to be at least n LONG words long.
[in] | n | stack size to set |
Definition at line 302 of file mpp_comm_mpi.inc.
subroutine mpp_sum_2d_ | ( | dimension(:,:), intent(inout) | a, |
integer, intent(in) | length, | ||
integer, dimension(:), intent(in), optional | pelist | ||
) |
Sums 2d array across pes.
[in] | length | number of elements in the given 2d array |
[in] | pelist | pelist to calculate sum across |
Definition at line 44 of file mpp_sum.inc.
subroutine mpp_sum_2d_ad_ | ( | dimension(:,:), intent(inout) | a, |
integer, intent(in) | length, | ||
integer, dimension(:), intent(in), optional | pelist | ||
) |
Sums 2d array across pes.
[in] | length | number of elements in the given 2d array |
[in] | pelist | pelist to calculate sum across |
Definition at line 45 of file mpp_sum_ad.inc.
subroutine mpp_sum_3d_ | ( | dimension(:,:,:), intent(inout) | a, |
integer, intent(in) | length, | ||
integer, dimension(:), intent(in), optional | pelist | ||
) |
Sums 3d array across pes.
[in] | length | number of elements in the given 3d array |
[in] | pelist | pelist to calculate sum across |
Definition at line 59 of file mpp_sum.inc.
subroutine mpp_sum_3d_ad_ | ( | dimension(:,:,:), intent(inout) | a, |
integer, intent(in) | length, | ||
integer, dimension(:), intent(in), optional | pelist | ||
) |
Sums 3d array across pes.
[in] | length | number of elements in the given 3d array |
[in] | pelist | pelist to calculate sum across |
Definition at line 60 of file mpp_sum_ad.inc.
subroutine mpp_sum_4d_ | ( | dimension(:,:,:,:), intent(inout) | a, |
integer, intent(in) | length, | ||
integer, dimension(:), intent(in), optional | pelist | ||
) |
Sums 4d array across pes.
[in] | length | number of elements in the given 4d array |
[in] | pelist | pelist to calculate sum across |
Definition at line 74 of file mpp_sum.inc.
subroutine mpp_sum_4d_ad_ | ( | dimension(:,:,:,:), intent(inout) | a, |
integer, intent(in) | length, | ||
integer, dimension(:), intent(in), optional | pelist | ||
) |
Sums 4d array across pes.
[in] | length | number of elements in the given 4d array |
[in] | pelist | pelist to calculate sum across |
Definition at line 75 of file mpp_sum_ad.inc.
subroutine mpp_sum_5d_ | ( | dimension(:,:,:,:,:), intent(inout) | a, |
integer, intent(in) | length, | ||
integer, dimension(:), intent(in), optional | pelist | ||
) |
Sums 5d array across pes.
[in] | length | number of elements in the given 5d array |
[in] | pelist | pelist to calculate sum across |
Definition at line 89 of file mpp_sum.inc.
subroutine mpp_sum_5d_ad_ | ( | dimension(:,:,:,:,:), intent(inout) | a, |
integer, intent(in) | length, | ||
integer, dimension(:), intent(in), optional | pelist | ||
) |
Sums 5d array across pes.
[in] | length | number of elements in the given 5d array |
[in] | pelist | pelist to calculate sum across |
Definition at line 90 of file mpp_sum_ad.inc.
subroutine read_ascii_file | ( | character(len=*), intent(in) | FILENAME, |
integer, intent(in) | LENGTH, | ||
character(len=*), dimension(:), intent(inout) | Content, | ||
integer, dimension(:), intent(in), optional | PELIST | ||
) |
Reads any ascii file into a character array and broadcasts it to the non-root mpi-tasks. Based off READ_INPUT_NML.
The passed-in 'Content' array must be of the form: character(len=LENGTH), dimension(:), allocatable :: array_name
Reads from this array must be done in a do loop over the number of lines, i.e.:
do i=1, num_lines
    read (UNIT=array_name(i), FMT=*) var1, var2, ...
end do
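Putting the pieces together, a sketch that sizes the buffer with get_ascii_file_num_lines and then reads; the filename and the 256-character line limit are illustrative:
character(len=256), dimension(:), allocatable :: lines
integer :: num_lines
num_lines = get_ascii_file_num_lines( 'table.txt', 256 )
allocate( lines(num_lines) )
call read_ascii_file( 'table.txt', 256, lines )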
Definition at line 1437 of file mpp_util.inc.
subroutine read_input_nml | ( | character(len=*), intent(in), optional | pelist_name_in, |
character(len=*), intent(in), optional | alt_input_nml_path | ||
) |
Reads an existing input nml file into a character array and broadcasts it to the non-root mpi-tasks. This allows the use of reads from an internal file for namelist settings (requires 2003 compliant compiler)
read(input_nml_file, nml=<name_nml>, iostat=status)
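For example, a sketch of a module reading its own namelist from the broadcast internal file; the namelist name and variable are illustrative:
integer :: io_status, my_setting
namelist /mymod_nml/ my_setting
call read_input_nml()                                     ! fills the public input_nml_file array
read( input_nml_file, nml=mymod_nml, iostat=io_status )   ! internal-file namelist read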
Definition at line 1218 of file mpp_util.inc.