aboutsummaryrefslogtreecommitdiff
path: root/Source/ablastr/parallelization/MPIInitHelpers.cpp
blob: 633c004c93a03bba70f29ad14070e2acc95ea319 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
/* This file is part of ABLASTR.
 *
 * Authors: Axel Huebl
 * License: BSD-3-Clause-LBNL
 */
#include "MPIInitHelpers.H"

#include <ablastr/warn_manager/WarnManager.H>

#include <AMReX_Config.H>
#include <AMReX_ParallelDescriptor.H>

#if defined(AMREX_USE_MPI)
#   include <mpi.h>
#endif

// OLCFDEV-1655: Segfault during MPI_Init & in PMI_Allgather
// https://docs.olcf.ornl.gov/systems/crusher_quick_start_guide.html#olcfdev-1655-occasional-seg-fault-during-mpi-init
#if defined(AMREX_USE_HIP)
#include <hip/hip_runtime.h>
#endif

#include <iostream>
#include <string>
#include <utility>
#include <stdexcept>
#include <sstream>


namespace ablastr::parallelization
{
    /** Determine the MPI threading level this build requires.
     *
     * @return the MPI_THREAD_* level implied by the compile-time
     *         configuration, or -1 when the build has no MPI support.
     */
    int
    mpi_thread_required ()
    {
        // -1 signals "MPI disabled at compile time" to callers.
        int thread_required = -1;
#ifdef AMREX_USE_MPI
        thread_required = MPI_THREAD_SINGLE;  // equiv. to MPI_Init
#   ifdef AMREX_USE_OMP
        // OpenMP present: only the main thread makes MPI calls.
        thread_required = MPI_THREAD_FUNNELED;
#   endif
#   ifdef AMREX_MPI_THREAD_MULTIPLE  // i.e. for async_io
        thread_required = MPI_THREAD_MULTIPLE;
#   endif
#endif
        return thread_required;
    }

    /** Initialize MPI with the threading level required by this build.
     *
     * @param argc forwarded to MPI_Init_thread
     * @param argv forwarded to MPI_Init_thread
     * @return pair of (required, provided) MPI threading level;
     *         both are -1 when the build has no MPI support
     * @throws std::runtime_error if the HIP runtime cannot be initialized
     *         (OLCFDEV-1655 workaround, see below)
     */
    std::pair< int, int >
    mpi_init (int argc, char* argv[])
    {
        // OLCFDEV-1655: Segfault during MPI_Init & in PMI_Allgather
        // Initializing the HIP runtime before MPI works around occasional
        // segfaults inside MPI_Init on Crusher/Frontier-class systems.
        // https://docs.olcf.ornl.gov/systems/crusher_quick_start_guide.html#olcfdev-1655-occasional-seg-fault-during-mpi-init
#if defined(AMREX_USE_HIP) && defined(AMREX_USE_MPI)
        const hipError_t hip_ok = hipInit(0);
        if (hip_ok != hipSuccess) {
            std::cerr << "hipInit failed with error code " << hip_ok << "! Aborting now.\n";
            throw std::runtime_error("hipInit failed. Not proceeding with MPI_Init_thread.");
        }
#endif

        const int thread_required = mpi_thread_required();
#ifdef AMREX_USE_MPI
        int thread_provided = -1;
        MPI_Init_thread(&argc, &argv, thread_required, &thread_provided);
#else
        amrex::ignore_unused(argc, argv);
        const int thread_provided = -1;
#endif
        return std::make_pair(thread_required, thread_provided);
    }

    /** Finalize MPI (no-op in builds without MPI support). */
    void
    mpi_finalize ()
    {
#ifdef AMREX_USE_MPI
        MPI_Finalize();
#endif
    }

    /** Compare the MPI threading level provided at runtime against the
     *  level this build requires and record a warning on any mismatch.
     *
     * A lower-than-required level may break asynchronous operations
     * (high-priority warning); a stricter-than-required level may cost
     * communication performance (informational note).
     */
    void
    check_mpi_thread_level ()
    {
#ifdef AMREX_USE_MPI
        const int thread_required = mpi_thread_required();
        int thread_provided = -1;
        MPI_Query_thread(&thread_provided);
        auto mtn = amrex::ParallelDescriptor::mpi_level_to_string;

        // The two conditions are mutually exclusive; use else-if and a
        // locally scoped stream per branch to make that explicit.
        if( thread_provided < thread_required ){
            std::stringstream ss;
            ss << "WARNING: Provided MPI thread safety level ("
                           << mtn(thread_provided) << ") is LOWER than requested ("
                           << mtn(thread_required) << "). This might lead to undefined "
                           << "results in asynchronous operations (e.g. async_io).";
            ablastr::warn_manager::WMRecordWarning(
                    "MPI", ss.str(), ablastr::warn_manager::WarnPriority::high);
        }
        else if( thread_provided > thread_required ){
            std::stringstream ss;
            ss << "NOTE: Provided MPI thread safety level ("
                           << mtn(thread_provided) << ") is stricter than requested ("
                           << mtn(thread_required) << "). This might reduce multi-node "
                           << "communication performance.";
            ablastr::warn_manager::WMRecordWarning(
                    "MPI", ss.str());
        }
#endif
    }

} // namespace ablastr::parallelization