diff options
author | 2023-08-28 10:25:25 -0700 | |
---|---|---|
committer | 2023-08-28 10:25:25 -0700 | |
commit | f02ad26b531fb087b18db929ee328e074fb1b1ee (patch) | |
tree | 0b15489e1e1bf973e18afc0ff5ba6839027d774a /Source/ablastr/parallelization/MPIInitHelpers.cpp | |
parent | bacabae8ed0fdcc82d9f3ea9d82b7dc53691e3b8 (diff) | |
download | WarpX-f02ad26b531fb087b18db929ee328e074fb1b1ee.tar.gz WarpX-f02ad26b531fb087b18db929ee328e074fb1b1ee.tar.zst WarpX-f02ad26b531fb087b18db929ee328e074fb1b1ee.zip |
Work-Around: Segfault in MPI_Init with HIP (#4237)
* Work-Around: Segfault in MPI_Init with HIP
See:
https://docs.olcf.ornl.gov/systems/crusher_quick_start_guide.html#olcfdev-1655-occasional-seg-fault-during-mpi-init
* Move to ABLASTR
All that counts is that HIP is initialized before GPU-aware MPI.
* Add Exception
Diffstat (limited to 'Source/ablastr/parallelization/MPIInitHelpers.cpp')
-rw-r--r-- | Source/ablastr/parallelization/MPIInitHelpers.cpp | 24 |
1 file changed, 21 insertions, 3 deletions
diff --git a/Source/ablastr/parallelization/MPIInitHelpers.cpp b/Source/ablastr/parallelization/MPIInitHelpers.cpp index 65e7525c0..633c004c9 100644 --- a/Source/ablastr/parallelization/MPIInitHelpers.cpp +++ b/Source/ablastr/parallelization/MPIInitHelpers.cpp @@ -1,7 +1,6 @@ -/* Copyright 2020 Axel Huebl - * - * This file is part of ABLASTR. +/* This file is part of ABLASTR. * + * Authors: Axel Huebl * License: BSD-3-Clause-LBNL */ #include "MPIInitHelpers.H" @@ -15,10 +14,19 @@ # include <mpi.h> #endif +// OLCFDEV-1655: Segfault during MPI_Init & in PMI_Allgather +// https://docs.olcf.ornl.gov/systems/crusher_quick_start_guide.html#olcfdev-1655-occasional-seg-fault-during-mpi-init +#if defined(AMREX_USE_HIP) +#include <hip/hip_runtime.h> +#endif + +#include <iostream> #include <string> #include <utility> +#include <stdexcept> #include <sstream> + namespace ablastr::parallelization { int @@ -40,6 +48,16 @@ namespace ablastr::parallelization std::pair< int, int > mpi_init (int argc, char* argv[]) { + // OLCFDEV-1655: Segfault during MPI_Init & in PMI_Allgather + // https://docs.olcf.ornl.gov/systems/crusher_quick_start_guide.html#olcfdev-1655-occasional-seg-fault-during-mpi-init +#if defined(AMREX_USE_HIP) && defined(AMREX_USE_MPI) + hipError_t hip_ok = hipInit(0); + if (hip_ok != hipSuccess) { + std::cerr << "hipInit failed with error code " << hip_ok << "! Aborting now.\n"; + throw std::runtime_error("hipInit failed. Did not proceeding with MPI_Init_thread."); + } +#endif + const int thread_required = mpi_thread_required(); #ifdef AMREX_USE_MPI int thread_provided = -1; |