diff options
author | 2022-04-02 23:12:53 -0700 | |
---|---|---|
committer | 2022-04-02 23:12:53 -0700 | |
commit | 1d6ce20cded62eb6d02ee5ccf5398e5d938b40e3 (patch) | |
tree | 63ab140a95aa1945f4a971f8bcb06375b14319a4 /Source/ablastr/utils/SignalHandling.H | |
parent | 14292f6d6256bebfd92c5c325fc259d8280d682f (diff) | |
download | WarpX-1d6ce20cded62eb6d02ee5ccf5398e5d938b40e3.tar.gz WarpX-1d6ce20cded62eb6d02ee5ccf5398e5d938b40e3.tar.zst WarpX-1d6ce20cded62eb6d02ee5ccf5398e5d938b40e3.zip |
Allow process signals to trigger checkpoint or break behavior (#2896)
* Beginnings of signal handling machinery
* Add tentative logic to make checkpoint call
* Adapt formatting slightly
* Add calls to read signals and set up signal handlers
* Initialize signal flag array
* Add parsing of signal names, and fix some whitespace issues
* Skip signal setup on Windows
* added checkpoint and break signal inputs to picmi.py
* Address initial review requests
* Correct comment to match changed code
* Convert maximum signal number to a symbolic name
* Always parse signal input, and error out on Windows or wherever it may be unsupported
* Typo fix
* Add missing reset of checkpoint signal flag
* Add reset of break signal, in support of Python or library usage
* Test for a configured checkpoint diag when asked to checkpoint on a signal
* Fix typo in Linux code path
* Clean up MPI support
* Use symbolic name for maximum signal number
* Fix unused variable in the no-MPI case
* Add missing header inclusions
* Switch signal parsing to an enumerated table
* Test signal handling for Linux, not GNU C library
* Avoid another magic number
* Update MPI_Ibcast call to match symbolic array length
* Update loop over signal flags to use symbolic limit
* Match #includes to usage
* Add omitted C++ std <atomic> header include
* Guard entire set of signal definitions as *nix-only, not for Windows
* Broaden Windows exclusion to avoid zero-length array that displeases MSVC++
* Check return value from sigaction()
* Convert conditional calls to Abort() to assertions
* Move check for platform support to input parsing
* Shift signal handling code over toward ABLASTR to share with ImpactX and Hipace++
* Minor cleanup
* A bit more cleanup
* Fix formatting nits
* Add AMReX error handling on MPI calls
* Add ABLASTR signal handling code to GNU makefile too
* Document new input parameters
* Use ABLASTR assertion macros in ABLASTR code
* Convert requests limit value to a requests array size
* Generalize signal handling to an arbitrary set of potential actions
* Rename class to match usage and file name
* Stick stuff in ABLASTR namespace
* Indent conditional includes as requested
Co-authored-by: Roelof <roelof.groenewald@modernelectron.com>
Diffstat (limited to 'Source/ablastr/utils/SignalHandling.H')
-rw-r--r-- | Source/ablastr/utils/SignalHandling.H | 88 |
1 files changed, 88 insertions, 0 deletions
diff --git a/Source/ablastr/utils/SignalHandling.H b/Source/ablastr/utils/SignalHandling.H new file mode 100644 index 000000000..b633c3860 --- /dev/null +++ b/Source/ablastr/utils/SignalHandling.H @@ -0,0 +1,88 @@ +/* Copyright 2022 Philip Miller + * + * This file is part of WarpX. + * + * License: BSD-3-Clause-LBNL + */ + +#ifndef ABLASTR_SIGNAL_HANDLING_H_ +#define ABLASTR_SIGNAL_HANDLING_H_ + +#include <AMReX_Config.H> + +#if defined(AMREX_USE_MPI) +# include <mpi.h> +#endif + +#include <atomic> +#include <string> + +namespace ablastr::utils { + +/** + * \brief + * Signal handling + * + * Rank 0 will accept signals and asynchronously broadcast the + * configured response; other processes will ignore them and + * follow the lead of rank 0 to avoid potential for deadlocks or + * timestep-skewed response. + * + * Variables and functions are static rather than per-instance + * because signal handlers are configured at the process level. + */ +class SignalHandling +{ +public: + //! The range of signal values to accept + static constexpr int NUM_SIGNALS = 32; + + //! Labels for indexed positions in signal_actions_requests + enum signal_action_requested_labels { + //! Cleanly stop execution, as if the simulation reached its configured end + SIGNAL_REQUESTS_BREAK = 0, + //! Produce a checkpoint + SIGNAL_REQUESTS_CHECKPOINT = 1, + SIGNAL_REQUESTS_SIZE = 2 // This should always be 1 greater than the last valid value + }; + + //! Whether configuration requests the code take a particular action on a particular signal + static bool signal_conf_requests[SIGNAL_REQUESTS_SIZE][NUM_SIGNALS]; + + //! Take a string and convert it to a corresponding signal number if possible + static int parseSignalNameToNumber (const std::string &str); + + //! Set up signal handlers based on input configuration provided in `signal_conf_requests_*` + static void InitSignalHandling (); + + //! Check and clear signal flags and asynchronously broadcast them from process 0 + static void CheckSignals (); + //! Complete the asynchronous broadcast of signal flags + static void WaitSignals (); + + //! Check whether a given action has been requested, and reset the associated flag + static bool TestAndResetActionRequestFlag (int action_to_test); + +private: + //! On process 0, whether a given signal has been received since the last check + static std::atomic<bool> signal_received_flags[NUM_SIGNALS]; + +#if defined(AMREX_USE_MPI) + //! MPI requests for the asynchronous broadcasts of the signal-requested actions + static MPI_Request signal_mpi_ibcast_request; +#endif + + //! Signal handler to set flags on process 0 (other processes ignore configured signals) + static void SignalSetFlag (int signal_number); + + //! Boolean flags transmitted between CheckSignals() and + //! HandleSignals() to indicate actions requested by signals + static bool signal_actions_requested[SIGNAL_REQUESTS_SIZE]; + + // Don't allow clients to incorrectly try to construct and use an instance of this type + SignalHandling() = delete; +}; + +} // namespace ablastr::utils + +#endif // ABLASTR_SIGNAL_HANDLING_H_ |