aboutsummaryrefslogtreecommitdiff
path: root/Source/ablastr/utils/SignalHandling.cpp
diff options
context:
space:
mode:
authorGravatar Phil Miller <unmobile+gh@gmail.com> 2022-04-02 23:12:53 -0700
committerGravatar GitHub <noreply@github.com> 2022-04-02 23:12:53 -0700
commit1d6ce20cded62eb6d02ee5ccf5398e5d938b40e3 (patch)
tree63ab140a95aa1945f4a971f8bcb06375b14319a4 /Source/ablastr/utils/SignalHandling.cpp
parent14292f6d6256bebfd92c5c325fc259d8280d682f (diff)
downloadWarpX-1d6ce20cded62eb6d02ee5ccf5398e5d938b40e3.tar.gz
WarpX-1d6ce20cded62eb6d02ee5ccf5398e5d938b40e3.tar.zst
WarpX-1d6ce20cded62eb6d02ee5ccf5398e5d938b40e3.zip
Allow process signals to trigger checkpoint or break behavior (#2896)
* Beginnings of signal handling machinery * Add tentative logic to make checkpoint call * Adapt formatting slightly * Add calls to read signals and set up signal handlers * Initialize signal flag array * Add parsing of signal names, and fix some whitespace issues * Skip signal setup on Windows * added checkpoint and break signal inputs to picmi.py * Address initial review requests * Correct comment to match changed code * Convert maximum signal number to a symbolic name * Always parse signal input, and error out on Windows or wherever it may be unsupported * Typo fix * Add missing reset of checkpoint signal flag * Add reset of break signal, in support of Python or library usage * Test for a configured checkpoint diag when asked to checkpoint on a signal * Fix typo in Linux code path * Clean up MPI support * Use symbolic name for maximum signal number * Fix unused variable in the no-MPI case * Add missing header inclusions * Switch signal parsing to an enumerated table * Test signal handling for Linux, not GNU C library * Avoid another magic number * Update MPI_Ibcast call to match symbolic array length * Update loop over signal flags to use symbolic limit * Match #includes to usage * Add omitted C++ std <atomic> header include * Guard entire set of signal definitions as *nix-only, not for Windows * Broaden Windows exclusion to avoid zero-length array that displeases MSVC++ * Check return value from sigaction() * Convert conditional calls to Abort() to assertions * Move check for platform support to input parsing * Shift signal handling code over toward ABLASTR to share with ImpactX and Hipace++ * Minor cleanup * A bit more cleanup * Fix formatting nits * Add AMReX error handling on MPI calls * Add ABLASTR signal handling code to GNU makefile too * Document new input parameters * Use ABLASTR assertion macros in ABLASTR code * Convert requests limit value to a requests array size * Generalize signal handling to an arbitrary set of potential actions * Rename class to 
match usage and file name * Stick stuff in ABLASTR namespace * Indent conditional includes as requested Co-authored-by: Roelof <roelof.groenewald@modernelectron.com>
Diffstat (limited to 'Source/ablastr/utils/SignalHandling.cpp')
-rw-r--r--Source/ablastr/utils/SignalHandling.cpp192
1 files changed, 192 insertions, 0 deletions
diff --git a/Source/ablastr/utils/SignalHandling.cpp b/Source/ablastr/utils/SignalHandling.cpp
new file mode 100644
index 000000000..cdec9b653
--- /dev/null
+++ b/Source/ablastr/utils/SignalHandling.cpp
@@ -0,0 +1,192 @@
+/* Copyright 2022 Philip Miller
+ *
+ * This file is part of WarpX.
+ *
+ * License: BSD-3-Clause-LBNL
+ */
+
+#include "SignalHandling.H"
+#include "TextMsg.H"
+
+#include <AMReX.H>
+#include <AMReX_ParallelDescriptor.H>
+#include <AMReX_IParser.H>
+
+#include <cctype>
+
+// For sigaction() et al.
+#if defined(__linux__) || defined(__APPLE__)
+# include <signal.h>
+#endif
+
+namespace ablastr::utils {
+
+// Out-of-class definitions of the static data members of SignalHandling.
+
+// One flag per signal number. Set to true by SignalSetFlag(), reset by
+// InitSignalHandling(), and atomically read-and-cleared by CheckSignals().
+std::atomic<bool> SignalHandling::signal_received_flags[NUM_SIGNALS];
+// signal_conf_requests[action][signal] tells whether receiving `signal`
+// requests `action`. It is only read in this file; presumably it is filled in
+// by the input-parsing code of the application (TODO confirm against callers).
+bool SignalHandling::signal_conf_requests[SIGNAL_REQUESTS_SIZE][NUM_SIGNALS];
+// One flag per action. Accumulated on rank 0 and broadcast in CheckSignals(),
+// then read and cleared by TestAndResetActionRequestFlag().
+bool SignalHandling::signal_actions_requested[SIGNAL_REQUESTS_SIZE];
+#if defined(AMREX_USE_MPI)
+// Handle of the non-blocking broadcast started in CheckSignals() and
+// completed in WaitSignals().
+MPI_Request SignalHandling::signal_mpi_ibcast_request;
+#endif
+
+/**
+ * \brief Convert a textual signal specification into a signal number.
+ *
+ * The string is evaluated as an integer expression by amrex::IParser. On Linux
+ * and macOS the signal names listed below are registered as parser constants,
+ * each in four spellings (e.g. "INT", "int", "SIGINT", "sigint"), so either a
+ * name or a plain number can be given. On other platforms no names are
+ * registered and only numeric expressions can be evaluated.
+ *
+ * \param[in] str signal name or integer expression to evaluate
+ * \return the signal number, which lies in [1, NUM_SIGNALS)
+ *
+ * The ABLASTR always-assert fails if the evaluated value is outside that range.
+ */
+int
+SignalHandling::parseSignalNameToNumber(const std::string &str)
+{
+    amrex::IParser signals_parser(str);
+
+#if defined(__linux__) || defined(__APPLE__)
+    struct {
+        const char* abbrev;
+        const int value;
+    } signals_to_parse[] = {
+        {"ABRT", SIGABRT},
+        {"ALRM", SIGALRM},
+        {"BUS", SIGBUS},
+        {"CHLD", SIGCHLD},
+        {"CLD", SIGCHLD}, // Synonymous to SIGCHLD on Linux
+        {"CONT", SIGCONT},
+#if defined(SIGEMT)
+        {"EMT", SIGEMT}, // macOS and some Linux architectures
+#endif
+        // Omitted because AMReX typically handles SIGFPE specially
+        // {"FPE", SIGFPE},
+        {"HUP", SIGHUP},
+        {"ILL", SIGILL},
+#if defined(SIGINFO)
+        {"INFO", SIGINFO}, // macOS and some Linux architectures
+#endif
+        {"INT", SIGINT},
+        {"IO", SIGIO},
+        {"IOT", SIGABRT}, // Synonymous to SIGABRT on Linux
+        // {"KILL", SIGKILL}, // Cannot be handled
+        {"PIPE", SIGPIPE},
+        {"POLL", SIGIO}, // Synonymous to SIGIO on Linux
+        {"PROF", SIGPROF},
+#if defined(SIGPWR)
+        {"PWR", SIGPWR}, // Linux-only
+#endif
+        {"QUIT", SIGQUIT},
+        {"SEGV", SIGSEGV},
+#if defined(SIGSTKFLT)
+        {"STKFLT", SIGSTKFLT}, // Linux-only
+#endif
+        // {"STOP", SIGSTOP}, // Cannot be handled
+        {"SYS", SIGSYS},
+        {"TERM", SIGTERM},
+        {"TRAP", SIGTRAP},
+        {"TSTP", SIGTSTP},
+        {"TTIN", SIGTTIN},
+        {"TTOU", SIGTTOU},
+        {"URG", SIGURG},
+        {"USR1", SIGUSR1},
+        {"USR2", SIGUSR2},
+        {"VTALRM", SIGVTALRM},
+        {"WINCH", SIGWINCH},
+        {"XCPU", SIGXCPU},
+        {"XFSZ", SIGXFSZ},
+    };
+
+    for (const auto& sp : signals_to_parse) {
+        std::string name_upper = sp.abbrev;
+        std::string name_lower = name_upper;
+        for (char &c : name_lower) {
+            // std::tolower is only defined for values representable as
+            // unsigned char (or EOF), so convert before calling it.
+            c = static_cast<char>(std::tolower(static_cast<unsigned char>(c)));
+        }
+
+        // Register the bare abbreviation in both cases ...
+        signals_parser.setConstant(name_upper, sp.value);
+        signals_parser.setConstant(name_lower, sp.value);
+        // ... and the same with the conventional SIG/sig prefix.
+        name_upper = "SIG" + name_upper;
+        name_lower = "sig" + name_lower;
+        signals_parser.setConstant(name_upper, sp.value);
+        signals_parser.setConstant(name_lower, sp.value);
+    }
+#endif // #if defined(__linux__) || defined(__APPLE__)
+
+    auto spf = signals_parser.compileHost<0>();
+
+    // Check both bounds on the value exactly as the parser returned it, before
+    // narrowing to int: the result is later used as an array index and as a
+    // signal number, so zero and negative values must be rejected as well.
+    const auto parsed_value = spf();
+    ABLASTR_ALWAYS_ASSERT_WITH_MESSAGE(parsed_value >= 1 && parsed_value < NUM_SIGNALS,
+        "Parsed signal value is outside the supported range of [1, 31]");
+
+    return static_cast<int>(parsed_value);
+}
+
+/**
+ * \brief Reset all received-signal flags and install the signal dispositions
+ *        for every signal that is configured to request at least one action.
+ *
+ * On rank 0 the handler SignalSetFlag() is installed; on all other ranks the
+ * configured signals are ignored (SIG_IGN). Signals that are not configured
+ * keep their current disposition. Outside Linux and macOS this function does
+ * nothing.
+ *
+ * The ABLASTR always-assert fails if sigaction() reports an error.
+ */
+void
+SignalHandling::InitSignalHandling()
+{
+#if defined(__linux__) || defined(__APPLE__)
+    // Value-initialize the whole structure: sigaction() reads sa_flags too,
+    // and an indeterminate value there could silently select options such as
+    // SA_RESETHAND or SA_SIGINFO.
+    struct sigaction sa{};
+    sa.sa_flags = 0;
+    sigemptyset(&sa.sa_mask);
+
+    for (int signal_number = 0; signal_number < NUM_SIGNALS; ++signal_number) {
+        signal_received_flags[signal_number] = false;
+
+        // A signal is active if any action has been configured for it.
+        bool signal_active = false;
+        for (int signal_request = 0; signal_request < SIGNAL_REQUESTS_SIZE; ++signal_request) {
+            signal_active |= signal_conf_requests[signal_request][signal_number];
+        }
+        if (!signal_active) {
+            continue;
+        }
+
+        // Only rank 0 records the signal; the other ranks ignore it.
+        if (amrex::ParallelDescriptor::MyProc() == 0) {
+            sa.sa_handler = &SignalHandling::SignalSetFlag;
+        } else {
+            sa.sa_handler = SIG_IGN;
+        }
+        const int result = sigaction(signal_number, &sa, nullptr);
+        ABLASTR_ALWAYS_ASSERT_WITH_MESSAGE(result == 0,
+            "Failed to install signal handler for a configured signal");
+    }
+#endif
+}
+
+/**
+ * \brief Collect the signals received since the previous call and start
+ *        sharing the resulting action requests with all ranks.
+ *
+ * Rank 0 turns every pending signal into the actions configured for it. With
+ * MPI enabled, a non-blocking broadcast of the action flags from rank 0 is
+ * then started; it is completed by WaitSignals().
+ */
+void
+SignalHandling::CheckSignals()
+{
+    // Signals are assumed to reach rank 0 for certain and possibly other ranks
+    // too, so only rank 0's view is used to keep all ranks coordinated.
+    const bool on_root_rank = (amrex::ParallelDescriptor::MyProc() == 0);
+
+    for (int sig = 0; on_root_rank && sig < NUM_SIGNALS; ++sig) {
+        // Read and clear the flag in a single atomic step: a signal arriving
+        // after this point sets the flag again and is picked up by the next
+        // call, so none can be lost.
+        if (!signal_received_flags[sig].exchange(false)) {
+            continue;
+        }
+
+        for (int action = 0; action < SIGNAL_REQUESTS_SIZE; ++action) {
+            if (signal_conf_requests[action][sig]) {
+                signal_actions_requested[action] = true;
+            }
+        }
+    }
+
+#if defined(AMREX_USE_MPI)
+    // Rank 0 is the broadcast root; the request is completed in WaitSignals().
+    BL_MPI_REQUIRE(MPI_Ibcast(signal_actions_requested, SIGNAL_REQUESTS_SIZE,
+                              MPI_CXX_BOOL, 0,
+                              amrex::ParallelDescriptor::Communicator(),
+                              &signal_mpi_ibcast_request));
+#endif
+}
+
+// Complete the non-blocking broadcast of the action request flags that was
+// started by CheckSignals(). Without MPI support this function does nothing.
+void
+SignalHandling::WaitSignals()
+{
+#if defined(AMREX_USE_MPI)
+ // Blocks until the pending MPI_Ibcast request has finished; the status is
+ // not needed, so it is ignored.
+ BL_MPI_REQUIRE(MPI_Wait(&signal_mpi_ibcast_request, MPI_STATUS_IGNORE));
+#endif
+}
+
+/**
+ * \brief Report whether an action has been requested and clear the request.
+ *
+ * \param[in] action_to_test index of the action in signal_actions_requested
+ * \return the value the flag had before it was cleared
+ */
+bool
+SignalHandling::TestAndResetActionRequestFlag(int action_to_test)
+{
+    bool& request_flag = signal_actions_requested[action_to_test];
+    const bool was_requested = request_flag;
+    request_flag = false;
+    return was_requested;
+}
+
+// Signal handler installed by InitSignalHandling() on rank 0: records that
+// the given signal was received by setting its atomic flag. The flag is
+// consumed later by CheckSignals().
+void
+SignalHandling::SignalSetFlag(int signal_number)
+{
+ signal_received_flags[signal_number] = true;
+}
+
+} // namespace ablastr::utils