Diffstat (limited to 'Tools/BatchScripts')
-rw-r--r--  Tools/BatchScripts/batch_lassen.sh  | 22
-rw-r--r--  Tools/BatchScripts/batch_quartz.sh  | 40
2 files changed, 62 insertions, 0 deletions
diff --git a/Tools/BatchScripts/batch_lassen.sh b/Tools/BatchScripts/batch_lassen.sh
new file mode 100644
index 000000000..0fd2500c5
--- /dev/null
+++ b/Tools/BatchScripts/batch_lassen.sh
@@ -0,0 +1,22 @@
+#!/bin/bash
+
+# Copyright 2020 Axel Huebl
+#
+# This file is part of WarpX.
+#
+# License: BSD-3-Clause-LBNL
+#
+# Refs.:
+# https://jsrunvisualizer.olcf.ornl.gov/?s4f0o11n6c7g1r11d1b1l0=
+# https://hpc.llnl.gov/training/tutorials/using-lcs-sierra-system#quick16
+
+#BSUB -G <allocation ID>
+#BSUB -W 00:10
+#BSUB -nnodes 2
+#BSUB -alloc_flags smt4
+#BSUB -J WarpX
+#BSUB -o WarpXo.%J
+#BSUB -e WarpXe.%J
+
+export OMP_NUM_THREADS=1
+jsrun -r 4 -a 1 -g 1 -c 7 -l GPU-CPU -d packed -b rs -M "-gpu" <path/to/executable> <input file> > output.txt
diff --git a/Tools/BatchScripts/batch_quartz.sh b/Tools/BatchScripts/batch_quartz.sh
new file mode 100644
index 000000000..4c1a82ff8
--- /dev/null
+++ b/Tools/BatchScripts/batch_quartz.sh
@@ -0,0 +1,40 @@
+#!/bin/bash -l
+
+# Just increase this number if you need more nodes.
+#SBATCH -N 2
+#SBATCH -t 24:00:00
+#SBATCH -A <allocation ID>
+
+#SBATCH -J WarpX
+#SBATCH -q pbatch
+#SBATCH --qos=normal
+#SBATCH --license=lustre1,lustre2
+#SBATCH --export=ALL
+#SBATCH -e error.txt
+#SBATCH -o output.txt
+# one MPI rank per half-socket (see below)
+#SBATCH --tasks-per-node=2
+# request all logical (virtual) cores per half-socket
+#SBATCH --cpus-per-task=18
+
+
+# each Quartz node has 1 socket of Intel Xeon E5-2695 v4
+# each Xeon CPU is divided into 2 bus rings that each have direct L3 access
+export WARPX_NMPI_PER_NODE=2
+
+# each MPI rank per half-socket has 9 physical cores
+# or 18 logical (virtual) cores
+# over-subscribing each physical core with 2x
+# hyperthreading led to a slight (3.5%) speedup on Cori's Intel Xeon E5-2698 v3,
+# so we do the same here
+# the settings below make sure threads are close to the
+# controlling MPI rank (process) per half socket and
+# distribute equally over close-by physical cores and,
+# for N>9, also equally over close-by logical cores
+export OMP_PROC_BIND=spread
+export OMP_PLACES=threads
+export OMP_NUM_THREADS=18
+
+EXE="<path/to/executable>" # e.g. ./warpx
+
+srun --cpu_bind=cores -n $(( ${SLURM_JOB_NUM_NODES} * ${WARPX_NMPI_PER_NODE} )) ${EXE} <input file>
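
For context, a minimal submission sketch for the Lassen script, assuming the <allocation ID>, <path/to/executable>, and <input file> placeholders have been filled in first. The jsrun line requests 4 resource sets per node (1 MPI rank, 1 GPU, 7 cores each), i.e. 8 GPUs across the 2 requested nodes, and bsub picks up the #BSUB directives from the script itself.

# submit the LSF job script on Lassen (directives are read from the script)
bsub < batch_lassen.sh
# check the queue; the LSF job log lands in WarpXo.<jobID> / WarpXe.<jobID>,
# while the simulation stdout is redirected to output.txt by the jsrun line
bjobs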
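
Likewise, a minimal submission sketch for the Quartz script, again assuming the placeholders have been edited. With -N 2 and WARPX_NMPI_PER_NODE=2, the srun line computes 2 * 2 = 4 MPI ranks in total, each of which spawns 18 OpenMP threads.

# submit the Slurm job script on Quartz
sbatch batch_quartz.sh
# monitor the job; stdout/stderr are written to output.txt / error.txt
squeue -u $USER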