-rw-r--r--   Docs/source/building/building.rst       |  2
-rw-r--r--   Docs/source/building/lassen.rst         | 92
-rw-r--r--   Docs/source/building/quartz.rst         | 94
-rw-r--r--   Docs/source/running_cpp/platforms.rst   | 58
-rw-r--r--   Tools/BatchScripts/batch_lassen.sh      | 22
-rw-r--r--   Tools/BatchScripts/batch_quartz.sh      | 40
6 files changed, 308 insertions, 0 deletions
diff --git a/Docs/source/building/building.rst b/Docs/source/building/building.rst
index 9d2b8f249..8aad5a56d 100644
--- a/Docs/source/building/building.rst
+++ b/Docs/source/building/building.rst
@@ -105,3 +105,5 @@ Building for specific platforms
    cori
    summit
    juwels
+   lassen
+   quartz
diff --git a/Docs/source/building/lassen.rst b/Docs/source/building/lassen.rst
new file mode 100644
index 000000000..5145f8f21
--- /dev/null
+++ b/Docs/source/building/lassen.rst
@@ -0,0 +1,92 @@
+.. _building-lassen:
+
+Lassen (LLNL)
+=============
+
+The `Lassen V100 GPU cluster <https://hpc.llnl.gov/hardware/platforms/lassen>`_ is located at LLNL.
+
+If you are new to this system, please see the following resources:
+
+* `LLNL user account <https://lc.llnl.gov/lorenz/mylc/mylc.cgi>`_
+* `Lassen user guide <https://hpc.llnl.gov/training/tutorials/using-lcs-sierra-system>`_
+* Batch system: `LSF <https://hpc.llnl.gov/training/tutorials/using-lcs-sierra-system#batch-system>`_
+* `Production directories <https://hpc.llnl.gov/hardware/file-systems>`_:
+
+  * ``/p/gpfs1/$(whoami)``: personal directory on the parallel filesystem
+  * Note that the ``$HOME`` directory and the ``/usr/workspace/$(whoami)`` space are NFS mounted and not suitable for production-quality data generation.
+
+Installation
+------------
+
+Use the following commands to download the WarpX source code and switch to the correct branch:
+
+.. code-block:: bash
+
+   git clone https://github.com/ECP-WarpX/WarpX.git $HOME/src/warpx
+
+We use the following modules and environments on the system.
+
+.. code-block:: bash
+
+   # please set your project account
+   export proj=<yourProject>
+
+   # required dependencies
+   module load cmake/3.16.8
+   module load gcc/8.3.1
+   module load cuda/11.0.2
+
+   # optional: for PSATD support
+   module load fftw/3.3.8
+
+   # optional: for QED support
+   module load boost/1.70.0
+
+   # optional: for openPMD support
+   # TODO ADIOS2 & HDF5
+
+   # optional: for PSATD in RZ geometry support
+   # TODO: blaspp lapackpp
+
+   # optional: for Python bindings
+   module load python/3.8.2
+
+   # optional: an alias to request an interactive node for two hours
+   alias getNode="bsub -G $proj -W 2:00 -nnodes 1 -Is /bin/bash"
+
+   # fix system defaults: do not escape $ with a \ on tab completion
+   shopt -s direxpand
+
+   # compiler environment hints
+   export CC=$(which gcc)
+   export CXX=$(which g++)
+   export FC=$(which gfortran)
+   export CUDACXX=$(which nvcc)
+   export CUDAHOSTCXX=$(which g++)
+
+We recommend storing the above lines in a file, such as ``$HOME/lassen_warpx.profile``, and loading it into your shell after a login:
+
+.. code-block:: bash
+
+   source $HOME/lassen_warpx.profile
+
+Then, ``cd`` into the directory ``$HOME/src/warpx`` and use the following commands to compile:
+
+.. code-block:: bash
+
+   rm -rf build/
+   cmake -B build -DWarpX_COMPUTE=CUDA -DWarpX_OPENPMD=ON
+   cmake --build build -j 10
+
+This will build an executable in ``build/bin/``.
+The other :ref:`general compile-time options <building-cmake>` apply as usual.
+
+Running
+-------
+
+Please see :ref:`our example job scripts <running-cpp-lassen>` for how to run WarpX on Lassen.
+
+See :doc:`../visualization/yt` for more information on how to visualize the simulation results.
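
The new Lassen page boils down to a short session. The sketch below is only illustrative: it assumes the profile file name and source layout used above (``$HOME/lassen_warpx.profile``, ``$HOME/src/warpx``), and ``<executable>``/``<input file>`` are placeholders, not verified names.

.. code-block:: bash

   # first build + interactive smoke test on Lassen, following lassen.rst above
   source $HOME/lassen_warpx.profile      # modules, compiler hints, $proj
   cd $HOME/src/warpx
   rm -rf build/
   cmake -B build -DWarpX_COMPUTE=CUDA -DWarpX_OPENPMD=ON
   cmake --build build -j 10

   # request an interactive node (getNode alias from the profile), then
   # launch on its 4 V100s with one MPI rank per GPU
   getNode
   jsrun -r 4 -a 1 -g 1 -c 7 ./build/bin/<executable> <input file>
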
diff --git a/Docs/source/building/quartz.rst b/Docs/source/building/quartz.rst
new file mode 100644
index 000000000..8522308ad
--- /dev/null
+++ b/Docs/source/building/quartz.rst
@@ -0,0 +1,94 @@
+.. _building-quartz:
+
+Quartz (LLNL)
+=============
+
+The `Quartz Intel CPU cluster <https://hpc.llnl.gov/hardware/platforms/quartz>`_ is located at LLNL.
+
+If you are new to this system, please see the following resources:
+
+* `LLNL user account <https://lc.llnl.gov/lorenz/mylc/mylc.cgi>`_
+* `Quartz user guide <https://computing.llnl.gov/tutorials/linux_clusters/>`_
+* Batch system: `Slurm <https://computing.llnl.gov/tutorials/moab/>`_
+* `Production directories <https://hpc.llnl.gov/hardware/file-systems>`_:
+
+  * ``/p/lustre1/$(whoami)`` and ``/p/lustre2/$(whoami)``: personal directories on the parallel filesystem
+  * Note that the ``$HOME`` directory and the ``/usr/workspace/$(whoami)`` space are NFS mounted and not suitable for production-quality data generation.
+
+Installation
+------------
+
+Use the following commands to download the WarpX source code and switch to the correct branch:
+
+.. code-block:: bash
+
+   git clone https://github.com/ECP-WarpX/WarpX.git $HOME/src/warpx
+
+We use the following modules and environments on the system.
+
+.. code-block:: bash
+
+   # please set your project account
+   export proj=<yourProject>
+
+   # required dependencies
+   module load cmake/3.16.8
+   module load intel/19.1.2
+   module load mvapich2/2.3
+
+   # optional: for PSATD support
+   module load fftw/3.3.8
+
+   # optional: for QED support
+   module load boost/1.73.0
+
+   # optional: for openPMD support
+   # TODO ADIOS2
+   module load hdf5-parallel/1.10.2
+
+   # optional: for PSATD in RZ geometry support
+   # TODO: blaspp lapackpp
+
+   # optional: for Python bindings
+   module load python/3.8.2
+
+   # optional: an alias to request an interactive node for 30 minutes
+   alias getNode="srun --time=0:30:00 --nodes=1 --ntasks-per-node=2 --cpus-per-task=18 -p pdebug --pty bash"
+
+   # fix system defaults: do not escape $ with a \ on tab completion
+   shopt -s direxpand
+
+   # compiler environment hints
+   export CC=$(which icc)
+   export CXX=$(which icpc)
+   export FC=$(which ifort)
+   # we need a newer libstdc++:
+   export CFLAGS="-gcc-name=/usr/tce/packages/gcc/gcc-8.3.1/bin/gcc ${CFLAGS}"
+   export CXXFLAGS="-gxx-name=/usr/tce/packages/gcc/gcc-8.3.1/bin/g++ ${CXXFLAGS}"
+
+We recommend storing the above lines in a file, such as ``$HOME/quartz_warpx.profile``, and loading it into your shell after a login:
+
+.. code-block:: bash
+
+   source $HOME/quartz_warpx.profile
+
+Then, ``cd`` into the directory ``$HOME/src/warpx`` and use the following commands to compile:
+
+.. code-block:: bash
+
+   rm -rf build/
+   cmake -B build -DWarpX_OPENPMD=ON
+   cmake --build build -j 6
+
+This will build an executable in ``build/bin/``.
+The other :ref:`general compile-time options <building-cmake>` apply as usual.
+
+Running
+-------
+
+Please see :ref:`our example job scripts <running-cpp-quartz>` for how to run WarpX on Quartz.
+
+See :doc:`../visualization/yt` for more information on how to visualize the simulation results.
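
The Quartz page follows the same pattern. Again a minimal sketch under the same assumptions (profile name, source layout, placeholder executable and input file); the interactive ``srun`` line reuses the limits from the ``getNode`` alias in the profile above:

.. code-block:: bash

   # first build + short interactive test on Quartz, following quartz.rst above
   source $HOME/quartz_warpx.profile      # Intel compilers, MVAPICH2, modules
   cd $HOME/src/warpx
   rm -rf build/
   cmake -B build -DWarpX_OPENPMD=ON
   cmake --build build -j 6

   # short test run on the pdebug partition; 2 ranks x 18 threads on one node
   srun --time=0:30:00 --nodes=1 --ntasks-per-node=2 --cpus-per-task=18 -p pdebug \
        ./build/bin/<executable> <input file>
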
diff --git a/Docs/source/running_cpp/platforms.rst b/Docs/source/running_cpp/platforms.rst
index 2a3ead381..925eacccd 100644
--- a/Docs/source/running_cpp/platforms.rst
+++ b/Docs/source/running_cpp/platforms.rst
@@ -131,3 +131,61 @@ parameters provided good performance:
 
 * **Sixteen `64x64x64` grids per MPI rank** (with default tiling in WarpX,
   this results in ~49 tiles per OpenMP thread)
+
+.. _running-cpp-lassen:
+
+Running on Lassen at LLNL
+-------------------------
+
+.. _running-cpp-lassen-V100-GPUs:
+
+V100 GPUs
+^^^^^^^^^
+
+The batch script below can be used to run a WarpX simulation on 2 nodes of the supercomputer Lassen at LLNL.
+Replace descriptions between chevrons ``<>`` with relevant values, for instance ``<input file>`` could be ``plasma_mirror_inputs``.
+Note that the only option so far is to run with one MPI rank per GPU.
+
+.. literalinclude:: ../../../Tools/BatchScripts/batch_lassen.sh
+   :language: bash
+
+To run a simulation, copy the lines above to a file ``batch_lassen.sh`` and run
+::
+
+  bsub batch_lassen.sh
+
+to submit the job.
+
+For a 3D simulation with a few (1-4) particles per cell using the FDTD Maxwell
+solver on V100 GPUs for a well load-balanced problem (in our case, a laser
+wakefield acceleration simulation in a boosted frame in the quasi-linear
+regime), the following set of parameters provided good performance:
+
+* ``amr.max_grid_size=256`` and ``amr.blocking_factor=128``.
+
+* **One MPI rank per GPU** (e.g., 4 MPI ranks for the 4 GPUs on each Lassen
+  node)
+
+* **Two `128x128x128` grids per GPU**, or **one `128x128x256` grid per GPU**.
+
+.. _running-cpp-quartz:
+
+Running on Quartz at LLNL
+-------------------------
+
+.. _running-cpp-quartz-CPUs:
+
+Intel Xeon E5-2695 v4 CPUs
+^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+The batch script below can be used to run a WarpX simulation on 2 nodes of the supercomputer Quartz at LLNL.
+Replace descriptions between chevrons ``<>`` with relevant values, for instance ``<input file>`` could be ``plasma_mirror_inputs``.
+
+.. literalinclude:: ../../../Tools/BatchScripts/batch_quartz.sh
+   :language: bash
+
+To run a simulation, copy the lines above to a file ``batch_quartz.sh`` and run
+::
+
+  sbatch batch_quartz.sh
+
+to submit the job.
diff --git a/Tools/BatchScripts/batch_lassen.sh b/Tools/BatchScripts/batch_lassen.sh
new file mode 100644
index 000000000..0fd2500c5
--- /dev/null
+++ b/Tools/BatchScripts/batch_lassen.sh
@@ -0,0 +1,22 @@
+#!/bin/bash
+
+# Copyright 2020 Axel Huebl
+#
+# This file is part of WarpX.
+#
+# License: BSD-3-Clause-LBNL
+#
+# Refs.:
+# https://jsrunvisualizer.olcf.ornl.gov/?s4f0o11n6c7g1r11d1b1l0=
+# https://hpc.llnl.gov/training/tutorials/using-lcs-sierra-system#quick16
+
+#BSUB -G <allocation ID>
+#BSUB -W 00:10
+#BSUB -nnodes 2
+#BSUB -alloc_flags smt4
+#BSUB -J WarpX
+#BSUB -o WarpXo.%J
+#BSUB -e WarpXe.%J
+
+export OMP_NUM_THREADS=1
+jsrun -r 4 -a 1 -g 1 -c 7 -l GPU-CPU -d packed -b rs -M "-gpu" <path/to/executable> <input file> > output.txt
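
The ``jsrun`` line above creates four resource sets per node (``-r 4``), each with one MPI rank (``-a 1``), one GPU (``-g 1``) and seven cores (``-c 7``), matching the one-rank-per-GPU layout described in ``platforms.rst``. As a rough illustration, a submit-and-monitor cycle with this LSF script could look like the following; ``bjobs``, ``bpeek`` and ``bkill`` are standard LSF commands, and ``<job id>`` is a placeholder:

.. code-block:: bash

   bsub batch_lassen.sh   # submit under the allocation set via #BSUB -G
   bjobs -u $USER         # list your pending/running jobs
   bpeek <job id>         # peek at the output of a running job
   bkill <job id>         # cancel the job if needed
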
diff --git a/Tools/BatchScripts/batch_quartz.sh b/Tools/BatchScripts/batch_quartz.sh
new file mode 100644
index 000000000..4c1a82ff8
--- /dev/null
+++ b/Tools/BatchScripts/batch_quartz.sh
@@ -0,0 +1,40 @@
+#!/bin/bash -l
+
+# Just increase this number if you need more nodes.
+#SBATCH -N 2
+#SBATCH -t 24:00:00
+#SBATCH -A <allocation ID>
+
+#SBATCH -J WarpX
+#SBATCH -q pbatch
+#SBATCH --qos=normal
+#SBATCH --license=lustre1,lustre2
+#SBATCH --export=ALL
+#SBATCH -e error.txt
+#SBATCH -o output.txt
+# one MPI rank per half-socket (see below)
+#SBATCH --tasks-per-node=2
+# request all logical (virtual) cores per half-socket
+#SBATCH --cpus-per-task=18
+
+
+# each Quartz node has 1 socket of Intel Xeon E5-2695 v4
+# each Xeon CPU is divided into 2 bus rings that each have direct L3 access
export WARPX_NMPI_PER_NODE=2
+
+# each MPI rank per half-socket has 9 physical cores
+# or 18 logical (virtual) cores
+# over-subscribing each physical core with 2x
+# hyperthreading led to a slight (3.5%) speedup on Cori's Intel Xeon E5-2698 v3,
+# so we do the same here
+# the settings below make sure threads are close to the
+# controlling MPI rank (process) per half socket and
+# distribute equally over close-by physical cores and,
+# for N>9, also equally over close-by logical cores
+export OMP_PROC_BIND=spread
+export OMP_PLACES=threads
+export OMP_NUM_THREADS=18
+
+EXE="<path/to/executable>"  # e.g. ./warpx
+
+srun --cpu_bind=cores -n $(( ${SLURM_JOB_NUM_NODES} * ${WARPX_NMPI_PER_NODE} )) ${EXE} <input file>
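
Likewise for the Slurm script on Quartz; ``squeue`` and ``scancel`` are standard Slurm commands, ``<job id>`` is a placeholder, and ``output.txt`` is the file requested via ``#SBATCH -o`` above:

.. code-block:: bash

   sbatch batch_quartz.sh   # submit the job
   squeue -u $USER          # check its state in the queue
   tail -f output.txt       # follow stdout once the job is running
   scancel <job id>         # cancel the job if needed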