-rw-r--r--   Docs/source/building/building.rst       |  2
-rw-r--r--   Docs/source/building/lassen.rst         | 92
-rw-r--r--   Docs/source/building/quartz.rst         | 94
-rw-r--r--   Docs/source/running_cpp/platforms.rst   | 58
-rw-r--r--   Tools/BatchScripts/batch_lassen.sh      | 22
-rw-r--r--   Tools/BatchScripts/batch_quartz.sh      | 40
6 files changed, 308 insertions, 0 deletions
diff --git a/Docs/source/building/building.rst b/Docs/source/building/building.rst
index 9d2b8f249..8aad5a56d 100644
--- a/Docs/source/building/building.rst
+++ b/Docs/source/building/building.rst
@@ -105,3 +105,5 @@ Building for specific platforms
    cori
    summit
    juwels
+   lassen
+   quartz
diff --git a/Docs/source/building/lassen.rst b/Docs/source/building/lassen.rst
new file mode 100644
index 000000000..5145f8f21
--- /dev/null
+++ b/Docs/source/building/lassen.rst
@@ -0,0 +1,92 @@
+.. _building-lassen:
+
+Lassen (LLNL)
+=============
+
+The `Lassen V100 GPU cluster <https://hpc.llnl.gov/hardware/platforms/lassen>`_ is located at LLNL.
+
+If you are new to this system, please see the following resources:
+
+* `LLNL user account <https://lc.llnl.gov/lorenz/mylc/mylc.cgi>`_
+* `Lassen user guide <https://hpc.llnl.gov/training/tutorials/using-lcs-sierra-system>`_
+* Batch system: `LSF <https://hpc.llnl.gov/training/tutorials/using-lcs-sierra-system#batch-system>`_
+* `Production directories <https://hpc.llnl.gov/hardware/file-systems>`_:
+
+  * ``/p/gpfs1/$(whoami)``: personal directory on the parallel filesystem
+  * Note that the ``$HOME`` directory and the ``/usr/workspace/$(whoami)`` space are NFS mounted and not suitable for production-quality data generation.
+
+Installation
+------------
+
+Use the following commands to download the WarpX source code and switch to the correct branch:
+
+.. code-block:: bash
+
+   git clone https://github.com/ECP-WarpX/WarpX.git $HOME/src/warpx
+
+We use the following modules and environments on the system.
+
+.. code-block:: bash
+
+   # please set your project account
+   export proj=<yourProject>
+
+   # required dependencies
+   module load cmake/3.16.8
+   module load gcc/8.3.1
+   module load cuda/11.0.2
+
+   # optional: for PSATD support
+   module load fftw/3.3.8
+
+   # optional: for QED support
+   module load boost/1.70.0
+
+   # optional: for openPMD support
+   # TODO ADIOS2 & HDF5
+
+   # optional: for PSATD in RZ geometry support
+   # TODO: blaspp lapackpp
+
+   # optional: for Python bindings
+   module load python/3.8.2
+
+   # optional: an alias to request an interactive node for two hours
+   alias getNode="bsub -G $proj -W 2:00 -nnodes 1 -Is /bin/bash"
+
+   # fix system defaults: do not escape $ with a \ on tab completion
+   shopt -s direxpand
+
+   # compiler environment hints
+   export CC=$(which gcc)
+   export CXX=$(which g++)
+   export FC=$(which gfortran)
+   export CUDACXX=$(which nvcc)
+   export CUDAHOSTCXX=$(which g++)
+
+We recommend storing the above lines in a file, such as ``$HOME/lassen_warpx.profile``, and loading it into your shell after a login:
+
+.. code-block:: bash
+
+   source $HOME/lassen_warpx.profile
+
+Then, ``cd`` into the directory ``$HOME/src/warpx`` and use the following commands to compile:
+
+.. code-block:: bash
+
+   rm -rf build/
+   cmake -B build -DWarpX_COMPUTE=CUDA -DWarpX_OPENPMD=ON
+   cmake --build build -j 10
+
+This will build an executable in ``build/bin/``.
+The other :ref:`general compile-time options <building-cmake>` apply as usual.
+
+Running
+-------
+
+Please see :ref:`our example job scripts <running-cpp-lassen>` for how to run WarpX on Lassen.
+
+See :doc:`../visualization/yt` for more information on how to visualize the simulation results.
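
The new Lassen page boils down to a short session. The sketch below is only illustrative: it assumes the profile file name and source layout used above (``$HOME/lassen_warpx.profile``, ``$HOME/src/warpx``), and ``<executable>``/``<input file>`` are placeholders, not verified names.

.. code-block:: bash

   # first build + interactive smoke test on Lassen, following lassen.rst above
   source $HOME/lassen_warpx.profile      # modules, compiler hints, $proj
   cd $HOME/src/warpx
   rm -rf build/
   cmake -B build -DWarpX_COMPUTE=CUDA -DWarpX_OPENPMD=ON
   cmake --build build -j 10

   # request an interactive node (getNode alias from the profile), then
   # launch on its 4 V100s with one MPI rank per GPU
   getNode
   jsrun -r 4 -a 1 -g 1 -c 7 ./build/bin/<executable> <input file>
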
diff --git a/Docs/source/building/quartz.rst b/Docs/source/building/quartz.rst
new file mode 100644
index 000000000..8522308ad
--- /dev/null
+++ b/Docs/source/building/quartz.rst
@@ -0,0 +1,94 @@
+.. _building-quartz:
+
+Quartz (LLNL)
+=============
+
+The `Quartz Intel CPU cluster <https://hpc.llnl.gov/hardware/platforms/quartz>`_ is located at LLNL.
+
+If you are new to this system, please see the following resources:
+
+* `LLNL user account <https://lc.llnl.gov/lorenz/mylc/mylc.cgi>`_
+* `Quartz user guide <https://computing.llnl.gov/tutorials/linux_clusters/>`_
+* Batch system: `Slurm <https://computing.llnl.gov/tutorials/moab/>`_
+* `Production directories <https://hpc.llnl.gov/hardware/file-systems>`_:
+
+  * ``/p/lustre1/$(whoami)`` and ``/p/lustre2/$(whoami)``: personal directories on the parallel filesystem
+  * Note that the ``$HOME`` directory and the ``/usr/workspace/$(whoami)`` space are NFS mounted and not suitable for production-quality data generation.
+
+Installation
+------------
+
+Use the following commands to download the WarpX source code and switch to the correct branch:
+
+.. code-block:: bash
+
+   git clone https://github.com/ECP-WarpX/WarpX.git $HOME/src/warpx
+
+We use the following modules and environments on the system.
+
+.. code-block:: bash
+
+   # please set your project account
+   export proj=<yourProject>
+
+   # required dependencies
+   module load cmake/3.16.8
+   module load intel/19.1.2
+   module load mvapich2/2.3
+
+   # optional: for PSATD support
+   module load fftw/3.3.8
+
+   # optional: for QED support
+   module load boost/1.73.0
+
+   # optional: for openPMD support
+   # TODO ADIOS2
+   module load hdf5-parallel/1.10.2
+
+   # optional: for PSATD in RZ geometry support
+   # TODO: blaspp lapackpp
+
+   # optional: for Python bindings
+   module load python/3.8.2
+
+   # optional: an alias to request an interactive node for 30 minutes
+   alias getNode="srun --time=0:30:00 --nodes=1 --ntasks-per-node=2 --cpus-per-task=18 -p pdebug --pty bash"
+
+   # fix system defaults: do not escape $ with a \ on tab completion
+   shopt -s direxpand
+
+   # compiler environment hints
+   export CC=$(which icc)
+   export CXX=$(which icpc)
+   export FC=$(which ifort)
+   # we need a newer libstdc++:
+   export CFLAGS="-gcc-name=/usr/tce/packages/gcc/gcc-8.3.1/bin/gcc ${CFLAGS}"
+   export CXXFLAGS="-gxx-name=/usr/tce/packages/gcc/gcc-8.3.1/bin/g++ ${CXXFLAGS}"
+
+We recommend storing the above lines in a file, such as ``$HOME/quartz_warpx.profile``, and loading it into your shell after a login:
+
+.. code-block:: bash
+
+   source $HOME/quartz_warpx.profile
+
+Then, ``cd`` into the directory ``$HOME/src/warpx`` and use the following commands to compile:
+
+.. code-block:: bash
+
+   rm -rf build/
+   cmake -B build -DWarpX_OPENPMD=ON
+   cmake --build build -j 6
+
+This will build an executable in ``build/bin/``.
+The other :ref:`general compile-time options <building-cmake>` apply as usual.
+
+Running
+-------
+
+Please see :ref:`our example job scripts <running-cpp-quartz>` for how to run WarpX on Quartz.
+
+See :doc:`../visualization/yt` for more information on how to visualize the simulation results.
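
The Quartz page follows the same pattern. Again a minimal sketch under the same assumptions (profile name, source layout, placeholder executable and input file); the interactive ``srun`` line reuses the limits from the ``getNode`` alias in the profile above:

.. code-block:: bash

   # first build + short interactive test on Quartz, following quartz.rst above
   source $HOME/quartz_warpx.profile      # Intel compilers, MVAPICH2, modules
   cd $HOME/src/warpx
   rm -rf build/
   cmake -B build -DWarpX_OPENPMD=ON
   cmake --build build -j 6

   # short test run on the pdebug partition; 2 ranks x 18 threads on one node
   srun --time=0:30:00 --nodes=1 --ntasks-per-node=2 --cpus-per-task=18 -p pdebug \
        ./build/bin/<executable> <input file>
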
diff --git a/Docs/source/running_cpp/platforms.rst b/Docs/source/running_cpp/platforms.rst
index 2a3ead381..925eacccd 100644
--- a/Docs/source/running_cpp/platforms.rst
+++ b/Docs/source/running_cpp/platforms.rst
@@ -131,3 +131,61 @@ parameters provided good performance:
 
 * **Sixteen `64x64x64` grids per MPI rank** (with default tiling in WarpX,
   this results in ~49 tiles per OpenMP thread)
+
+.. _running-cpp-lassen:
+
+Running on Lassen at LLNL
+-------------------------
+
+.. _running-cpp-lassen-V100-GPUs:
+
+V100 GPUs
+^^^^^^^^^
+
+The batch script below can be used to run a WarpX simulation on 2 nodes of the supercomputer Lassen at LLNL.
+Replace descriptions between chevrons ``<>`` with relevant values, for instance ``<input file>`` could be ``plasma_mirror_inputs``.
+Note that the only option so far is to run with one MPI rank per GPU.
+
+.. literalinclude:: ../../../Tools/BatchScripts/batch_lassen.sh
+   :language: bash
+
+To run a simulation, copy the lines above to a file ``batch_lassen.sh`` and run
+::
+
+  bsub batch_lassen.sh
+
+to submit the job.
+
+For a 3D simulation with a few (1-4) particles per cell using the FDTD Maxwell
+solver on V100 GPUs for a well load-balanced problem (in our case, a laser
+wakefield acceleration simulation in a boosted frame in the quasi-linear
+regime), the following set of parameters provided good performance:
+
+* ``amr.max_grid_size=256`` and ``amr.blocking_factor=128``.
+
+* **One MPI rank per GPU** (e.g., 4 MPI ranks for the 4 GPUs on each Lassen
+  node)
+
+* **Two `128x128x128` grids per GPU**, or **one `128x128x256` grid per GPU**.
+
+.. _running-cpp-quartz:
+
+Running on Quartz at LLNL
+-------------------------
+
+.. _running-cpp-quartz-CPUs:
+
+Intel Xeon E5-2695 v4 CPUs
+^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+The batch script below can be used to run a WarpX simulation on 2 nodes of the supercomputer Quartz at LLNL.
+Replace descriptions between chevrons ``<>`` with relevant values, for instance ``<input file>`` could be ``plasma_mirror_inputs``.
+
+.. literalinclude:: ../../../Tools/BatchScripts/batch_quartz.sh
+   :language: bash
+
+To run a simulation, copy the lines above to a file ``batch_quartz.sh`` and run
+::
+
+  sbatch batch_quartz.sh
+
+to submit the job.
diff --git a/Tools/BatchScripts/batch_lassen.sh b/Tools/BatchScripts/batch_lassen.sh
new file mode 100644
index 000000000..0fd2500c5
--- /dev/null
+++ b/Tools/BatchScripts/batch_lassen.sh
@@ -0,0 +1,22 @@
+#!/bin/bash
+
+# Copyright 2020 Axel Huebl
+#
+# This file is part of WarpX.
+#
+# License: BSD-3-Clause-LBNL
+#
+# Refs.:
+# https://jsrunvisualizer.olcf.ornl.gov/?s4f0o11n6c7g1r11d1b1l0=
+# https://hpc.llnl.gov/training/tutorials/using-lcs-sierra-system#quick16
+
+#BSUB -G <allocation ID>
+#BSUB -W 00:10
+#BSUB -nnodes 2
+#BSUB -alloc_flags smt4
+#BSUB -J WarpX
+#BSUB -o WarpXo.%J
+#BSUB -e WarpXe.%J
+
+export OMP_NUM_THREADS=1
+jsrun -r 4 -a 1 -g 1 -c 7 -l GPU-CPU -d packed -b rs -M "-gpu" <path/to/executable> <input file> > output.txt
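
The ``jsrun`` line above creates four resource sets per node (``-r 4``), each with one MPI rank (``-a 1``), one GPU (``-g 1``) and seven cores (``-c 7``), matching the one-rank-per-GPU layout described in ``platforms.rst``. As a rough illustration, a submit-and-monitor cycle with this LSF script could look like the following; ``bjobs``, ``bpeek`` and ``bkill`` are standard LSF commands, and ``<job id>`` is a placeholder:

.. code-block:: bash

   bsub batch_lassen.sh   # submit under the allocation set via #BSUB -G
   bjobs -u $USER         # list your pending/running jobs
   bpeek <job id>         # peek at the output of a running job
   bkill <job id>         # cancel the job if needed
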
diff --git a/Tools/BatchScripts/batch_quartz.sh b/Tools/BatchScripts/batch_quartz.sh
new file mode 100644
index 000000000..4c1a82ff8
--- /dev/null
+++ b/Tools/BatchScripts/batch_quartz.sh
@@ -0,0 +1,40 @@
+#!/bin/bash -l
+
+# Just increase this number if you need more nodes.
+#SBATCH -N 2
+#SBATCH -t 24:00:00
+#SBATCH -A <allocation ID>
+
+#SBATCH -J WarpX
+#SBATCH -q pbatch
+#SBATCH --qos=normal
+#SBATCH --license=lustre1,lustre2
+#SBATCH --export=ALL
+#SBATCH -e error.txt
+#SBATCH -o output.txt
+# one MPI rank per half-socket (see below)
+#SBATCH --tasks-per-node=2
+# request all logical (virtual) cores per half-socket
+#SBATCH --cpus-per-task=18
+
+
+# each Quartz node has 1 socket of Intel Xeon E5-2695 v4
+# each Xeon CPU is divided into 2 bus rings that each have direct L3 access
export WARPX_NMPI_PER_NODE=2
+
+# each MPI rank per half-socket has 9 physical cores
+# or 18 logical (virtual) cores
+# over-subscribing each physical core with 2x
+# hyperthreading led to a slight (3.5%) speedup on Cori's Intel Xeon E5-2698 v3,
+# so we do the same here
+# the settings below make sure threads are close to the
+# controlling MPI rank (process) per half socket and
+# distribute equally over close-by physical cores and,
+# for N>9, also equally over close-by logical cores
+export OMP_PROC_BIND=spread
+export OMP_PLACES=threads
+export OMP_NUM_THREADS=18
+
+EXE="<path/to/executable>"  # e.g. ./warpx
+
+srun --cpu_bind=cores -n $(( ${SLURM_JOB_NUM_NODES} * ${WARPX_NMPI_PER_NODE} )) ${EXE} <input file>
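
Likewise for the Slurm script on Quartz; ``squeue`` and ``scancel`` are standard Slurm commands, ``<job id>`` is a placeholder, and ``output.txt`` is the file requested via ``#SBATCH -o`` above:

.. code-block:: bash

   sbatch batch_quartz.sh   # submit the job
   squeue -u $USER          # check its state in the queue
   tail -f output.txt       # follow stdout once the job is running
   scancel <job id>         # cancel the job if needed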