 Tools/machines/perlmutter-nersc/perlmutter_gpu.sbatch | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)
diff --git a/Tools/machines/perlmutter-nersc/perlmutter_gpu.sbatch b/Tools/machines/perlmutter-nersc/perlmutter_gpu.sbatch
index 8432b2eb9..873fd3035 100644
--- a/Tools/machines/perlmutter-nersc/perlmutter_gpu.sbatch
+++ b/Tools/machines/perlmutter-nersc/perlmutter_gpu.sbatch
@@ -17,24 +17,26 @@
# A100 80GB (256 nodes)
#S BATCH -C gpu&hbm80g
#SBATCH --exclusive
+# ideally --gpu-bind=single:1, but a NERSC cgroups issue currently requires none
#SBATCH --gpu-bind=none
+#SBATCH --ntasks-per-node=4
#SBATCH --gpus-per-node=4
#SBATCH -o WarpX.o%j
#SBATCH -e WarpX.e%j
# executable & inputs file or python interpreter & PICMI script here
EXE=./warpx
-INPUTS=inputs_small
+INPUTS=inputs
# pin to closest NIC to GPU
export MPICH_OFI_NIC_POLICY=GPU
# threads for OpenMP and threaded compressors per MPI rank
+# note: 16 avoids hyperthreading (32 virtual cores, 16 physical)
export SRUN_CPUS_PER_TASK=16
-# depends on https://github.com/ECP-WarpX/WarpX/issues/2009
-#GPU_AWARE_MPI="amrex.the_arena_is_managed=0 amrex.use_gpu_aware_mpi=1"
-GPU_AWARE_MPI=""
+# GPU-aware MPI optimizations
+GPU_AWARE_MPI="amrex.use_gpu_aware_mpi=1"
# CUDA visible devices are ordered inverse to local task IDs
# Reference: nvidia-smi topo -m
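
For context on the two comment lines above, here is a minimal launch sketch of the kind of srun wrapper such a script typically ends with. These lines are illustration only, not part of this diff; they assume the usual Perlmutter GPU node layout of 4 GPUs per node with local ranks 0-3, matching --ntasks-per-node=4 above.

# hypothetical sketch: map local rank i to GPU (3 - i) to invert the
# CUDA device ordering, per the affinity shown by `nvidia-smi topo -m`
srun --cpu-bind=cores bash -c "
    export CUDA_VISIBLE_DEVICES=\$((3-SLURM_LOCALID));
    ${EXE} ${INPUTS} ${GPU_AWARE_MPI}" \
  > output.txt

Appending ${GPU_AWARE_MPI} to the command line is how a runtime parameter like amrex.use_gpu_aware_mpi=1, set earlier in the script, would reach WarpX's AMReX parameter parser.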