 Tools/machines/perlmutter-nersc/perlmutter_gpu.sbatch | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)
diff --git a/Tools/machines/perlmutter-nersc/perlmutter_gpu.sbatch b/Tools/machines/perlmutter-nersc/perlmutter_gpu.sbatch
index 8432b2eb9..873fd3035 100644
--- a/Tools/machines/perlmutter-nersc/perlmutter_gpu.sbatch
+++ b/Tools/machines/perlmutter-nersc/perlmutter_gpu.sbatch
@@ -17,24 +17,26 @@
 # A100 80GB (256 nodes)
 #S BATCH -C gpu&hbm80g
 #SBATCH --exclusive
+# ideally single:1, but NERSC cgroups issue
 #SBATCH --gpu-bind=none
+#SBATCH --ntasks-per-node=4
 #SBATCH --gpus-per-node=4
 #SBATCH -o WarpX.o%j
 #SBATCH -e WarpX.e%j

 # executable & inputs file or python interpreter & PICMI script here
 EXE=./warpx
-INPUTS=inputs_small
+INPUTS=inputs

 # pin to closest NIC to GPU
 export MPICH_OFI_NIC_POLICY=GPU

 # threads for OpenMP and threaded compressors per MPI rank
+# note: 16 avoids hyperthreading (32 virtual cores, 16 physical)
 export SRUN_CPUS_PER_TASK=16

-# depends on https://github.com/ECP-WarpX/WarpX/issues/2009
-#GPU_AWARE_MPI="amrex.the_arena_is_managed=0 amrex.use_gpu_aware_mpi=1"
-GPU_AWARE_MPI=""
+# GPU-aware MPI optimizations
+GPU_AWARE_MPI="amrex.use_gpu_aware_mpi=1"

 # CUDA visible devices are ordered inverse to local task IDs
 # Reference: nvidia-smi topo -m
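The launch command that consumes EXE, INPUTS, and GPU_AWARE_MPI sits below this hunk and is not part of the diff. A minimal sketch of how these pieces typically fit together on a 4-GPU Perlmutter node, assuming the usual srun wrapper and an arbitrary output file name (both are assumptions, not shown in this change):

# sketch only: the real launch line is outside the hunk shown above
# local task IDs map inversely to GPUs, so rank 0 -> GPU 3, ..., rank 3 -> GPU 0
srun --cpu-bind=cores bash -c "
    export CUDA_VISIBLE_DEVICES=\$((3-SLURM_LOCALID));
    ${EXE} ${INPUTS} ${GPU_AWARE_MPI}" \
  > output.txt

With --gpu-bind=none and --ntasks-per-node=4, each of the four MPI ranks per node selects its own A100 explicitly via CUDA_VISIBLE_DEVICES, while GPU_AWARE_MPI passes amrex.use_gpu_aware_mpi=1 through on the WarpX command line.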