diff options
Diffstat (limited to 'Examples/Tests/gpu_test')
-rw-r--r-- | Examples/Tests/gpu_test/inputs | 2 | ||||
-rwxr-xr-x | Examples/Tests/gpu_test/script_profiling.sh | 48 |
2 files changed, 1 insertions, 49 deletions
diff --git a/Examples/Tests/gpu_test/inputs b/Examples/Tests/gpu_test/inputs index e4ae27469..9cb6b819b 100644 --- a/Examples/Tests/gpu_test/inputs +++ b/Examples/Tests/gpu_test/inputs @@ -25,7 +25,7 @@ warpx.verbose = 1 # Algorithms algo.current_deposition = direct -algo.field_gathering = standard +algo.field_gathering = energy-conserving interpolation.nox = 1 interpolation.noy = 1 diff --git a/Examples/Tests/gpu_test/script_profiling.sh b/Examples/Tests/gpu_test/script_profiling.sh deleted file mode 100755 index cd6b0eadd..000000000 --- a/Examples/Tests/gpu_test/script_profiling.sh +++ /dev/null @@ -1,48 +0,0 @@ -#!/bin/bash -#BSUB -P GEN109 -#BSUB -W 0:10 -#BSUB -nnodes 1 -#BSUB -J WarpX -#BSUB -o WarpXo.%J -#BSUB -e WarpXe.%J - -module load pgi -module load cuda/9.1.85 -module list -set -x - -omp=1 -export OMP_NUM_THREADS=${omp} -#EXE="../main3d.pgi.DEBUG.TPROF.MPI.ACC.CUDA.ex" -EXE="../main3d.pgi.TPROF.MPI.ACC.CUDA.ex" -#JSRUN="jsrun -n 4 -a 1 -g 1 -c 1 --bind=packed:${omp} " -#JSRUN="jsrun -n 1 -a 4 -g 4 -c 4 --bind=packed:${omp} " -JSRUN="jsrun -n 1 -a 1 -g 1 -c 1 --bind=packed:${omp} " - -rundir="${LSB_JOBNAME}-${LSB_JOBID}" -mkdir $rundir -cp $0 $rundir -cp inputs $rundir -cd $rundir - -# 1. Run normally -${JSRUN} --smpiargs="-gpu" ${EXE} inputs - -# 2. Run under cuda-memcheck -# ${JSRUN} --smpiargs="-gpu" cuda-memcheck ${EXE} inputs &> memcheck.txt - -# 3. Run under nvprof and direct all stdout and stderr to nvprof.txt -#${JSRUN} --smpiargs="-gpu" nvprof --profile-child-processes ${EXE} inputs &> nvprof.txt - -# 4. Run under nvprof and store performance data in a nvvp file -# Can be converted to text using nvprof -i nvprof-timeline-%p.nvvp -#${JSRUN} --smpiargs="-gpu" nvprof --profile-child-processes -o nvprof-timeline-%p.nvvp ${EXE} inputs - -# COLLECT PERFORMANCE METRICS - THIS IS MUCH SLOWER. Set nsteps=2 in the inputs files -# 5. Run under nvprof and collect metrics for a subset of kernels -#${JSRUN} --smpiargs="-gpu" nvprof --profile-child-processes --kernels '(deposit_current|gather_\w+_field|push_\w+_boris)' --analysis-metrics -o nvprof-metrics-kernel-%p.nvvp ${EXE} inputs - -# 6. Run under nvprof and collect metrics for all kernels -- much slower! -#${JSRUN} --smpiargs="-gpu" nvprof --profile-child-processes --analysis-metrics -o nvprof-metrics-%p.nvvp ${EXE} inputs - -cp ../WarpX*.${LSB_JOBID} . |