diff options
Diffstat (limited to 'Examples/Tests/gpu_test')
-rw-r--r-- | Examples/Tests/gpu_test/inputs | 69 | ||||
-rwxr-xr-x | Examples/Tests/gpu_test/script.sh | 48 |
2 files changed, 117 insertions, 0 deletions
diff --git a/Examples/Tests/gpu_test/inputs b/Examples/Tests/gpu_test/inputs new file mode 100644 index 000000000..18bb80d26 --- /dev/null +++ b/Examples/Tests/gpu_test/inputs @@ -0,0 +1,69 @@ +# Maximum number of time steps +max_step = 10 + +# number of grid points +amr.n_cell = 64 64 64 + +# Maximum allowable size of each subdomain in the problem domain; +# this is used to decompose the domain for parallel calculations. +amr.max_grid_size = 64 + +# Maximum level in hierarchy (for now must be 0, i.e., one level in total) +amr.max_level = 0 + +# Geometry +geometry.coord_sys = 0 # 0: Cartesian +geometry.is_periodic = 1 1 1 # Is periodic? +geometry.prob_lo = -20.e-6 -20.e-6 -20.e-6 # physical domain +geometry.prob_hi = 20.e-6 20.e-6 20.e-6 + +warpx.serialize_ics = 1 +warpx.do_pml = 0 + +# Verbosity +warpx.verbose = 1 + +# Algorithms +algo.current_deposition = 3 +algo.charge_deposition = 0 +algo.field_gathering = 1 +algo.particle_pusher = 0 + +interpolation.nox = 1 +interpolation.noy = 1 +interpolation.noz = 1 + +particles.do_tiling = 0 + +# CFL +warpx.cfl = 1.0 + +amr.plot_int = -10 + +particles.nspecies = 1 +particles.species_names = electrons + +electrons.charge = -q_e +electrons.mass = m_e +electrons.injection_style = "NUniformPerCell" +electrons.num_particles_per_cell_each_dim = 2 2 2 +electrons.profile = constant +electrons.density = 1.e25 # number of electrons per m^3 +electrons.momentum_distribution_type = "gaussian" +electrons.ux_th = 0.01 # uth the std of the (unitless) momentum +electrons.uy_th = 0.01 # uth the std of the (unitless) momentum +electrons.uz_th = 0.01 # uth the std of the (unitless) momentum +electrons.uz_m = 10. # Mean momentum along z (unitless) + +# Laser +warpx.use_laser = 1 +laser.profile = Gaussian +laser.position = 0. 0. 0.e-6 # This point is on the laser plane +laser.direction = 0. 0. 1. # The plane normal direction +laser.polarization = 1. 0. 0. # The main polarization vector +laser.e_max = 16.e12 # Maximum amplitude of the laser field (in V/m) +laser.profile_waist = 3.e-6 # The waist of the laser (in meters) +laser.profile_duration = 15.e-15 # The duration of the laser (in seconds) +laser.profile_t_peak = 30.e-15 # The time at which the laser reaches its peak (in seconds) +laser.profile_focal_distance = 100.e-6 # Focal distance from the antenna (in meters) +laser.wavelength = 0.8e-6 # The wavelength of the laser (in meters) diff --git a/Examples/Tests/gpu_test/script.sh b/Examples/Tests/gpu_test/script.sh new file mode 100755 index 000000000..cd6b0eadd --- /dev/null +++ b/Examples/Tests/gpu_test/script.sh @@ -0,0 +1,48 @@ +#!/bin/bash +#BSUB -P GEN109 +#BSUB -W 0:10 +#BSUB -nnodes 1 +#BSUB -J WarpX +#BSUB -o WarpXo.%J +#BSUB -e WarpXe.%J + +module load pgi +module load cuda/9.1.85 +module list +set -x + +omp=1 +export OMP_NUM_THREADS=${omp} +#EXE="../main3d.pgi.DEBUG.TPROF.MPI.ACC.CUDA.ex" +EXE="../main3d.pgi.TPROF.MPI.ACC.CUDA.ex" +#JSRUN="jsrun -n 4 -a 1 -g 1 -c 1 --bind=packed:${omp} " +#JSRUN="jsrun -n 1 -a 4 -g 4 -c 4 --bind=packed:${omp} " +JSRUN="jsrun -n 1 -a 1 -g 1 -c 1 --bind=packed:${omp} " + +rundir="${LSB_JOBNAME}-${LSB_JOBID}" +mkdir $rundir +cp $0 $rundir +cp inputs $rundir +cd $rundir + +# 1. Run normally +${JSRUN} --smpiargs="-gpu" ${EXE} inputs + +# 2. Run under cuda-memcheck +# ${JSRUN} --smpiargs="-gpu" cuda-memcheck ${EXE} inputs &> memcheck.txt + +# 3. Run under nvprof and direct all stdout and stderr to nvprof.txt +#${JSRUN} --smpiargs="-gpu" nvprof --profile-child-processes ${EXE} inputs &> nvprof.txt + +# 4. Run under nvprof and store performance data in a nvvp file +# Can be converted to text using nvprof -i nvprof-timeline-%p.nvvp +#${JSRUN} --smpiargs="-gpu" nvprof --profile-child-processes -o nvprof-timeline-%p.nvvp ${EXE} inputs + +# COLLECT PERFORMANCE METRICS - THIS IS MUCH SLOWER. Set nsteps=2 in the inputs files +# 5. Run under nvprof and collect metrics for a subset of kernels +#${JSRUN} --smpiargs="-gpu" nvprof --profile-child-processes --kernels '(deposit_current|gather_\w+_field|push_\w+_boris)' --analysis-metrics -o nvprof-metrics-kernel-%p.nvvp ${EXE} inputs + +# 6. Run under nvprof and collect metrics for all kernels -- much slower! +#${JSRUN} --smpiargs="-gpu" nvprof --profile-child-processes --analysis-metrics -o nvprof-metrics-%p.nvvp ${EXE} inputs + +cp ../WarpX*.${LSB_JOBID} . |