48 files changed, 2053 insertions, 757 deletions
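The main user-facing change on the Python side of this patch is that simulation-frame diagnostics are now registered as objects (``picmi.FieldDiagnostic``, ``picmi.ParticleDiagnostic``) via ``Simulation.add_diagnostic``, instead of passing ``warpx_plot_int`` to ``Simulation``. Below is a minimal usage sketch, not part of the patch itself; the grid and plasma parameters are borrowed from the Langmuir example added later in this diff:

    # Hedged sketch of the diagnostics interface added in this patch;
    # parameters follow Examples/Tests/Langmuir/langmuir_PICMI_rt.py.
    from pywarpx import picmi

    uniform_plasma = picmi.UniformDistribution(density=1.e25,
                                               upper_bound=[0., None, None],
                                               directed_velocity=[0.1, 0., 0.])
    electrons = picmi.Species(particle_type='electron', name='electrons',
                              initial_distribution=uniform_plasma)

    grid = picmi.Cartesian3DGrid(number_of_cells=[64, 64, 64],
                                 lower_bound=[-20.e-6, -20.e-6, -20.e-6],
                                 upper_bound=[20.e-6, 20.e-6, 20.e-6],
                                 lower_boundary_conditions=['periodic']*3,
                                 upper_boundary_conditions=['periodic']*3,
                                 warpx_max_grid_size=32, warpx_coord_sys=0)
    solver = picmi.ElectromagneticSolver(grid=grid, cfl=1.)

    sim = picmi.Simulation(solver=solver, max_steps=40, verbose=1)
    sim.add_species(electrons,
                    layout=picmi.GriddedLayout(n_macroparticle_per_cell=[2, 2, 2],
                                               grid=grid))

    # New in this patch: diagnostics objects replace warpx_plot_int.
    field_diag = picmi.FieldDiagnostic(grid=grid, period=40)
    part_diag = picmi.ParticleDiagnostic(period=40, species=[electrons])
    sim.add_diagnostic(field_diag)
    sim.add_diagnostic(part_diag)

    sim.step()

Because every simulation-frame diagnostic maps onto the single ``amr.plot_int`` input parameter, all periods must currently agree; see the closing note after the diff.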
diff --git a/Docs/source/building/building.rst b/Docs/source/building/building.rst index 5913f283d..33357a4b6 100644 --- a/Docs/source/building/building.rst +++ b/Docs/source/building/building.rst @@ -107,3 +107,4 @@ Advanced building instructions spectral cori summitdev + spack diff --git a/Docs/source/building/spack.rst b/Docs/source/building/spack.rst new file mode 100644 index 000000000..d54c6cc10 --- /dev/null +++ b/Docs/source/building/spack.rst @@ -0,0 +1,47 @@ +Building WarpX with Spack +=============================== + +WarpX can be installed using Spack. From the Spack web page: "Spack is a package management tool designed to support multiple +versions and configurations of software on a wide variety of platforms and environments." + +Spack is available from `GitHub <https://github.com/spack/spack>`__. Spack only needs to be cloned and can be used right away - there are no installation +steps. The spack command, "spack/bin/spack", can be used directly, or "spack/bin" can be added to your ``PATH``. + +WarpX is built with the single command + +:: + + spack install warpx + +This will build the 3-D version of WarpX using the master branch. +At the very end of the output from the build sequence, Spack tells you where the WarpX executable has been placed. +Alternatively, the "spack load" command can be configured so that "spack load warpx" will put the executable in your ``PATH``. + +To build using the dev branch, the command is + +:: + + spack install warpx@dev + + +Other variants of WarpX can be installed, for example + +:: + + spack install warpx dims=2 + +will build the 2-D version. + +:: + + spack install warpx debug=True + +will build with debugging turned on. + +:: + + spack install warpx %intel + +will build using the Intel compiler (instead of gcc). + +The Python version of WarpX is not yet available with Spack. diff --git a/Docs/source/building/summitdev.rst b/Docs/source/building/summitdev.rst index e194db73c..c5a3a7034 100644 --- a/Docs/source/building/summitdev.rst +++ b/Docs/source/building/summitdev.rst @@ -13,7 +13,7 @@ correct branch: git clone https://github.com/ECP-WarpX/WarpX.git cd WarpX - git checkout gpu + git checkout dev cd .. git clone https://bitbucket.org/berkeleylab/picsar.git @@ -33,4 +33,4 @@ Then, use the following set of commands to compile: module load pgi module load cuda - make + make -j 4 USE_GPU=TRUE COMP=pgi diff --git a/Docs/source/conf.py b/Docs/source/conf.py index 1d4b97c49..ce6f436d3 100644 --- a/Docs/source/conf.py +++ b/Docs/source/conf.py @@ -88,6 +88,8 @@ todo_include_todos = False # html_theme = 'sphinx_rtd_theme' +numfig = True + # Theme options are theme-specific and customize the look and feel of a theme # further. For a list of options available for each theme, see the # documentation. diff --git a/Docs/source/running_cpp/parameters.rst b/Docs/source/running_cpp/parameters.rst index 3c579d6fd..e0a0ac2b2 100644 --- a/Docs/source/running_cpp/parameters.rst +++ b/Docs/source/running_cpp/parameters.rst @@ -376,11 +376,10 @@ Numerics and algorithms - ``2``: Direct deposition, vectorized - ``3``: Direct deposition, non-optimized - .. warning :: + .. warning:: - The vectorized Esirkepov deposition - (``algo.current_deposition=0``) is currently not functional in WarpX. - All the other methods (``1``, ``2`` and ``3``) are functional. + On GPU, use ``algo.current_deposition=0`` for Esirkepov + or ``3`` for direct deposition.
* ``algo.charge_deposition`` (`integer`) The algorithm for the charge density deposition: @@ -393,6 +392,10 @@ Numerics and algorithms - ``0``: Vectorized version - ``1``: Non-optimized version + .. warning:: + + The vectorized version does not run on GPU. Use + ``algo.field_gather=1`` when running on GPU. * ``algo.particle_pusher`` (`integer`) The algorithm for the particle pusher: @@ -470,6 +473,9 @@ Diagnostics and output Only used when mesh refinement is activated and ``warpx.plot_raw_fields`` is ``1``. Whether to output the data of the coarse patch, in the plot files. +* ``amr.plot_file`` (`string`) + Root for output file names. Supports sub-directories. Default: ``plotfiles/plt``. + Checkpoints and restart ----------------------- WarpX supports checkpoints/restart via AMReX. diff --git a/Docs/source/visualization/sensei.rst b/Docs/source/visualization/sensei.rst index 4881f9d78..59f879e47 100644 --- a/Docs/source/visualization/sensei.rst +++ b/Docs/source/visualization/sensei.rst @@ -233,19 +233,6 @@ To access the SENSEI modulefiles on cori first add the SENSEI install to the sea module use /usr/common/software/sensei/modulefiles -Examples -=================== - -2D LPA Example --------------- - -* :download:`input file<./inputs.2d>` -* :download:`xml file<./ez2d.xml>` -* :download:`session file<./ez2d.session>` -* :E field screen shot at time step 40 -.. figure:: ez2d_00040.png - :alt: picture - 3D LPA Example -------------- This section shows an example of using SENSEI and three different back ends on diff --git a/Examples/Physics_applications/laser_acceleration/laser_acceleration_PICMI.py b/Examples/Physics_applications/laser_acceleration/laser_acceleration_PICMI.py index 77cd2e176..867e7a951 100644 --- a/Examples/Physics_applications/laser_acceleration/laser_acceleration_PICMI.py +++ b/Examples/Physics_applications/laser_acceleration/laser_acceleration_PICMI.py @@ -95,6 +95,20 @@ solver = picmi.ElectromagneticSolver(grid=grid, method='CKC', cfl=1.)
########################## +# diagnostics +########################## + +field_diag1 = picmi.FieldDiagnostic(grid = grid, + period = 100, + warpx_plot_raw_fields = 1, + warpx_plot_raw_fields_guards = 1, + warpx_plot_finepatch = 1, + warpx_plot_crsepatch = 1) + +part_diag1 = picmi.ParticleDiagnostic(period = 100, + species = [electrons]) + +########################## # simulation setup ########################## @@ -102,7 +116,6 @@ sim = picmi.Simulation(solver = solver, max_steps = max_steps, verbose = 1, cfl = 1.0, - warpx_plot_int = 100, warpx_current_deposition_algo = 3, warpx_charge_deposition_algo = 0, warpx_field_gathering_algo = 0, @@ -112,6 +125,8 @@ sim.add_species(electrons, layout=picmi.GriddedLayout(grid=grid, n_macroparticle sim.add_laser(laser, injection_method=laser_antenna) +sim.add_diagnostic(field_diag1) +sim.add_diagnostic(part_diag1) ########################## # simulation run diff --git a/Examples/Tests/Langmuir/inputs.multi.2d.rt b/Examples/Tests/Langmuir/inputs.multi.2d.rt index 6f301a7c2..ce82fd1c7 100644 --- a/Examples/Tests/Langmuir/inputs.multi.2d.rt +++ b/Examples/Tests/Langmuir/inputs.multi.2d.rt @@ -25,7 +25,7 @@ warpx.serialize_ics = 1 warpx.verbose = 1 # Algorithms -algo.current_deposition = 3 +algo.current_deposition = 0 algo.charge_deposition = 0 algo.field_gathering = 1 algo.particle_pusher = 0 diff --git a/Examples/Tests/Langmuir/inputs.rt b/Examples/Tests/Langmuir/inputs.rt index ec265bca7..3ec8fc47d 100644 --- a/Examples/Tests/Langmuir/inputs.rt +++ b/Examples/Tests/Langmuir/inputs.rt @@ -25,7 +25,7 @@ warpx.serialize_ics = 1 warpx.verbose = 1 # Algorithms -algo.current_deposition = 3 +algo.current_deposition = 0 algo.charge_deposition = 0 algo.field_gathering = 1 algo.particle_pusher = 0 diff --git a/Examples/Tests/Langmuir/langmuir_PICMI_rt.py b/Examples/Tests/Langmuir/langmuir_PICMI_rt.py new file mode 100644 index 000000000..dab414b3b --- /dev/null +++ b/Examples/Tests/Langmuir/langmuir_PICMI_rt.py @@ -0,0 +1,48 @@ +# --- Simple example of Langmuir oscillations in a uniform plasma + +import numpy as np +from pywarpx import picmi + +nx = 64 +ny = 64 +nz = 64 + +xmin = -20.e-6 +ymin = -20.e-6 +zmin = -20.e-6 +xmax = +20.e-6 +ymax = +20.e-6 +zmax = +20.e-6 + +uniform_plasma = picmi.UniformDistribution(density=1.e25, upper_bound=[0., None, None], directed_velocity=[0.1, 0., 0.]) + +electrons = picmi.Species(particle_type='electron', name='electrons', initial_distribution=uniform_plasma) + +grid = picmi.Cartesian3DGrid(number_of_cells = [nx, ny, nz], + lower_bound = [xmin, ymin, zmin], + upper_bound = [xmax, ymax, zmax], + lower_boundary_conditions = ['periodic', 'periodic', 'periodic'], + upper_boundary_conditions = ['periodic', 'periodic', 'periodic'], + moving_window_velocity = [0., 0., 0.], + warpx_max_grid_size=32, warpx_coord_sys=0) + +solver = picmi.ElectromagneticSolver(grid=grid, cfl=1.) + +sim = picmi.Simulation(solver = solver, + max_steps = 40, + verbose = 1, + warpx_plot_int = 40, + warpx_current_deposition_algo = 3, + warpx_charge_deposition_algo = 0, + warpx_field_gathering_algo = 0, + warpx_particle_pusher_algo = 0) + +sim.add_species(electrons, layout=picmi.GriddedLayout(n_macroparticle_per_cell=[2,2,2], grid=grid)) + +# write_inputs will create an inputs file that can be used to run +# with the compiled version. 
+sim.write_input_file(file_name='inputs_from_PICMI') + +# Alternatively, sim.step will run WarpX, controlling it from Python +sim.step() + diff --git a/Examples/Tests/PML/analysis_pml_ckc.py b/Examples/Tests/PML/analysis_pml_ckc.py new file mode 100755 index 000000000..90d13cecc --- /dev/null +++ b/Examples/Tests/PML/analysis_pml_ckc.py @@ -0,0 +1,34 @@ +#! /usr/bin/env python + +import sys +import yt ; yt.funcs.mylog.setLevel(0) +import numpy as np +import scipy.constants as scc + +filename = sys.argv[1] + +############################ +### INITIAL LASER ENERGY ### +############################ +energy_start = 9.1301289517e-08 + +########################## +### FINAL LASER ENERGY ### +########################## +ds = yt.load( filename ) +all_data_level_0 = ds.covering_grid(level=0,left_edge=ds.domain_left_edge, dims=ds.domain_dimensions) +Bx = all_data_level_0['boxlib', 'Bx'].v.squeeze() +By = all_data_level_0['boxlib', 'By'].v.squeeze() +Bz = all_data_level_0['boxlib', 'Bz'].v.squeeze() +Ex = all_data_level_0['boxlib', 'Ex'].v.squeeze() +Ey = all_data_level_0['boxlib', 'Ey'].v.squeeze() +Ez = all_data_level_0['boxlib', 'Ez'].v.squeeze() +energyE = np.sum(scc.epsilon_0/2*(Ex**2+Ey**2+Ez**2)) +energyB = np.sum(1./scc.mu_0/2*(Bx**2+By**2+Bz**2)) +energy_end = energyE + energyB + +Reflectivity = energy_end/energy_start +Reflectivity_theory = 1.8015e-06 + +assert( abs(Reflectivity-Reflectivity_theory) < 5./100 * Reflectivity_theory ) + diff --git a/Examples/Tests/PML/analysis_pml_yee.py b/Examples/Tests/PML/analysis_pml_yee.py new file mode 100755 index 000000000..6234cd5d2 --- /dev/null +++ b/Examples/Tests/PML/analysis_pml_yee.py @@ -0,0 +1,34 @@ +#! /usr/bin/env python + +import sys +import yt ; yt.funcs.mylog.setLevel(0) +import numpy as np +import scipy.constants as scc + +filename = sys.argv[1] + +############################ +### INITIAL LASER ENERGY ### +############################ +energy_start = 9.1301289517e-08 + +########################## +### FINAL LASER ENERGY ### +########################## +ds = yt.load( filename ) +all_data_level_0 = ds.covering_grid(level=0,left_edge=ds.domain_left_edge, dims=ds.domain_dimensions) +Bx = all_data_level_0['boxlib', 'Bx'].v.squeeze() +By = all_data_level_0['boxlib', 'By'].v.squeeze() +Bz = all_data_level_0['boxlib', 'Bz'].v.squeeze() +Ex = all_data_level_0['boxlib', 'Ex'].v.squeeze() +Ey = all_data_level_0['boxlib', 'Ey'].v.squeeze() +Ez = all_data_level_0['boxlib', 'Ez'].v.squeeze() +energyE = np.sum(scc.epsilon_0/2*(Ex**2+Ey**2+Ez**2)) +energyB = np.sum(1./scc.mu_0/2*(Bx**2+By**2+Bz**2)) +energy_end = energyE + energyB + +Reflectivity = energy_end/energy_start +Reflectivity_theory = 5.683000058954201e-07 + +assert( abs(Reflectivity-Reflectivity_theory) < 5./100 * Reflectivity_theory ) + diff --git a/Examples/Tests/gpu_test/inputs b/Examples/Tests/gpu_test/inputs new file mode 100644 index 000000000..18bb80d26 --- /dev/null +++ b/Examples/Tests/gpu_test/inputs @@ -0,0 +1,69 @@ +# Maximum number of time steps +max_step = 10 + +# number of grid points +amr.n_cell = 64 64 64 + +# Maximum allowable size of each subdomain in the problem domain; +# this is used to decompose the domain for parallel calculations. +amr.max_grid_size = 64 + +# Maximum level in hierarchy (for now must be 0, i.e., one level in total) +amr.max_level = 0 + +# Geometry +geometry.coord_sys = 0 # 0: Cartesian +geometry.is_periodic = 1 1 1 # Is periodic? 
+geometry.prob_lo = -20.e-6 -20.e-6 -20.e-6 # physical domain +geometry.prob_hi = 20.e-6 20.e-6 20.e-6 + +warpx.serialize_ics = 1 +warpx.do_pml = 0 + +# Verbosity +warpx.verbose = 1 + +# Algorithms +algo.current_deposition = 3 +algo.charge_deposition = 0 +algo.field_gathering = 1 +algo.particle_pusher = 0 + +interpolation.nox = 1 +interpolation.noy = 1 +interpolation.noz = 1 + +particles.do_tiling = 0 + +# CFL +warpx.cfl = 1.0 + +amr.plot_int = -10 + +particles.nspecies = 1 +particles.species_names = electrons + +electrons.charge = -q_e +electrons.mass = m_e +electrons.injection_style = "NUniformPerCell" +electrons.num_particles_per_cell_each_dim = 2 2 2 +electrons.profile = constant +electrons.density = 1.e25 # number of electrons per m^3 +electrons.momentum_distribution_type = "gaussian" +electrons.ux_th = 0.01 # uth the std of the (unitless) momentum +electrons.uy_th = 0.01 # uth the std of the (unitless) momentum +electrons.uz_th = 0.01 # uth the std of the (unitless) momentum +electrons.uz_m = 10. # Mean momentum along z (unitless) + +# Laser +warpx.use_laser = 1 +laser.profile = Gaussian +laser.position = 0. 0. 0.e-6 # This point is on the laser plane +laser.direction = 0. 0. 1. # The plane normal direction +laser.polarization = 1. 0. 0. # The main polarization vector +laser.e_max = 16.e12 # Maximum amplitude of the laser field (in V/m) +laser.profile_waist = 3.e-6 # The waist of the laser (in meters) +laser.profile_duration = 15.e-15 # The duration of the laser (in seconds) +laser.profile_t_peak = 30.e-15 # The time at which the laser reaches its peak (in seconds) +laser.profile_focal_distance = 100.e-6 # Focal distance from the antenna (in meters) +laser.wavelength = 0.8e-6 # The wavelength of the laser (in meters) diff --git a/Examples/Tests/gpu_test/script.sh b/Examples/Tests/gpu_test/script.sh new file mode 100755 index 000000000..cd6b0eadd --- /dev/null +++ b/Examples/Tests/gpu_test/script.sh @@ -0,0 +1,48 @@ +#!/bin/bash +#BSUB -P GEN109 +#BSUB -W 0:10 +#BSUB -nnodes 1 +#BSUB -J WarpX +#BSUB -o WarpXo.%J +#BSUB -e WarpXe.%J + +module load pgi +module load cuda/9.1.85 +module list +set -x + +omp=1 +export OMP_NUM_THREADS=${omp} +#EXE="../main3d.pgi.DEBUG.TPROF.MPI.ACC.CUDA.ex" +EXE="../main3d.pgi.TPROF.MPI.ACC.CUDA.ex" +#JSRUN="jsrun -n 4 -a 1 -g 1 -c 1 --bind=packed:${omp} " +#JSRUN="jsrun -n 1 -a 4 -g 4 -c 4 --bind=packed:${omp} " +JSRUN="jsrun -n 1 -a 1 -g 1 -c 1 --bind=packed:${omp} " + +rundir="${LSB_JOBNAME}-${LSB_JOBID}" +mkdir $rundir +cp $0 $rundir +cp inputs $rundir +cd $rundir + +# 1. Run normally +${JSRUN} --smpiargs="-gpu" ${EXE} inputs + +# 2. Run under cuda-memcheck +# ${JSRUN} --smpiargs="-gpu" cuda-memcheck ${EXE} inputs &> memcheck.txt + +# 3. Run under nvprof and direct all stdout and stderr to nvprof.txt +#${JSRUN} --smpiargs="-gpu" nvprof --profile-child-processes ${EXE} inputs &> nvprof.txt + +# 4. Run under nvprof and store performance data in a nvvp file +# Can be converted to text using nvprof -i nvprof-timeline-%p.nvvp +#${JSRUN} --smpiargs="-gpu" nvprof --profile-child-processes -o nvprof-timeline-%p.nvvp ${EXE} inputs + +# COLLECT PERFORMANCE METRICS - THIS IS MUCH SLOWER. Set nsteps=2 in the inputs files +# 5. Run under nvprof and collect metrics for a subset of kernels +#${JSRUN} --smpiargs="-gpu" nvprof --profile-child-processes --kernels '(deposit_current|gather_\w+_field|push_\w+_boris)' --analysis-metrics -o nvprof-metrics-kernel-%p.nvvp ${EXE} inputs + +# 6. Run under nvprof and collect metrics for all kernels -- much slower! 
+#${JSRUN} --smpiargs="-gpu" nvprof --profile-child-processes --analysis-metrics -o nvprof-metrics-%p.nvvp ${EXE} inputs + +cp ../WarpX*.${LSB_JOBID} . diff --git a/GNUmakefile b/GNUmakefile index 01b9f03ef..06f4c6229 100644 --- a/GNUmakefile +++ b/GNUmakefile @@ -10,6 +10,7 @@ DIM = 3 COMP = gcc #COMP = intel +#COMP = pgi TINY_PROFILE = TRUE #PROFILE = TRUE @@ -19,6 +20,7 @@ TINY_PROFILE = TRUE STORE_OLD_PARTICLE_ATTRIBS = FALSE USE_OMP = TRUE +USE_GPU = FALSE EBASE = main diff --git a/Python/pywarpx/Bucket.py b/Python/pywarpx/Bucket.py index c73b3dac9..66494a700 100644 --- a/Python/pywarpx/Bucket.py +++ b/Python/pywarpx/Bucket.py @@ -20,6 +20,10 @@ class Bucket(object): except KeyError: return object.__getattr__(self, name) + def check_consistency(self, vname, value, errmsg): + if vname in self.argvattrs: + assert (self.argvattrs[vname] is None) or (self.argvattrs[vname] == value), Exception(errmsg) + def attrlist(self): "Concatenate the attributes into a string" result = [] diff --git a/Python/pywarpx/WarpX.py b/Python/pywarpx/WarpX.py index 9c4a4b380..f58d4f111 100644 --- a/Python/pywarpx/WarpX.py +++ b/Python/pywarpx/WarpX.py @@ -70,7 +70,7 @@ class WarpX(Bucket): argv = self.create_argv_list() with open(filename, 'w') as ff: - for k, v in kw.iteritems(): + for k, v in kw.items(): ff.write('{0} = {1}\n'.format(k, v)) for arg in argv: diff --git a/Python/pywarpx/picmi.py b/Python/pywarpx/picmi.py index 1d02f8b29..81b1543bb 100644 --- a/Python/pywarpx/picmi.py +++ b/Python/pywarpx/picmi.py @@ -462,6 +462,9 @@ class Simulation(picmistandard.PICMI_Simulation): self.lasers[i].initialize_inputs() self.laser_injection_methods[i].initialize_inputs(self.lasers[i]) + for diagnostic in self.diagnostics: + diagnostic.initialize_inputs() + def initialize_warpx(self): if self.warpx_initialized: return @@ -492,3 +495,90 @@ class Simulation(picmistandard.PICMI_Simulation): if self.warpx_initialized: self.warpx_initialized = False pywarpx.warpx.finalize() + + +# ---------------------------- +# Simulation frame diagnostics +# ---------------------------- + + +class FieldDiagnostic(picmistandard.PICMI_FieldDiagnostic): + def init(self, kw): + + self.plot_raw_fields = kw.pop('warpx_plot_raw_fields', None) + self.plot_raw_fields_guards = kw.pop('warpx_plot_raw_fields_guards', None) + self.plot_finepatch = kw.pop('warpx_plot_finepatch', None) + self.plot_crsepatch = kw.pop('warpx_plot_crsepatch', None) + + def initialize_inputs(self): + # --- For now, the period must be the same as plot_int if set + pywarpx.amr.check_consistency('plot_int', self.period, 'The period must be the same for all simulation frame diagnostics') + pywarpx.amr.plot_int = self.period + + if 'rho' in self.data_list: + pywarpx.warpx.plot_rho = 1 + if 'dive' in self.data_list: + pywarpx.warpx.plot_dive = 1 + if 'divb' in self.data_list: + pywarpx.warpx.plot_divb = 1 + if 'F' in self.data_list: + pywarpx.warpx.plot_F = 1 + if 'proc_number' in self.data_list: + pywarpx.warpx.plot_proc_number = 1 + + pywarpx.warpx.plot_raw_fields = self.plot_raw_fields + pywarpx.warpx.plot_raw_fields_guards = self.plot_raw_fields_guards + + pywarpx.amr.check_consistency('plot_finepatch', self.plot_finepatch, 'The fine patch flag must be the same for all simulation frame field diagnostics') + pywarpx.amr.check_consistency('plot_crsepatch', self.plot_crsepatch, 'The coarse patch flag must be the same for all simulation frame field diagnostics') + pywarpx.warpx.plot_finepatch = self.plot_finepatch + pywarpx.warpx.plot_crsepatch = self.plot_crsepatch + +class 
ElectrostaticFieldDiagnostic(picmistandard.PICMI_ElectrostaticFieldDiagnostic): + def initialize_inputs(self): + # --- For now, the period must be the same as plot_int if set + pywarpx.amr.check_consistency('plot_int', self.period, 'The period must be the same for all simulation frame diagnostics') + pywarpx.amr.plot_int = self.period + + +class ParticleDiagnostic(picmistandard.PICMI_ParticleDiagnostic): + def initialize_inputs(self): + # --- For now, the period must be the same as plot_int if set + pywarpx.amr.check_consistency('plot_int', self.period, 'The period must be the same for all simulation frame diagnostics') + pywarpx.amr.plot_int = self.period + + if 'part_per_cell' in self.data_list: + pywarpx.warpx.plot_part_per_cell = 1 + if 'part_per_grid' in self.data_list: + pywarpx.warpx.plot_part_per_grid = 1 + if 'part_per_proc' in self.data_list: + pywarpx.warpx.plot_part_per_proc = 1 + + +# ---------------------------- +# Lab frame diagnostics +# ---------------------------- + + +class LabFrameFieldDiagnostic(picmistandard.PICMI_LabFrameFieldDiagnostic): + def initialize_inputs(self): + + pywarpx.warpx.check_consistency('num_snapshots_lab', self.num_snapshots, 'The number of snapshots must be the same in all lab frame diagnostics') + pywarpx.warpx.check_consistency('dt_snapshots_lab', self.dt_snapshots, 'The time between snapshots must be the same in all lab frame diagnostics') + + pywarpx.warpx.do_boosted_frame_diagnostic = 1 + pywarpx.warpx.num_snapshots_lab = self.num_snapshots + pywarpx.warpx.dt_snapshots_lab = self.dt_snapshots + pywarpx.warpx.do_boosted_frame_fields = 1 + + +class LabFrameParticleDiagnostic(picmistandard.PICMI_LabFrameParticleDiagnostic): + def initialize_inputs(self): + + pywarpx.warpx.check_consistency('num_snapshots_lab', self.num_snapshots, 'The number of snapshots must be the same in all lab frame diagnostics') + pywarpx.warpx.check_consistency('dt_snapshots_lab', self.dt_snapshots, 'The time between snapshots must be the same in all lab frame diagnostics') + + pywarpx.warpx.do_boosted_frame_diagnostic = 1 + pywarpx.warpx.num_snapshots_lab = self.num_snapshots + pywarpx.warpx.dt_snapshots_lab = self.dt_snapshots + pywarpx.warpx.do_boosted_frame_particles = 1 @@ -6,3 +6,4 @@ WarpX is an advanced electromagnetic Particle-In-Cell code. It supports many features including Perfectly-Matched Layers (PML), mesh refinement, and the boosted-frame technique. In order to learn how to install and run the code, please see the online documentation: https://ecp-warpx.github.io/index.html + diff --git a/Regression/WarpX-tests.ini b/Regression/WarpX-tests.ini index 14af09277..60647e89b 100644 --- a/Regression/WarpX-tests.ini +++ b/Regression/WarpX-tests.ini @@ -60,8 +60,9 @@ branch = master [pml_x_yee] buildDir = . inputFile = Examples/Tests/PML/inputs2d -runtime_params = warpx.do_dynamic_scheduling=0 warpx.maxwell_fdtd_solver=yee +runtime_params = warpx.do_dynamic_scheduling=0 algo.maxwell_fdtd_solver=yee dim = 2 +addToCompileString = restartTest = 0 useMPI = 1 numprocs = 2 @@ -69,13 +70,14 @@ useOMP = 1 numthreads = 2 compileTest = 0 doVis = 0 -analysisRoutine = Examples/Tests/PML/analysis_pml.py +analysisRoutine = Examples/Tests/PML/analysis_pml_yee.py [pml_x_ckc] buildDir = . 
inputFile = Examples/Tests/PML/inputs2d -runtime_params = warpx.do_dynamic_scheduling=0 warpx.maxwell_fdtd_solver=ckc +runtime_params = warpx.do_dynamic_scheduling=0 algo.maxwell_fdtd_solver=ckc dim = 2 +addToCompileString = restartTest = 0 useMPI = 1 numprocs = 2 @@ -83,13 +85,14 @@ useOMP = 1 numthreads = 2 compileTest = 0 doVis = 0 -analysisRoutine = Examples/Tests/PML/analysis_pml.py +analysisRoutine = Examples/Tests/PML/analysis_pml_ckc.py [nci_corrector] buildDir = . inputFile = Examples/Modules/nci_corrector/inputs2d runtime_params = warpx.do_dynamic_scheduling=0 dim = 2 +addToCompileString = restartTest = 0 useMPI = 1 numprocs = 2 @@ -103,6 +106,7 @@ analysisRoutine = Examples/Modules/nci_corrector/ncicorr_analysis.py buildDir = . inputFile = Examples/Tests/Langmuir/inputs.rt dim = 2 +addToCompileString = restartTest = 0 useMPI = 1 numprocs = 2 @@ -120,6 +124,7 @@ analysisOutputImage = langmuir2d_analysis.png buildDir = . inputFile = Examples/Tests/Langmuir/inputs.rt dim = 3 +addToCompileString = restartTest = 0 useMPI = 1 numprocs = 4 @@ -137,6 +142,7 @@ analysisOutputImage = langmuir_x_analysis.png buildDir = . inputFile = Examples/Tests/Langmuir/inputs.rt dim = 3 +addToCompileString = restartTest = 0 useMPI = 1 numprocs = 4 @@ -154,6 +160,7 @@ analysisOutputImage = langmuir_y_analysis.png buildDir = . inputFile = Examples/Tests/Langmuir/inputs.rt dim = 3 +addToCompileString = restartTest = 0 useMPI = 1 numprocs = 4 @@ -171,6 +178,7 @@ analysisOutputImage = langmuir_z_analysis.png buildDir = . inputFile = Examples/Tests/Langmuir/inputs.multi.rt dim = 3 +addToCompileString = restartTest = 0 useMPI = 1 numprocs = 4 @@ -224,6 +232,7 @@ analysisOutputImage = langmuir_multi_2d_analysis.png buildDir = . inputFile = Examples/Modules/laser_injection/inputs.rt dim = 3 +addToCompileString = restartTest = 0 useMPI = 1 numprocs = 4 @@ -239,6 +248,7 @@ analysisOutputImage = laser_analysis.png buildDir = . inputFile = Examples/Modules/laser_injection/inputs.2d.rt dim = 2 +addToCompileString = restartTest = 0 useMPI = 1 numprocs = 2 @@ -246,6 +256,7 @@ useOMP = 1 numthreads = 2 compileTest = 0 doVis = 0 +runtime_params = warpx.do_dynamic_scheduling=0 warpx.serialize_ics=1 compareParticles = 0 [LaserAcceleration] @@ -253,6 +264,7 @@ buildDir = . inputFile = Examples/Physics_applications/laser_acceleration/inputs.3d runtime_params = warpx.do_dynamic_scheduling=0 amr.n_cell=32 32 256 max_step=100 electrons.zmin=0.e-6 warpx.serialize_ics=1 dim = 3 +addToCompileString = restartTest = 0 useMPI = 1 numprocs = 2 @@ -268,6 +280,7 @@ buildDir = . inputFile = Examples/Tests/subcycling/inputs.2d runtime_params = warpx.serialize_ics=1 warpx.do_dynamic_scheduling=0 dim = 2 +addToCompileString = restartTest = 0 useMPI = 1 numprocs = 2 @@ -282,6 +295,7 @@ buildDir = . inputFile = Examples/Physics_applications/laser_acceleration/inputs.2d runtime_params = amr.max_level=1 max_step=100 warpx.serialize_ics=1 dim = 2 +addToCompileString = restartTest = 0 useMPI = 1 numprocs = 2 @@ -297,6 +311,7 @@ buildDir = . inputFile = Examples/Physics_applications/plasma_acceleration/inputs.2d runtime_params = amr.max_level=1 amr.n_cell=32 512 max_step=100 plasma_e.zmin=-200.e-6 warpx.serialize_ics=1 warpx.do_dynamic_scheduling=0 dim = 2 +addToCompileString = restartTest = 0 useMPI = 1 numprocs = 2 @@ -309,9 +324,10 @@ particleTypes = beam driver plasma_e [Python_Langmuir] buildDir = . 
-inputFile = Examples/Tests/Langmuir/langmuir_PICMI.py -customRunCmd = python langmuir_PICMI.py +inputFile = Examples/Tests/Langmuir/langmuir_PICMI_rt.py +customRunCmd = python langmuir_PICMI_rt.py dim = 3 +addToCompileString = restartTest = 0 useMPI = 1 numprocs = 2 @@ -327,6 +343,7 @@ outputFile = plotfiles/plt00040 buildDir = . inputFile = Examples/Physics_applications/uniform_plasma/inputs.3d dim = 3 +addToCompileString = restartTest = 1 restartFileNum = 6 useMPI = 1 @@ -342,6 +359,7 @@ particleTypes = electrons buildDir = tests/CurrentDeposition inputFile = inputs dim = 3 +addToCompileString = restartTest = 0 useMPI = 1 numprocs = 1 @@ -355,6 +373,7 @@ outputFile = plotfiles/plt00000 buildDir = tests/FieldGather inputFile = inputs dim = 3 +addToCompileString = restartTest = 0 useMPI = 1 numprocs = 1 @@ -368,6 +387,7 @@ outputFile = plotfiles/plt00000 buildDir = tests/FieldSolver inputFile = inputs dim = 3 +addToCompileString = restartTest = 0 useMPI = 1 numprocs = 1 @@ -381,6 +401,7 @@ outputFile = plotfiles/plt00000 buildDir = tests/ParticlePusher inputFile = inputs dim = 3 +addToCompileString = restartTest = 0 useMPI = 1 numprocs = 1 diff --git a/Regression/prepare_file_travis.py b/Regression/prepare_file_travis.py index 6d0e0dfc8..82b3c8e0e 100644 --- a/Regression/prepare_file_travis.py +++ b/Regression/prepare_file_travis.py @@ -1,22 +1,35 @@ # This script modifies `WarpX-test.ini` (which is used for nightly builds) # and creates the file `travis-test.ini` (which is used for continous # integration on TravisCI (https://travis-ci.org/) -# The subtests that are selected are controlled by the environement WARPX_DIM +# The subtests that are selected are controlled by WARPX_TEST_DIM +# The architecture (CPU/GPU) is selected by WARPX_TEST_ARCH import re import os # Get relevant environment variables -dim = os.environ.get('WARPX_DIM', None) +dim = os.environ.get('WARPX_TEST_DIM', None) +arch = os.environ.get('WARPX_TEST_ARCH', 'CPU') + +# Find the directory in which the tests should be run +current_dir = os.getcwd() +test_dir = re.sub('warpx/Regression', '', current_dir ) with open('WarpX-tests.ini') as f: text = f.read() # Replace default folder name -text = re.sub('/home/regtester/AMReX_RegTesting/', '/home/travis/', text) +text = re.sub('/home/regtester/AMReX_RegTesting', test_dir, text) # Add doComparison = 0 for each test text = re.sub( '\[(?P<name>.*)\]\nbuildDir = ', '[\g<name>]\ndoComparison = 0\nbuildDir = ', text ) +# Change compile options when running on GPU +if arch == 'GPU': + text = re.sub( 'addToCompileString =', + 'addToCompileString = USE_GPU=TRUE USE_OMP=FALSE USE_ACC=TRUE', text) + text = re.sub( 'COMP\s*=.*', 'COMP = pgi', text ) +print('Compiling for %s' %arch) + # Use only 2 cores for compiling text = re.sub( 'numMakeJobs = \d+', 'numMakeJobs = 2', text ) diff --git a/Source/LaserParticleContainer.cpp b/Source/LaserParticleContainer.cpp index 8c5d799e3..cf669d32d 100644 --- a/Source/LaserParticleContainer.cpp +++ b/Source/LaserParticleContainer.cpp @@ -91,7 +91,7 @@ LaserParticleContainer::LaserParticleContainer (AmrCore* amr_core, int ispecies) if (WarpX::gamma_boost > 1.) { // Check that the laser direction is equal to the boost direction - AMREX_ALWAYS_ASSERT_WITH_MESSAGE( + AMREX_ALWAYS_ASSERT_WITH_MESSAGE( nvec[0]*WarpX::boost_direction[0] + nvec[1]*WarpX::boost_direction[1] + nvec[2]*WarpX::boost_direction[2] - 1. 
< 1.e-12, @@ -110,22 +110,22 @@ LaserParticleContainer::LaserParticleContainer (AmrCore* amr_core, int ispecies) p_X = { p_X[0]*s, p_X[1]*s, p_X[2]*s }; Real dp = std::inner_product(nvec.begin(), nvec.end(), p_X.begin(), 0.0); - AMREX_ALWAYS_ASSERT_WITH_MESSAGE(std::abs(dp) < 1.0e-14, + AMREX_ALWAYS_ASSERT_WITH_MESSAGE(std::abs(dp) < 1.0e-14, "Laser plane vector is not perpendicular to the main polarization vector"); p_Y = CrossProduct(nvec, p_X); // The second polarization vector s = 1.0/std::sqrt(stc_direction[0]*stc_direction[0] + stc_direction[1]*stc_direction[1] + stc_direction[2]*stc_direction[2]); - stc_direction = { stc_direction[0]*s, stc_direction[1]*s, stc_direction[2]*s }; + stc_direction = { stc_direction[0]*s, stc_direction[1]*s, stc_direction[2]*s }; dp = std::inner_product(nvec.begin(), nvec.end(), stc_direction.begin(), 0.0); - AMREX_ALWAYS_ASSERT_WITH_MESSAGE(std::abs(dp) < 1.0e-14, + AMREX_ALWAYS_ASSERT_WITH_MESSAGE(std::abs(dp) < 1.0e-14, "stc_direction is not perpendicular to the laser plane vector"); - + // Get angle between p_X and stc_direction // in 2d, stcs are in the simulation plane #if AMREX_SPACEDIM == 3 - theta_stc = acos(stc_direction[0]*p_X[0] + - stc_direction[1]*p_X[1] + + theta_stc = acos(stc_direction[0]*p_X[0] + + stc_direction[1]*p_X[1] + stc_direction[2]*p_X[2]); #else theta_stc = 0.; @@ -250,7 +250,7 @@ LaserParticleContainer::InitData (int lev) BoxArray plane_ba { Box {IntVect(plane_lo[0],0), IntVect(plane_hi[0],0)} }; #endif - Vector<Real> particle_x, particle_y, particle_z, particle_w; + RealVector particle_x, particle_y, particle_z, particle_w; const DistributionMapping plane_dm {plane_ba, nprocs}; const Vector<int>& procmap = plane_dm.ProcessorMap(); @@ -263,7 +263,7 @@ LaserParticleContainer::InitData (int lev) { const Vector<Real>& pos = Transform(cell[0], cell[1]); #if (AMREX_SPACEDIM == 3) - const Real* x = pos.data(); + const Real* x = pos.dataPtr(); #else const Real x[2] = {pos[0], pos[2]}; #endif @@ -281,15 +281,15 @@ LaserParticleContainer::InitData (int lev) } } const int np = particle_z.size(); - Vector<Real> particle_ux(np, 0.0); - Vector<Real> particle_uy(np, 0.0); - Vector<Real> particle_uz(np, 0.0); + RealVector particle_ux(np, 0.0); + RealVector particle_uy(np, 0.0); + RealVector particle_uz(np, 0.0); if (Verbose()) amrex::Print() << "Adding laser particles\n"; AddNParticles(lev, - np, particle_x.data(), particle_y.data(), particle_z.data(), - particle_ux.data(), particle_uy.data(), particle_uz.data(), - 1, particle_w.data(), 1); + np, particle_x.dataPtr(), particle_y.dataPtr(), particle_z.dataPtr(), + particle_ux.dataPtr(), particle_uy.dataPtr(), particle_uz.dataPtr(), + 1, particle_w.dataPtr(), 1); } void @@ -297,8 +297,8 @@ LaserParticleContainer::Evolve (int lev, const MultiFab&, const MultiFab&, const MultiFab&, const MultiFab&, const MultiFab&, const MultiFab&, MultiFab& jx, MultiFab& jy, MultiFab& jz, - MultiFab*, MultiFab*, MultiFab*, - MultiFab* rho, MultiFab*, + MultiFab* cjx, MultiFab* cjy, MultiFab* cjz, + MultiFab* rho, MultiFab* crho, const MultiFab*, const MultiFab*, const MultiFab*, const MultiFab*, const MultiFab*, const MultiFab*, Real t, Real dt) @@ -307,11 +307,7 @@ LaserParticleContainer::Evolve (int lev, BL_PROFILE_VAR_NS("Laser::Evolve::Copy", blp_copy); BL_PROFILE_VAR_NS("PICSAR::LaserParticlePush", blp_pxr_pp); BL_PROFILE_VAR_NS("PICSAR::LaserCurrentDepo", blp_pxr_cd); - - const std::array<Real,3>& dx = WarpX::CellSize(lev); - - // WarpX assumes the same number of guard cells for Jx, Jy, Jz - long ngJ = 
jx.nGrow(); + BL_PROFILE_VAR_NS("Laser::Evolve::Accumulate", blp_accumulate); Real t_lab = t; if (WarpX::gamma_boost > 1) { @@ -329,8 +325,18 @@ LaserParticleContainer::Evolve (int lev, #pragma omp parallel #endif { - Vector<Real> xp, yp, zp, giv, plane_Xp, plane_Yp, amplitude_E; - FArrayBox local_rho, local_jx, local_jy, local_jz; +#ifdef _OPENMP + int thread_num = omp_get_thread_num(); +#else + int thread_num = 0; +#endif + + if (local_rho[thread_num] == nullptr) local_rho[thread_num].reset( new amrex::FArrayBox()); + if (local_jx[thread_num] == nullptr) local_jx[thread_num].reset( new amrex::FArrayBox()); + if (local_jy[thread_num] == nullptr) local_jy[thread_num].reset( new amrex::FArrayBox()); + if (local_jz[thread_num] == nullptr) local_jz[thread_num].reset( new amrex::FArrayBox()); + + Cuda::DeviceVector<Real> plane_Xp, plane_Yp, amplitude_E; for (WarpXParIter pti(*this, lev); pti.isValid(); ++pti) { @@ -346,13 +352,11 @@ LaserParticleContainer::Evolve (int lev, auto& uzp = attribs[PIdx::uz]; const long np = pti.numParticles(); + // For now, laser particles do not take the current buffers into account + const long np_current = np; - // Data on the grid - FArrayBox& jxfab = jx[pti]; - FArrayBox& jyfab = jy[pti]; - FArrayBox& jzfab = jz[pti]; + m_giv[thread_num].resize(np); - giv.resize(np); plane_Xp.resize(np); plane_Yp.resize(np); amplitude_E.resize(np); @@ -361,198 +365,78 @@ LaserParticleContainer::Evolve (int lev, // copy data from particle container to temp arrays // BL_PROFILE_VAR_START(blp_copy); - pti.GetPosition(xp, yp, zp); + pti.GetPosition(m_xp[thread_num], m_yp[thread_num], m_zp[thread_num]); BL_PROFILE_VAR_STOP(blp_copy); - for (int i = 0; i < np; ++i) - { - // Find the coordinates of the particles in the emission plane -#if (AMREX_SPACEDIM == 3) - plane_Xp[i] = u_X[0]*(xp[i] - position[0]) - + u_X[1]*(yp[i] - position[1]) - + u_X[2]*(zp[i] - position[2]); - plane_Yp[i] = u_Y[0]*(xp[i] - position[0]) - + u_Y[1]*(yp[i] - position[1]) - + u_Y[2]*(zp[i] - position[2]); -#elif (AMREX_SPACEDIM == 2) - plane_Xp[i] = u_X[0]*(xp[i] - position[0]) - + u_X[2]*(zp[i] - position[2]); - plane_Yp[i] = 0; -#endif - } - - const std::array<Real,3>& xyzmin_tile = WarpX::LowerCorner(pti.tilebox(), lev); - const std::array<Real,3>& xyzmin_grid = WarpX::LowerCorner(box, lev); - - long lvect = 8; - - auto depositCharge = [&] (MultiFab* rhomf, int icomp) - { - long ngRho = rhomf->nGrow(); - - Real* data_ptr; - const int *rholen; - FArrayBox& rhofab = (*rhomf)[pti]; - Box tile_box = convert(pti.tilebox(), IntVect::TheUnitVector()); - Box grown_box; - const std::array<Real, 3>& xyzmin = xyzmin_tile; - tile_box.grow(ngRho); - local_rho.resize(tile_box); - local_rho = 0.0; - data_ptr = local_rho.dataPtr(); - rholen = local_rho.length(); - -#if (AMREX_SPACEDIM == 3) - const long nx = rholen[0]-1-2*ngRho; - const long ny = rholen[1]-1-2*ngRho; - const long nz = rholen[2]-1-2*ngRho; -#else - const long nx = rholen[0]-1-2*ngRho; - const long ny = 0; - const long nz = rholen[1]-1-2*ngRho; -#endif - warpx_charge_deposition(data_ptr, &np, - xp.data(), yp.data(), zp.data(), wp.data(), - &this->charge, - &xyzmin[0], &xyzmin[1], &xyzmin[2], - &dx[0], &dx[1], &dx[2], &nx, &ny, &nz, - &ngRho, &ngRho, &ngRho, - &WarpX::nox,&WarpX::noy,&WarpX::noz, - &lvect, &WarpX::charge_deposition_algo); - - const int ncomp = 1; - amrex_atomic_accumulate_fab(BL_TO_FORTRAN_3D(local_rho), - BL_TO_FORTRAN_N_3D(rhofab,icomp), ncomp); - }; - - if (rho) depositCharge(rho,0); + if (rho) DepositCharge(pti, wp, rho, crho, 0, 
np_current, np, thread_num, lev); // // Particle Push // BL_PROFILE_VAR_START(blp_pxr_pp); + // Find the coordinates of the particles in the emission plane + calculate_laser_plane_coordinates( &np, + m_xp[thread_num].dataPtr(), + m_yp[thread_num].dataPtr(), + m_zp[thread_num].dataPtr(), + plane_Xp.dataPtr(), plane_Yp.dataPtr(), + &u_X[0], &u_X[1], &u_X[2], &u_Y[0], &u_Y[1], &u_Y[2], + &position[0], &position[1], &position[2] ); // Calculate the laser amplitude to be emitted, // at the position of the emission plane if (profile == laser_t::Gaussian) { - warpx_gaussian_laser( &np, plane_Xp.data(), plane_Yp.data(), + warpx_gaussian_laser( &np, plane_Xp.dataPtr(), plane_Yp.dataPtr(), &t_lab, &wavelength, &e_max, &profile_waist, &profile_duration, - &profile_t_peak, &profile_focal_distance, amplitude_E.data(), + &profile_t_peak, &profile_focal_distance, amplitude_E.dataPtr(), &zeta, &beta, &phi2, &theta_stc ); } if (profile == laser_t::Harris) { - warpx_harris_laser( &np, plane_Xp.data(), plane_Yp.data(), + warpx_harris_laser( &np, plane_Xp.dataPtr(), plane_Yp.dataPtr(), &t, &wavelength, &e_max, &profile_waist, &profile_duration, - &profile_focal_distance, amplitude_E.data() ); + &profile_focal_distance, amplitude_E.dataPtr() ); } if (profile == laser_t::parse_field_function) { - parse_function_laser( &np, plane_Xp.data(), plane_Yp.data(), &t, - amplitude_E.data(), parser_instance_number ); + parse_function_laser( &np, plane_Xp.dataPtr(), plane_Yp.dataPtr(), &t, + amplitude_E.dataPtr(), parser_instance_number ); } - // Calculate the corresponding momentum and position for the particles - for (int i = 0; i < np; ++i) - { - // Calculate the velocity according to the amplitude of E - Real sign_charge = std::copysign( 1.0, wp[i] ); - Real v_over_c = sign_charge * mobility * amplitude_E[i]; - AMREX_ALWAYS_ASSERT_WITH_MESSAGE( v_over_c < 1, - "The laser particles have to move unphysically in order to emit the laser."); - // The velocity is along the laser polarization p_X - Real vx = PhysConst::c * v_over_c * p_X[0]; - Real vy = PhysConst::c * v_over_c * p_X[1]; - Real vz = PhysConst::c * v_over_c * p_X[2]; - // When running in the boosted-frame, their is additional - // velocity along nvec - if (WarpX::gamma_boost > 1.) 
{ - vx -= PhysConst::c * WarpX::beta_boost * nvec[0]; - vy -= PhysConst::c * WarpX::beta_boost * nvec[1]; - vz -= PhysConst::c * WarpX::beta_boost * nvec[2]; - } - // Get the corresponding momenta - giv[i] = std::sqrt(1 - std::pow(v_over_c,2))/WarpX::gamma_boost; - Real gamma = 1./giv[i]; - uxp[i] = gamma * vx; - uyp[i] = gamma * vy; - uzp[i] = gamma * vz; - // Push the the particle positions - xp[i] += vx * dt; -#if (AMREX_SPACEDIM == 3) - yp[i] += vy * dt; -#endif - zp[i] += vz * dt; - } - + update_laser_particle( + &np, + m_xp[thread_num].dataPtr(), + m_yp[thread_num].dataPtr(), + m_zp[thread_num].dataPtr(), + uxp.dataPtr(), uyp.dataPtr(), uzp.dataPtr(), + m_giv[thread_num].dataPtr(), + wp.dataPtr(), amplitude_E.dataPtr(), &p_X[0], &p_X[1], &p_X[2], + &nvec[0], &nvec[1], &nvec[2], &mobility, &dt, + &PhysConst::c, &WarpX::beta_boost, &WarpX::gamma_boost ); BL_PROFILE_VAR_STOP(blp_pxr_pp); // // Current Deposition // - BL_PROFILE_VAR_START(blp_pxr_cd); - Real *jx_ptr, *jy_ptr, *jz_ptr; - const int *jxntot, *jyntot, *jzntot; - Box tbx = convert(pti.tilebox(), WarpX::jx_nodal_flag); - Box tby = convert(pti.tilebox(), WarpX::jy_nodal_flag); - Box tbz = convert(pti.tilebox(), WarpX::jz_nodal_flag); - - const std::array<Real, 3>& xyzmin = xyzmin_tile; - - tbx.grow(ngJ); - tby.grow(ngJ); - tbz.grow(ngJ); - - local_jx.resize(tbx); - local_jy.resize(tby); - local_jz.resize(tbz); - - local_jx = 0.0; - local_jy = 0.0; - local_jz = 0.0; - - jx_ptr = local_jx.dataPtr(); - jy_ptr = local_jy.dataPtr(); - jz_ptr = local_jz.dataPtr(); - - jxntot = local_jx.length(); - jyntot = local_jy.length(); - jzntot = local_jz.length(); - - warpx_current_deposition( - jx_ptr, &ngJ, jxntot, - jy_ptr, &ngJ, jyntot, - jz_ptr, &ngJ, jzntot, - &np, xp.data(), yp.data(), zp.data(), - uxp.data(), uyp.data(), uzp.data(), - giv.data(), wp.data(), &this->charge, - &xyzmin[0], &xyzmin[1], &xyzmin[2], - &dt, &dx[0], &dx[1], &dx[2], - &WarpX::nox,&WarpX::noy,&WarpX::noz, - &lvect,&WarpX::current_deposition_algo); - - const int ncomp = 1; - amrex_atomic_accumulate_fab(BL_TO_FORTRAN_3D(local_jx), - BL_TO_FORTRAN_3D(jxfab), ncomp); - amrex_atomic_accumulate_fab(BL_TO_FORTRAN_3D(local_jy), - BL_TO_FORTRAN_3D(jyfab), ncomp); - amrex_atomic_accumulate_fab(BL_TO_FORTRAN_3D(local_jz), - BL_TO_FORTRAN_3D(jzfab), ncomp); - - BL_PROFILE_VAR_STOP(blp_pxr_cd); - - if (rho) depositCharge(rho,1); + DepositCurrent(pti, wp, uxp, uyp, uzp, jx, jy, jz, + cjx, cjy, cjz, np_current, np, thread_num, lev, dt); // // copy particle data back // BL_PROFILE_VAR_START(blp_copy); - pti.SetPosition(xp, yp, zp); + pti.SetPosition(m_xp[thread_num], m_yp[thread_num], m_zp[thread_num]); BL_PROFILE_VAR_STOP(blp_copy); + if (rho) DepositCharge(pti, wp, rho, crho, 1, np_current, np, thread_num, lev); + if (cost) { const Box& tbx = pti.tilebox(); wt = (amrex::second() - wt) / tbx.d_numPts(); - (*cost)[pti].plus(wt, tbx); + FArrayBox* costfab = cost->fabPtr(pti); + AMREX_LAUNCH_HOST_DEVICE_LAMBDA ( tbx, work_box, + { + costfab->plus(wt, work_box); + }); } } } diff --git a/Source/Make.WarpX b/Source/Make.WarpX index 9dfe667e0..e5da17e93 100644 --- a/Source/Make.WarpX +++ b/Source/Make.WarpX @@ -5,6 +5,13 @@ OPENBC_HOME ?= ../../../openbc_poisson USE_MPI = TRUE USE_PARTICLES = TRUE +ifeq ($(USE_GPU),TRUE) + USE_OMP = FALSE + USE_CUDA = TRUE + USE_ACC = TRUE + NVCC_HOST_COMP = gcc +endif + include $(AMREX_HOME)/Tools/GNUMake/Make.defs ifndef USE_PYTHON_MAIN @@ -55,8 +62,13 @@ ifeq ($(USE_PSATD),TRUE) endif USERSuffix += .PSATD DEFINES += -DWARPX_USE_PSATD - DEFINES += 
-DFFTW # PICSAR uses it - LIBRARIES += -lfftw3_mpi -lfftw3 -lfftw3_omp + ifeq ($(USE_CUDA),TRUE) + DEFINES += -DFFTW -DCUDA_FFT=1 # PICSAR uses it + LIBRARIES += -lfftw3_mpi -lfftw3 -lfftw3_threads -lcufft + else + DEFINES += -DFFTW # PICSAR uses it + LIBRARIES += -lfftw3_mpi -lfftw3 -lfftw3_threads + endif endif ifeq ($(STORE_OLD_PARTICLE_ATTRIBS),TRUE) diff --git a/Source/Make.package b/Source/Make.package index f2adfc07c..f55e12297 100644 --- a/Source/Make.package +++ b/Source/Make.package @@ -1,8 +1,11 @@ ifneq ($(USE_PYTHON_MAIN),TRUE) CEXE_sources += main.cpp +else + CEXE_sources += WarpXWrappers.cpp + CEXE_headers += WarpXWrappers.h endif -CEXE_sources += WarpXWrappers.cpp WarpX_py.cpp +CEXE_sources += WarpX_py.cpp CEXE_sources += WarpX.cpp WarpXInitData.cpp WarpXEvolve.cpp WarpXIO.cpp WarpXProb.cpp WarpXRegrid.cpp CEXE_sources += WarpXTagging.cpp WarpXComm.cpp WarpXMove.cpp WarpXBoostedFrameDiagnostic.cpp @@ -10,7 +13,7 @@ CEXE_sources += WarpXTagging.cpp WarpXComm.cpp WarpXMove.cpp WarpXBoostedFrameDi CEXE_sources += ParticleIO.cpp CEXE_sources += ParticleContainer.cpp WarpXParticleContainer.cpp PhysicalParticleContainer.cpp LaserParticleContainer.cpp RigidInjectedParticleContainer.cpp -CEXE_headers += WarpXWrappers.h WarpX_py.H +CEXE_headers += WarpX_py.H CEXE_headers += WarpX.H WarpX_f.H WarpXConst.H WarpXBoostedFrameDiagnostic.H CEXE_sources += WarpXConst.cpp diff --git a/Source/ParticleContainer.H b/Source/ParticleContainer.H index 026ba90a5..9f732f355 100644 --- a/Source/ParticleContainer.H +++ b/Source/ParticleContainer.H @@ -139,6 +139,8 @@ public: void WriteHeader (std::ostream& os) const; + void SortParticlesByCell (); + void Redistribute (); void RedistributeLocal (const int num_ghost); @@ -160,10 +162,6 @@ public: return r; } - int L_lower_order_in_v() {return l_lower_order_in_v;} - - bool Use_fdtd_nci_corr() {return use_fdtd_nci_corr;} - void GetLabFrameData(const std::string& snapshot_name, const int i_lab, const int direction, const amrex::Real z_old, const amrex::Real z_new, @@ -202,9 +200,5 @@ private: // runtime parameters int nspecies = 1; // physical particles only. If WarpX::use_laser, nspecies+1 == allcontainers.size(). 
- - int l_lower_order_in_v = true; - - bool use_fdtd_nci_corr = false; }; #endif /*WARPX_ParticleContainer_H_*/ diff --git a/Source/ParticleContainer.cpp b/Source/ParticleContainer.cpp index 75c13d793..8aac692d6 100644 --- a/Source/ParticleContainer.cpp +++ b/Source/ParticleContainer.cpp @@ -69,8 +69,8 @@ MultiParticleContainer::ReadParameters () } } } - pp.query("use_fdtd_nci_corr", use_fdtd_nci_corr); - pp.query("l_lower_order_in_v", l_lower_order_in_v); + pp.query("use_fdtd_nci_corr", WarpX::use_fdtd_nci_corr); + pp.query("l_lower_order_in_v", WarpX::l_lower_order_in_v); initialized = true; } } @@ -261,6 +261,14 @@ MultiParticleContainer::GetChargeDensity (int lev, bool local) } void +MultiParticleContainer::SortParticlesByCell () +{ + for (auto& pc : allcontainers) { + pc->SortParticlesByCell(); + } +} + +void MultiParticleContainer::Redistribute () { for (auto& pc : allcontainers) { diff --git a/Source/PhysicalParticleContainer.H b/Source/PhysicalParticleContainer.H index 52e59713b..bd6015437 100644 --- a/Source/PhysicalParticleContainer.H +++ b/Source/PhysicalParticleContainer.H @@ -61,10 +61,10 @@ public: amrex::Real dt) override; virtual void PushPX(WarpXParIter& pti, - amrex::Vector<amrex::Real>& xp, - amrex::Vector<amrex::Real>& yp, - amrex::Vector<amrex::Real>& zp, - amrex::Vector<amrex::Real>& giv, + amrex::Cuda::DeviceVector<amrex::Real>& xp, + amrex::Cuda::DeviceVector<amrex::Real>& yp, + amrex::Cuda::DeviceVector<amrex::Real>& zp, + amrex::Cuda::DeviceVector<amrex::Real>& giv, amrex::Real dt); virtual void PushP (int lev, amrex::Real dt, @@ -80,6 +80,10 @@ public: // Inject particles in Box 'part_box' virtual void AddParticles (int lev); void AddPlasma(int lev, amrex::RealBox part_realbox = amrex::RealBox()); + void AddPlasmaCPU (int lev, amrex::RealBox part_realbox); +#ifdef AMREX_USE_GPU + void AddPlasmaGPU (int lev, amrex::RealBox part_realbox); +#endif void MapParticletoBoostedFrame(amrex::Real& x, amrex::Real& y, amrex::Real& z, std::array<amrex::Real, 3>& u); @@ -105,9 +109,14 @@ protected: bool boost_adjust_transverse_positions = false; bool do_backward_propagation = false; + long NumParticlesToAdd (const amrex::Box& overlap_box, + const amrex::RealBox& overlap_realbox, + const amrex::RealBox& tile_real_box, + const amrex::RealBox& particle_real_box); + int GetRefineFac(const amrex::Real x, const amrex::Real y, const amrex::Real z); std::unique_ptr<amrex::IArrayBox> m_refined_injection_mask = nullptr; - + }; #endif diff --git a/Source/PhysicalParticleContainer.cpp b/Source/PhysicalParticleContainer.cpp index df0ee3b3c..c4fdd4464 100644 --- a/Source/PhysicalParticleContainer.cpp +++ b/Source/PhysicalParticleContainer.cpp @@ -10,6 +10,62 @@ using namespace amrex; +long PhysicalParticleContainer:: +NumParticlesToAdd(const Box& overlap_box, const RealBox& overlap_realbox, + const RealBox& tile_realbox, const RealBox& particle_real_box) +{ + const int lev = 0; + const Geometry& geom = Geom(lev); + int num_ppc = plasma_injector->num_particles_per_cell; + const Real* dx = geom.CellSize(); + + long np = 0; + const auto& overlap_corner = overlap_realbox.lo(); + for (IntVect iv = overlap_box.smallEnd(); iv <= overlap_box.bigEnd(); overlap_box.next(iv)) + { + int fac; + if (injected) { +#if ( AMREX_SPACEDIM == 3 ) + Real x = overlap_corner[0] + (iv[0] + 0.5)*dx[0]; + Real y = overlap_corner[1] + (iv[1] + 0.5)*dx[1]; + Real z = overlap_corner[2] + (iv[2] + 0.5)*dx[2]; +#elif ( AMREX_SPACEDIM == 2 ) + Real x = overlap_corner[0] + (iv[0] + 0.5)*dx[0]; + Real y = 0; + Real z = 
overlap_corner[1] + (iv[1] + 0.5)*dx[1]; +#endif + fac = GetRefineFac(x, y, z); + } else { + fac = 1.0; + } + + int ref_num_ppc = num_ppc * AMREX_D_TERM(fac, *fac, *fac); + for (int i_part=0; i_part<ref_num_ppc;i_part++) { + std::array<Real, 3> r; + plasma_injector->getPositionUnitBox(r, i_part, fac); +#if ( AMREX_SPACEDIM == 3 ) + Real x = overlap_corner[0] + (iv[0] + r[0])*dx[0]; + Real y = overlap_corner[1] + (iv[1] + r[1])*dx[1]; + Real z = overlap_corner[2] + (iv[2] + r[2])*dx[2]; +#elif ( AMREX_SPACEDIM == 2 ) + Real x = overlap_corner[0] + (iv[0] + r[0])*dx[0]; + Real y = 0; + Real z = overlap_corner[1] + (iv[1] + r[1])*dx[1]; +#endif + // If the new particle is not inside the tile box, + // go to the next generated particle. +#if ( AMREX_SPACEDIM == 3 ) + if(!tile_realbox.contains( RealVect{x, y, z} )) continue; +#elif ( AMREX_SPACEDIM == 2 ) + if(!tile_realbox.contains( RealVect{x, z} )) continue; +#endif + ++np; + } + } + + return np; +} + PhysicalParticleContainer::PhysicalParticleContainer (AmrCore* amr_core, int ispecies, const std::string& name) : WarpXParticleContainer(amr_core, ispecies), @@ -22,7 +78,7 @@ PhysicalParticleContainer::PhysicalParticleContainer (AmrCore* amr_core, int isp ParmParse pp(species_name); pp.query("boost_adjust_transverse_positions", boost_adjust_transverse_positions); - pp.query("do_backward_propagation", do_backward_propagation); + pp.query("do_backward_propagation", do_backward_propagation); } void PhysicalParticleContainer::InitData() @@ -174,9 +230,19 @@ PhysicalParticleContainer::AddParticles (int lev) * but its boundaries need not be aligned with the actual cells of the simulation) */ void -PhysicalParticleContainer::AddPlasma(int lev, RealBox part_realbox) +PhysicalParticleContainer::AddPlasma (int lev, RealBox part_realbox) { - BL_PROFILE("PhysicalParticleContainer::AddPlasma"); +#ifdef AMREX_USE_GPU + AddPlasmaGPU(lev, part_realbox); +#else + AddPlasmaCPU(lev, part_realbox); +#endif +} + +void +PhysicalParticleContainer::AddPlasmaCPU (int lev, RealBox part_realbox) +{ + BL_PROFILE("PhysicalParticleContainer::AddPlasmaCPU"); // If no part_realbox is provided, initialize particles in the whole domain const Geometry& geom = Geom(lev); @@ -365,18 +431,264 @@ PhysicalParticleContainer::AddPlasma(int lev, RealBox part_realbox) attribs[PIdx::uzold] = u[2]; #endif - AddOneParticle(lev, grid_id, tile_id, x, y, z, attribs); + AddOneParticle(lev, grid_id, tile_id, x, y, z, attribs); } } if (cost) { - wt = (amrex::second() - wt) / tile_box.d_numPts(); - (*cost)[mfi].plus(wt, tile_box); + wt = (amrex::second() - wt) / tile_box.d_numPts(); + FArrayBox* costfab = cost->fabPtr(mfi); + AMREX_LAUNCH_HOST_DEVICE_LAMBDA ( tile_box, work_box, + { + costfab->plus(wt, work_box); + }); } } } } +#ifdef AMREX_USE_GPU +void +PhysicalParticleContainer::AddPlasmaGPU (int lev, RealBox part_realbox) +{ + BL_PROFILE("PhysicalParticleContainer::AddPlasmaGPU"); + + // If no part_realbox is provided, initialize particles in the whole domain + const Geometry& geom = Geom(lev); + if (!part_realbox.ok()) part_realbox = geom.ProbDomain(); + + int num_ppc = plasma_injector->num_particles_per_cell; + + const Real* dx = geom.CellSize(); + + Real scale_fac; +#if AMREX_SPACEDIM==3 + scale_fac = dx[0]*dx[1]*dx[2]/num_ppc; +#elif AMREX_SPACEDIM==2 + scale_fac = dx[0]*dx[1]/num_ppc; +#endif + +#ifdef _OPENMP + // First touch all tiles in the map in serial + for (MFIter mfi = MakeMFIter(lev); mfi.isValid(); ++mfi) { + const int grid_id = mfi.index(); + const int tile_id = 
mfi.LocalTileIndex(); + GetParticles(lev)[std::make_pair(grid_id, tile_id)]; + } +#endif + + MultiFab* cost = WarpX::getCosts(lev); + + if ( (not m_refined_injection_mask) and WarpX::do_moving_window) + { + Box mask_box = geom.Domain(); + mask_box.setSmall(WarpX::moving_window_dir, 0); + mask_box.setBig(WarpX::moving_window_dir, 0); + m_refined_injection_mask.reset( new IArrayBox(mask_box)); + m_refined_injection_mask->setVal(-1); + } + + MFItInfo info; + if (do_tiling) { + info.EnableTiling(tile_size); + } + info.SetDynamic(true); + +#ifdef _OPENMP +#pragma omp parallel if (not WarpX::serialize_ics) +#endif + { + std::array<Real,PIdx::nattribs> attribs; + attribs.fill(0.0); + + // Loop through the tiles + for (MFIter mfi = MakeMFIter(lev, info); mfi.isValid(); ++mfi) { + + Real wt = amrex::second(); + + const Box& tile_box = mfi.tilebox(); + const RealBox tile_realbox = WarpX::getRealBox(tile_box, lev); + + // Find the cells of part_box that overlap with tile_realbox + // If there is no overlap, just go to the next tile in the loop + RealBox overlap_realbox; + Box overlap_box; + Real ncells_adjust; + bool no_overlap = 0; + + for (int dir=0; dir<AMREX_SPACEDIM; dir++) { + if ( tile_realbox.lo(dir) <= part_realbox.hi(dir) ) { + ncells_adjust = std::floor( (tile_realbox.lo(dir) - part_realbox.lo(dir))/dx[dir] ); + overlap_realbox.setLo( dir, part_realbox.lo(dir) + std::max(ncells_adjust, 0.) * dx[dir]); + } else { + no_overlap = 1; break; + } + if ( tile_realbox.hi(dir) >= part_realbox.lo(dir) ) { + ncells_adjust = std::floor( (part_realbox.hi(dir) - tile_realbox.hi(dir))/dx[dir] ); + overlap_realbox.setHi( dir, part_realbox.hi(dir) - std::max(ncells_adjust, 0.) * dx[dir]); + } else { + no_overlap = 1; break; + } + // Count the number of cells in this direction in overlap_realbox + overlap_box.setSmall( dir, 0 ); + overlap_box.setBig( dir, + int( round((overlap_realbox.hi(dir)-overlap_realbox.lo(dir))/dx[dir] )) - 1); + } + if (no_overlap == 1) { + continue; // Go to the next tile + } + + const int grid_id = mfi.index(); + const int tile_id = mfi.LocalTileIndex(); + + Cuda::HostVector<ParticleType> host_particles; + std::array<Cuda::HostVector<Real>, PIdx::nattribs> host_attribs; + + // Loop through the cells of overlap_box and inject + // the corresponding particles + const auto& overlap_corner = overlap_realbox.lo(); + for (IntVect iv = overlap_box.smallEnd(); iv <= overlap_box.bigEnd(); overlap_box.next(iv)) + { + int fac; + if (injected) { +#if ( AMREX_SPACEDIM == 3 ) + Real x = overlap_corner[0] + (iv[0] + 0.5)*dx[0]; + Real y = overlap_corner[1] + (iv[1] + 0.5)*dx[1]; + Real z = overlap_corner[2] + (iv[2] + 0.5)*dx[2]; +#elif ( AMREX_SPACEDIM == 2 ) + Real x = overlap_corner[0] + (iv[0] + 0.5)*dx[0]; + Real y = 0; + Real z = overlap_corner[1] + (iv[1] + 0.5)*dx[1]; +#endif + fac = GetRefineFac(x, y, z); + } else { + fac = 1.0; + } + + int ref_num_ppc = num_ppc * AMREX_D_TERM(fac, *fac, *fac); + for (int i_part=0; i_part<ref_num_ppc;i_part++) { + std::array<Real, 3> r; + plasma_injector->getPositionUnitBox(r, i_part, fac); +#if ( AMREX_SPACEDIM == 3 ) + Real x = overlap_corner[0] + (iv[0] + r[0])*dx[0]; + Real y = overlap_corner[1] + (iv[1] + r[1])*dx[1]; + Real z = overlap_corner[2] + (iv[2] + r[2])*dx[2]; +#elif ( AMREX_SPACEDIM == 2 ) + Real x = overlap_corner[0] + (iv[0] + r[0])*dx[0]; + Real y = 0; + Real z = overlap_corner[1] + (iv[1] + r[1])*dx[1]; +#endif + // If the new particle is not inside the tile box, + // go to the next generated particle. 
+#if ( AMREX_SPACEDIM == 3 ) + if(!tile_realbox.contains( RealVect{x, y, z} )) continue; +#elif ( AMREX_SPACEDIM == 2 ) + if(!tile_realbox.contains( RealVect{x, z} )) continue; +#endif + + Real dens; + std::array<Real, 3> u; + if (WarpX::gamma_boost == 1.){ + // Lab-frame simulation + // If the particle is not within the species's + // xmin, xmax, ymin, ymax, zmin, zmax, go to + // the next generated particle. + if (!plasma_injector->insideBounds(x, y, z)) continue; + plasma_injector->getMomentum(u, x, y, z); + dens = plasma_injector->getDensity(x, y, z); + } else { + // Boosted-frame simulation + Real c = PhysConst::c; + Real gamma_boost = WarpX::gamma_boost; + Real beta_boost = WarpX::beta_boost; + // Since the user provides the density distribution + // at t_lab=0 and in the lab-frame coordinates, + // we need to find the lab-frame position of this + // particle at t_lab=0, from its boosted-frame coordinates + // Assuming ballistic motion, this is given by: + // z0_lab = gamma*( z_boost*(1-beta*betaz_lab) - ct_boost*(betaz_lab-beta) ) + // where betaz_lab is the speed of the particle in the lab frame + // + // In order for this equation to be solvable, betaz_lab + // is explicitly assumed to have no dependency on z0_lab + plasma_injector->getMomentum(u, x, y, 0.); // No z0_lab dependency + // At this point u is the lab-frame momentum + // => Apply the above formula for z0_lab + Real gamma_lab = std::sqrt( 1 + (u[0]*u[0] + u[1]*u[1] + u[2]*u[2])/(c*c) ); + Real betaz_lab = u[2]/gamma_lab/c; + Real t = WarpX::GetInstance().gett_new(lev); + Real z0_lab = gamma_boost * ( z*(1-beta_boost*betaz_lab) - c*t*(betaz_lab-beta_boost) ); + // If the particle is not within the lab-frame zmin, zmax, etc. + // go to the next generated particle. + if (!plasma_injector->insideBounds(x, y, z0_lab)) continue; + // call `getDensity` with lab-frame parameters + dens = plasma_injector->getDensity(x, y, z0_lab); + // At this point u and dens are the lab-frame quantities + // => Perform Lorentz transform + dens = gamma_boost * dens * ( 1 - beta_boost*betaz_lab ); + u[2] = gamma_boost * ( u[2] -beta_boost*c*gamma_lab ); + } + attribs[PIdx::w ] = dens * scale_fac / (AMREX_D_TERM(fac, *fac, *fac)); + attribs[PIdx::ux] = u[0]; + attribs[PIdx::uy] = u[1]; + attribs[PIdx::uz] = u[2]; + +#ifdef WARPX_STORE_OLD_PARTICLE_ATTRIBS + attribs[PIdx::xold] = x; + attribs[PIdx::yold] = y; + attribs[PIdx::zold] = z; + + attribs[PIdx::uxold] = u[0]; + attribs[PIdx::uyold] = u[1]; + attribs[PIdx::uzold] = u[2]; +#endif + + ParticleType p; + p.id() = ParticleType::NextID(); + p.cpu() = ParallelDescriptor::MyProc(); +#if (AMREX_SPACEDIM == 3) + p.pos(0) = x; + p.pos(1) = y; + p.pos(2) = z; +#elif (AMREX_SPACEDIM == 2) + p.pos(0) = x; + p.pos(1) = z; +#endif + + host_particles.push_back(p); + for (int kk = 0; kk < PIdx::nattribs; ++kk) + host_attribs[kk].push_back(attribs[kk]); + } + } + + auto& particle_tile = GetParticles(lev)[std::make_pair(grid_id,tile_id)]; + auto old_size = particle_tile.GetArrayOfStructs().size(); + auto new_size = old_size + host_particles.size(); + particle_tile.resize(new_size); + + thrust::copy(host_particles.begin(), + host_particles.end(), + particle_tile.GetArrayOfStructs().begin() + old_size); + + for (int kk = 0; kk < PIdx::nattribs; ++kk) { + thrust::copy(host_attribs[kk].begin(), + host_attribs[kk].end(), + particle_tile.GetStructOfArrays().GetRealData(kk).begin() + old_size); + } + + if (cost) { + wt = (amrex::second() - wt) / tile_box.d_numPts(); + FArrayBox* costfab = cost->fabPtr(mfi); + 
AMREX_LAUNCH_HOST_DEVICE_LAMBDA ( tile_box, work_box, + { + costfab->plus(wt, work_box); + }); + } + } + } +} +#endif + #ifdef WARPX_DO_ELECTROSTATIC void PhysicalParticleContainer:: @@ -425,10 +737,10 @@ FieldGatherES (const amrex::Vector<std::array<std::unique_ptr<amrex::MultiFab>, const FArrayBox& ezfab = (*E[lev][2])[pti]; #endif - WRPX_INTERPOLATE_CIC(particles.data(), nstride, np, - Exp.data(), Eyp.data(), + WRPX_INTERPOLATE_CIC(particles.dataPtr(), nstride, np, + Exp.dataPtr(), Eyp.dataPtr(), #if AMREX_SPACEDIM == 3 - Ezp.data(), + Ezp.dataPtr(), #endif exfab.dataPtr(), eyfab.dataPtr(), #if AMREX_SPACEDIM == 3 @@ -495,10 +807,10 @@ FieldGatherES (const amrex::Vector<std::array<std::unique_ptr<amrex::MultiFab>, #endif if (lev == 0) { - WRPX_INTERPOLATE_CIC(particles.data(), nstride, np, - Exp.data(), Eyp.data(), + WRPX_INTERPOLATE_CIC(particles.dataPtr(), nstride, np, + Exp.dataPtr(), Eyp.dataPtr(), #if AMREX_SPACEDIM == 3 - Ezp.data(), + Ezp.dataPtr(), #endif exfab.dataPtr(), eyfab.dataPtr(), #if AMREX_SPACEDIM == 3 @@ -515,10 +827,10 @@ FieldGatherES (const amrex::Vector<std::array<std::unique_ptr<amrex::MultiFab>, const Box& coarse_box = coarsened_fine_BA[pti]; const Real* coarse_dx = Geom(0).CellSize(); - WRPX_INTERPOLATE_CIC_TWO_LEVELS(particles.data(), nstride, np, - Exp.data(), Eyp.data(), + WRPX_INTERPOLATE_CIC_TWO_LEVELS(particles.dataPtr(), nstride, np, + Exp.dataPtr(), Eyp.dataPtr(), #if AMREX_SPACEDIM == 3 - Ezp.data(), + Ezp.dataPtr(), #endif exfab.dataPtr(), eyfab.dataPtr(), #if AMREX_SPACEDIM == 3 @@ -573,14 +885,14 @@ PhysicalParticleContainer::EvolveES (const Vector<std::array<std::unique_ptr<Mul // // Particle Push // - WRPX_PUSH_LEAPFROG(particles.data(), nstride, np, - uxp.data(), uyp.data(), + WRPX_PUSH_LEAPFROG(particles.dataPtr(), nstride, np, + uxp.dataPtr(), uyp.dataPtr(), #if AMREX_SPACEDIM == 3 - uzp.data(), + uzp.dataPtr(), #endif - Exp.data(), Eyp.data(), + Exp.dataPtr(), Eyp.dataPtr(), #if AMREX_SPACEDIM == 3 - Ezp.data(), + Ezp.dataPtr(), #endif &this->charge, &this->mass, &dt, prob_domain.lo(), prob_domain.hi()); @@ -607,7 +919,7 @@ PhysicalParticleContainer::FieldGather (int lev, #pragma omp parallel #endif { - Vector<Real> xp, yp, zp; + Cuda::DeviceVector<Real> xp, yp, zp; for (WarpXParIter pti(*this, lev); pti.isValid(); ++pti) { @@ -653,12 +965,14 @@ PhysicalParticleContainer::FieldGather (int lev, // Field Gather // const int ll4symtry = false; - const int l_lower_order_in_v = warpx_l_lower_order_in_v(); long lvect_fieldgathe = 64; warpx_geteb_energy_conserving( - &np, xp.data(), yp.data(), zp.data(), - Exp.data(),Eyp.data(),Ezp.data(), - Bxp.data(),Byp.data(),Bzp.data(), + &np, + xp.dataPtr(), + yp.dataPtr(), + zp.dataPtr(), + Exp.dataPtr(),Eyp.dataPtr(),Ezp.dataPtr(), + Bxp.dataPtr(),Byp.dataPtr(),Bzp.dataPtr(), ixyzmin, &xyzmin[0], &xyzmin[1], &xyzmin[2], &dx[0], &dx[1], &dx[2], @@ -669,13 +983,17 @@ PhysicalParticleContainer::FieldGather (int lev, BL_TO_FORTRAN_ANYD(bxfab), BL_TO_FORTRAN_ANYD(byfab), BL_TO_FORTRAN_ANYD(bzfab), - &ll4symtry, &l_lower_order_in_v, + &ll4symtry, &WarpX::l_lower_order_in_v, &lvect_fieldgathe, &WarpX::field_gathering_algo); if (cost) { const Box& tbx = pti.tilebox(); wt = (amrex::second() - wt) / tbx.d_numPts(); - (*cost)[pti].plus(wt, tbx); + FArrayBox* costfab = cost->fabPtr(pti); + AMREX_LAUNCH_HOST_DEVICE_LAMBDA ( tbx, work_box, + { + costfab->plus(wt, work_box); + }); } } } @@ -696,20 +1014,15 @@ PhysicalParticleContainer::Evolve (int lev, BL_PROFILE_VAR_NS("PPC::Evolve::Copy", blp_copy); 
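One note on the boosted-frame branch of ``AddPlasma`` above: each generated particle is mapped back to its lab-frame position at ``t_lab = 0`` before the density is sampled. Written out for reference (a restatement of the derivation in the code comment, not a new result), the formula follows from the Lorentz transformation of ``(t, z)`` combined with ballistic motion in the lab frame:

::

    % Lorentz transform from the boosted frame to the lab frame:
    z_{lab} = \gamma \left( z_{boost} + \beta c\, t_{boost} \right), \qquad
    t_{lab} = \gamma \left( t_{boost} + \beta z_{boost} / c \right)

    % Ballistic motion in the lab frame: z_{lab} = z_{0,lab} + \beta_z^{lab} c\, t_{lab}
    z_{0,lab} = z_{lab} - \beta_z^{lab} c\, t_{lab}
              = \gamma \left[ z_{boost} \left( 1 - \beta \beta_z^{lab} \right)
                              - c\, t_{boost} \left( \beta_z^{lab} - \beta \right) \right]

As the comment stresses, this is only solvable because ``betaz_lab`` is assumed independent of ``z0_lab``. The density and longitudinal momentum are then transformed back to the boosted frame with ``dens = gamma_boost * dens * (1 - beta_boost*betaz_lab)`` and ``u[2] = gamma_boost * (u[2] - beta_boost*c*gamma_lab)``, exactly as in the hunk above.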
BL_PROFILE_VAR_NS("PICSAR::FieldGather", blp_pxr_fg); BL_PROFILE_VAR_NS("PICSAR::ParticlePush", blp_pxr_pp); - BL_PROFILE_VAR_NS("PICSAR::CurrentDeposition", blp_pxr_cd); - BL_PROFILE_VAR_NS("PPC::Evolve::Accumulate", blp_accumulate); BL_PROFILE_VAR_NS("PPC::Evolve::partition", blp_partition); - + const std::array<Real,3>& dx = WarpX::CellSize(lev); const std::array<Real,3>& cdx = WarpX::CellSize(std::max(lev-1,0)); const auto& mypc = WarpX::GetInstance().GetPartContainer(); const int nstencilz_fdtd_nci_corr = mypc.nstencilz_fdtd_nci_corr; - // WarpX assumes the same number of guard cells for Jx, Jy, Jz - long ngJ = jx.nGrow(); - - BL_ASSERT(OnSameGrids(lev,Ex)); + BL_ASSERT(OnSameGrids(lev,jx)); MultiFab* cost = WarpX::getCosts(lev); @@ -719,17 +1032,26 @@ PhysicalParticleContainer::Evolve (int lev, bool has_buffer = cEx || cjx; #ifdef _OPENMP -#pragma omp parallel +#pragma omp parallel #endif { - Vector<Real> xp, yp, zp, giv; - FArrayBox local_rho, local_jx, local_jy, local_jz; +#ifdef _OPENMP + int thread_num = omp_get_thread_num(); +#else + int thread_num = 0; +#endif + + if (local_rho[thread_num] == nullptr) local_rho[thread_num].reset( new amrex::FArrayBox()); + if (local_jx[thread_num] == nullptr) local_jx[thread_num].reset( new amrex::FArrayBox()); + if (local_jy[thread_num] == nullptr) local_jy[thread_num].reset( new amrex::FArrayBox()); + if (local_jz[thread_num] == nullptr) local_jz[thread_num].reset( new amrex::FArrayBox()); + FArrayBox filtered_Ex, filtered_Ey, filtered_Ez; FArrayBox filtered_Bx, filtered_By, filtered_Bz; std::vector<bool> inexflag; Vector<long> pid; - Vector<Real> tmp; - Vector<ParticleType> particle_tmp; + RealVector tmp; + ParticleVector particle_tmp; for (WarpXParIter pti(*this, lev); pti.isValid(); ++pti) { @@ -760,7 +1082,7 @@ PhysicalParticleContainer::Evolve (int lev, FArrayBox const* byfab = &(By[pti]); FArrayBox const* bzfab = &(Bz[pti]); - if (warpx_use_fdtd_nci_corr()) + if (WarpX::use_fdtd_nci_corr) { #if (AMREX_SPACEDIM == 2) const Box& tbox = amrex::grow(pti.tilebox(),{static_cast<int>(WarpX::nox), @@ -823,10 +1145,6 @@ PhysicalParticleContainer::Evolve (int lev, #endif } - FArrayBox& jxfab = jx[pti]; - FArrayBox& jyfab = jy[pti]; - FArrayBox& jzfab = jz[pti]; - Exp.assign(np,0.0); Eyp.assign(np,0.0); Ezp.assign(np,0.0); @@ -834,7 +1152,7 @@ PhysicalParticleContainer::Evolve (int lev, Byp.assign(np,WarpX::B_external[1]); Bzp.assign(np,WarpX::B_external[2]); - giv.resize(np); + m_giv[thread_num].resize(np); long nfine_current = np; long nfine_gather = np; @@ -933,121 +1251,33 @@ PhysicalParticleContainer::Evolve (int lev, // copy data from particle container to temp arrays // BL_PROFILE_VAR_START(blp_copy); - pti.GetPosition(xp, yp, zp); + pti.GetPosition(m_xp[thread_num], m_yp[thread_num], m_zp[thread_num]); BL_PROFILE_VAR_STOP(blp_copy); - const std::array<Real,3>& xyzmin_tile = WarpX::LowerCorner(pti.tilebox(), lev); - const std::array<Real,3>& xyzmin_grid = WarpX::LowerCorner(box, lev); - const int* ixyzmin_grid = box.loVect(); - - long lvect = 8; - - auto depositCharge = [&] (MultiFab* rhomf, MultiFab* crhomf, int icomp) - { - long ngRho = rhomf->nGrow(); - Real* data_ptr; - Box tile_box = convert(pti.tilebox(), IntVect::TheUnitVector()); - const int *rholen; - - if (np_current > 0) - { - FArrayBox& rhofab = (*rhomf)[pti]; - const std::array<Real, 3>& xyzmin = xyzmin_tile; - tile_box.grow(ngRho); - local_rho.resize(tile_box); - local_rho = 0.0; - data_ptr = local_rho.dataPtr(); - rholen = local_rho.length(); - -#if (AMREX_SPACEDIM == 3) - 
const long nx = rholen[0]-1-2*ngRho; - const long ny = rholen[1]-1-2*ngRho; - const long nz = rholen[2]-1-2*ngRho; -#else - const long nx = rholen[0]-1-2*ngRho; - const long ny = 0; - const long nz = rholen[1]-1-2*ngRho; -#endif - warpx_charge_deposition(data_ptr, &np_current, - xp.data(), yp.data(), zp.data(), wp.data(), - &this->charge, - &xyzmin[0], &xyzmin[1], &xyzmin[2], - &dx[0], &dx[1], &dx[2], &nx, &ny, &nz, - &ngRho, &ngRho, &ngRho, - &WarpX::nox,&WarpX::noy,&WarpX::noz, - &lvect, &WarpX::charge_deposition_algo); - - const int ncomp = 1; - amrex_atomic_accumulate_fab(BL_TO_FORTRAN_3D(local_rho), - BL_TO_FORTRAN_N_3D(rhofab,icomp), ncomp); - } - - if (np_current < np) - { - const IntVect& ref_ratio = WarpX::RefRatio(lev-1); - const Box& ctilebox = amrex::coarsen(pti.tilebox(), ref_ratio); - const std::array<Real,3>& cxyzmin_tile = WarpX::LowerCorner(ctilebox, lev-1); - - tile_box = amrex::convert(ctilebox, IntVect::TheUnitVector()); - tile_box.grow(ngRho); - - local_rho.resize(tile_box); - - local_rho = 0.0; - - data_ptr = local_rho.dataPtr(); - rholen = local_rho.length(); - -#if (AMREX_SPACEDIM == 3) - const long nx = rholen[0]-1-2*ngRho; - const long ny = rholen[1]-1-2*ngRho; - const long nz = rholen[2]-1-2*ngRho; -#else - const long nx = rholen[0]-1-2*ngRho; - const long ny = 0; - const long nz = rholen[1]-1-2*ngRho; -#endif - - long ncrse = np - nfine_current; - warpx_charge_deposition(data_ptr, &ncrse, - xp.data() + nfine_current, - yp.data() + nfine_current, - zp.data() + nfine_current, - wp.data() + nfine_current, - &this->charge, - &cxyzmin_tile[0], &cxyzmin_tile[1], &cxyzmin_tile[2], - &cdx[0], &cdx[1], &cdx[2], &nx, &ny, &nz, - &ngRho, &ngRho, &ngRho, - &WarpX::nox,&WarpX::noy,&WarpX::noz, - &lvect, &WarpX::charge_deposition_algo); - - FArrayBox& crhofab = (*crhomf)[pti]; - - const int ncomp = 1; - amrex_atomic_accumulate_fab(BL_TO_FORTRAN_3D(local_rho), - BL_TO_FORTRAN_N_3D(crhofab,icomp), ncomp); - } - }; - - if (rho) depositCharge(rho, crho, 0); - + if (rho) DepositCharge(pti, wp, rho, crho, 0, np_current, np, thread_num, lev); + if (! do_not_push) { // // Field Gather of Aux Data (i.e., the full solution) // const int ll4symtry = false; - const int l_lower_order_in_v = warpx_l_lower_order_in_v(); long lvect_fieldgathe = 64; + const std::array<Real,3>& xyzmin_grid = WarpX::LowerCorner(box, lev); + const int* ixyzmin_grid = box.loVect(); + const long np_gather = (cEx) ? 
nfine_gather : np; BL_PROFILE_VAR_START(blp_pxr_fg); warpx_geteb_energy_conserving( - &np_gather, xp.data(), yp.data(), zp.data(), - Exp.data(),Eyp.data(),Ezp.data(), - Bxp.data(),Byp.data(),Bzp.data(), + &np_gather, + m_xp[thread_num].dataPtr(), + m_yp[thread_num].dataPtr(), + m_zp[thread_num].dataPtr(), + Exp.dataPtr(),Eyp.dataPtr(),Ezp.dataPtr(), + Bxp.dataPtr(),Byp.dataPtr(),Bzp.dataPtr(), ixyzmin_grid, &xyzmin_grid[0], &xyzmin_grid[1], &xyzmin_grid[2], &dx[0], &dx[1], &dx[2], @@ -1058,7 +1288,7 @@ PhysicalParticleContainer::Evolve (int lev, BL_TO_FORTRAN_ANYD(*bxfab), BL_TO_FORTRAN_ANYD(*byfab), BL_TO_FORTRAN_ANYD(*bzfab), - &ll4symtry, &l_lower_order_in_v, + &ll4symtry, &WarpX::l_lower_order_in_v, &lvect_fieldgathe, &WarpX::field_gathering_algo); if (np_gather < np) @@ -1075,7 +1305,7 @@ PhysicalParticleContainer::Evolve (int lev, const FArrayBox* cbyfab = &(*cBy)[pti]; const FArrayBox* cbzfab = &(*cBz)[pti]; - if (warpx_use_fdtd_nci_corr()) + if (WarpX::use_fdtd_nci_corr) { #if (AMREX_SPACEDIM == 2) const Box& tbox = amrex::grow(cbox,{static_cast<int>(WarpX::nox), @@ -1139,9 +1369,12 @@ PhysicalParticleContainer::Evolve (int lev, long ncrse = np - nfine_gather; warpx_geteb_energy_conserving( - &ncrse, xp.data()+nfine_gather, yp.data()+nfine_gather, zp.data()+nfine_gather, - Exp.data()+nfine_gather, Eyp.data()+nfine_gather, Ezp.data()+nfine_gather, - Bxp.data()+nfine_gather, Byp.data()+nfine_gather, Bzp.data()+nfine_gather, + &ncrse, + m_xp[thread_num].dataPtr()+nfine_gather, + m_yp[thread_num].dataPtr()+nfine_gather, + m_zp[thread_num].dataPtr()+nfine_gather, + Exp.dataPtr()+nfine_gather, Eyp.dataPtr()+nfine_gather, Ezp.dataPtr()+nfine_gather, + Bxp.dataPtr()+nfine_gather, Byp.dataPtr()+nfine_gather, Bzp.dataPtr()+nfine_gather, cixyzmin_grid, &cxyzmin_grid[0], &cxyzmin_grid[1], &cxyzmin_grid[2], &cdx[0], &cdx[1], &cdx[2], @@ -1152,7 +1385,7 @@ PhysicalParticleContainer::Evolve (int lev, BL_TO_FORTRAN_ANYD(*cbxfab), BL_TO_FORTRAN_ANYD(*cbyfab), BL_TO_FORTRAN_ANYD(*cbzfab), - &ll4symtry, &l_lower_order_in_v, + &ll4symtry, &WarpX::l_lower_order_in_v, &lvect_fieldgathe, &WarpX::field_gathering_algo); } @@ -1162,141 +1395,34 @@ PhysicalParticleContainer::Evolve (int lev, // Particle Push // BL_PROFILE_VAR_START(blp_pxr_pp); - PushPX(pti, xp, yp, zp, giv, dt); + PushPX(pti, m_xp[thread_num], m_yp[thread_num], m_zp[thread_num], + m_giv[thread_num], dt); BL_PROFILE_VAR_STOP(blp_pxr_pp); // - // Current Deposition onto fine patch + // Current Deposition // - - BL_PROFILE_VAR_START(blp_pxr_cd); - Real *jx_ptr, *jy_ptr, *jz_ptr; - const int *jxntot, *jyntot, *jzntot; - Box tbx = convert(pti.tilebox(), WarpX::jx_nodal_flag); - Box tby = convert(pti.tilebox(), WarpX::jy_nodal_flag); - Box tbz = convert(pti.tilebox(), WarpX::jz_nodal_flag); - Box gtbx, gtby, gtbz; - - const std::array<Real, 3>& xyzmin = xyzmin_tile; - - if (np_current > 0) - { - tbx.grow(ngJ); - tby.grow(ngJ); - tbz.grow(ngJ); - - local_jx.resize(tbx); - local_jy.resize(tby); - local_jz.resize(tbz); - - local_jx = 0.0; - local_jy = 0.0; - local_jz = 0.0; - - jx_ptr = local_jx.dataPtr(); - jy_ptr = local_jy.dataPtr(); - jz_ptr = local_jz.dataPtr(); - - jxntot = local_jx.length(); - jyntot = local_jy.length(); - jzntot = local_jz.length(); - - warpx_current_deposition( - jx_ptr, &ngJ, jxntot, - jy_ptr, &ngJ, jyntot, - jz_ptr, &ngJ, jzntot, - &np_current, xp.data(), yp.data(), zp.data(), - uxp.data(), uyp.data(), uzp.data(), - giv.data(), wp.data(), &this->charge, - &xyzmin[0], &xyzmin[1], &xyzmin[2], - &dt, &dx[0], &dx[1], &dx[2], 
- &WarpX::nox,&WarpX::noy,&WarpX::noz, - &lvect,&WarpX::current_deposition_algo); - - BL_PROFILE_VAR_STOP(blp_pxr_cd); - - BL_PROFILE_VAR_START(blp_accumulate); - const int ncomp = 1; - amrex_atomic_accumulate_fab(BL_TO_FORTRAN_3D(local_jx), - BL_TO_FORTRAN_3D(jxfab), ncomp); - - amrex_atomic_accumulate_fab(BL_TO_FORTRAN_3D(local_jy), - BL_TO_FORTRAN_3D(jyfab), ncomp); - - amrex_atomic_accumulate_fab(BL_TO_FORTRAN_3D(local_jz), - BL_TO_FORTRAN_3D(jzfab), ncomp); - BL_PROFILE_VAR_STOP(blp_accumulate); - } - - if (np_current < np) - { - const IntVect& ref_ratio = WarpX::RefRatio(lev-1); - const Box& ctilebox = amrex::coarsen(pti.tilebox(),ref_ratio); - const std::array<Real,3>& cxyzmin_tile = WarpX::LowerCorner(ctilebox, lev-1); - - tbx = amrex::convert(ctilebox, WarpX::jx_nodal_flag); - tby = amrex::convert(ctilebox, WarpX::jy_nodal_flag); - tbz = amrex::convert(ctilebox, WarpX::jz_nodal_flag); - tbx.grow(ngJ); - tby.grow(ngJ); - tbz.grow(ngJ); - - local_jx.resize(tbx); - local_jy.resize(tby); - local_jz.resize(tbz); - - local_jx = 0.0; - local_jy = 0.0; - local_jz = 0.0; - - jx_ptr = local_jx.dataPtr(); - jy_ptr = local_jy.dataPtr(); - jz_ptr = local_jz.dataPtr(); - - jxntot = local_jx.length(); - jyntot = local_jy.length(); - jzntot = local_jz.length(); - - long ncrse = np - nfine_current; - warpx_current_deposition( - jx_ptr, &ngJ, jxntot, - jy_ptr, &ngJ, jyntot, - jz_ptr, &ngJ, jzntot, - &ncrse, xp.data()+nfine_current, yp.data()+nfine_current, zp.data()+nfine_current, - uxp.data()+nfine_current, uyp.data()+nfine_current, uzp.data()+nfine_current, - giv.data()+nfine_current, wp.data()+nfine_current, &this->charge, - &cxyzmin_tile[0], &cxyzmin_tile[1], &cxyzmin_tile[2], - &dt, &cdx[0], &cdx[1], &cdx[2], - &WarpX::nox,&WarpX::noy,&WarpX::noz, - &lvect,&WarpX::current_deposition_algo); - - FArrayBox& cjxfab = (*cjx)[pti]; - FArrayBox& cjyfab = (*cjy)[pti]; - FArrayBox& cjzfab = (*cjz)[pti]; - - const int ncomp = 1; - amrex_atomic_accumulate_fab(BL_TO_FORTRAN_3D(local_jx), - BL_TO_FORTRAN_3D(cjxfab), ncomp); - amrex_atomic_accumulate_fab(BL_TO_FORTRAN_3D(local_jy), - BL_TO_FORTRAN_3D(cjyfab), ncomp); - amrex_atomic_accumulate_fab(BL_TO_FORTRAN_3D(local_jz), - BL_TO_FORTRAN_3D(cjzfab), ncomp); - } - + DepositCurrent(pti, wp, uxp, uyp, uzp, jx, jy, jz, + cjx, cjy, cjz, np_current, np, thread_num, lev, dt); + // // copy particle data back // BL_PROFILE_VAR_START(blp_copy); - pti.SetPosition(xp, yp, zp); + pti.SetPosition(m_xp[thread_num], m_yp[thread_num], m_zp[thread_num]); BL_PROFILE_VAR_STOP(blp_copy); } - - if (rho) depositCharge(rho, crho, 1); + + if (rho) DepositCharge(pti, wp, rho, crho, 1, np_current, np, thread_num, lev); if (cost) { const Box& tbx = pti.tilebox(); wt = (amrex::second() - wt) / tbx.d_numPts(); - (*cost)[pti].plus(wt, tbx); + FArrayBox* costfab = cost->fabPtr(pti); + AMREX_LAUNCH_HOST_DEVICE_LAMBDA ( tbx, work_box, + { + costfab->plus(wt, work_box); + }); } } } @@ -1304,8 +1430,10 @@ PhysicalParticleContainer::Evolve (int lev, void PhysicalParticleContainer::PushPX(WarpXParIter& pti, - Vector<Real>& xp, Vector<Real>& yp, Vector<Real>& zp, - Vector<Real>& giv, + Cuda::DeviceVector<Real>& xp, + Cuda::DeviceVector<Real>& yp, + Cuda::DeviceVector<Real>& zp, + Cuda::DeviceVector<Real>& giv, Real dt) { @@ -1330,15 +1458,19 @@ PhysicalParticleContainer::PushPX(WarpXParIter& pti, auto& uypold = attribs[PIdx::uyold]; auto& uzpold = attribs[PIdx::uzold]; - warpx_copy_attribs(&np, xp.data(), yp.data(), zp.data(), - uxp.data(), uyp.data(), uzp.data(), - xpold.data(), ypold.data(), 
zpold.data(), - uxpold.data(), uypold.data(), uzpold.data()); + warpx_copy_attribs(&np, xp.dataPtr(), yp.dataPtr(), zp.dataPtr(), + uxp.dataPtr(), uyp.dataPtr(), uzp.dataPtr(), + xpold.dataPtr(), ypold.dataPtr(), zpold.dataPtr(), + uxpold.dataPtr(), uypold.dataPtr(), uzpold.dataPtr()); #endif - warpx_particle_pusher(&np, xp.data(), yp.data(), zp.data(), - uxp.data(), uyp.data(), uzp.data(), giv.data(), + warpx_particle_pusher(&np, + xp.dataPtr(), + yp.dataPtr(), + zp.dataPtr(), + uxp.dataPtr(), uyp.dataPtr(), uzp.dataPtr(), + giv.dataPtr(), Exp.dataPtr(), Eyp.dataPtr(), Ezp.dataPtr(), Bxp.dataPtr(), Byp.dataPtr(), Bzp.dataPtr(), &this->charge, &this->mass, &dt, @@ -1351,6 +1483,8 @@ PhysicalParticleContainer::PushP (int lev, Real dt, const MultiFab& Ex, const MultiFab& Ey, const MultiFab& Ez, const MultiFab& Bx, const MultiFab& By, const MultiFab& Bz) { + BL_PROFILE("PhysicalParticleContainer::PushP"); + if (do_not_push) return; const std::array<Real,3>& dx = WarpX::CellSize(lev); @@ -1359,8 +1493,11 @@ PhysicalParticleContainer::PushP (int lev, Real dt, #pragma omp parallel #endif { - Vector<Real> xp, yp, zp, giv; - +#ifdef _OPENMP + int thread_num = omp_get_thread_num(); +#else + int thread_num = 0; +#endif for (WarpXParIter pti(*this, lev); pti.isValid(); ++pti) { const Box& box = pti.validbox(); @@ -1394,23 +1531,25 @@ PhysicalParticleContainer::PushP (int lev, Real dt, Byp.assign(np,WarpX::B_external[1]); Bzp.assign(np,WarpX::B_external[2]); - giv.resize(np); + m_giv[thread_num].resize(np); // // copy data from particle container to temp arrays // - pti.GetPosition(xp, yp, zp); + pti.GetPosition(m_xp[thread_num], m_yp[thread_num], m_zp[thread_num]); const std::array<Real,3>& xyzmin_grid = WarpX::LowerCorner(box, lev); const int* ixyzmin_grid = box.loVect(); const int ll4symtry = false; - const int l_lower_order_in_v = true; long lvect_fieldgathe = 64; warpx_geteb_energy_conserving( - &np, xp.data(), yp.data(), zp.data(), - Exp.data(),Eyp.data(),Ezp.data(), - Bxp.data(),Byp.data(),Bzp.data(), + &np, + m_xp[thread_num].dataPtr(), + m_yp[thread_num].dataPtr(), + m_zp[thread_num].dataPtr(), + Exp.dataPtr(),Eyp.dataPtr(),Ezp.dataPtr(), + Bxp.dataPtr(),Byp.dataPtr(),Bzp.dataPtr(), ixyzmin_grid, &xyzmin_grid[0], &xyzmin_grid[1], &xyzmin_grid[2], &dx[0], &dx[1], &dx[2], @@ -1421,11 +1560,15 @@ PhysicalParticleContainer::PushP (int lev, Real dt, BL_TO_FORTRAN_ANYD(bxfab), BL_TO_FORTRAN_ANYD(byfab), BL_TO_FORTRAN_ANYD(bzfab), - &ll4symtry, &l_lower_order_in_v, + &ll4symtry, &WarpX::l_lower_order_in_v, &lvect_fieldgathe, &WarpX::field_gathering_algo); - warpx_particle_pusher_momenta(&np, xp.data(), yp.data(), zp.data(), - uxp.data(), uyp.data(), uzp.data(), giv.data(), + warpx_particle_pusher_momenta(&np, + m_xp[thread_num].dataPtr(), + m_yp[thread_num].dataPtr(), + m_zp[thread_num].dataPtr(), + uxp.dataPtr(), uyp.dataPtr(), uzp.dataPtr(), + m_giv[thread_num].dataPtr(), Exp.dataPtr(), Eyp.dataPtr(), Ezp.dataPtr(), Bxp.dataPtr(), Byp.dataPtr(), Bzp.dataPtr(), &this->charge, &this->mass, &dt, @@ -1482,7 +1625,7 @@ void PhysicalParticleContainer::GetParticleSlice(const int direction, const Real #pragma omp parallel #endif { - Vector<Real> xp_new, yp_new, zp_new; + RealVector xp_new, yp_new, zp_new; for (WarpXParIter pti(*this, lev); pti.isValid(); ++pti) { @@ -1615,7 +1758,7 @@ int PhysicalParticleContainer::GetRefineFac(const Real x, const Real y, const Re stretched_boxes.push_back(bx); } - BoxArray stretched_ba(stretched_boxes.data(), stretched_boxes.size()); + BoxArray 
stretched_ba(stretched_boxes.dataPtr(), stretched_boxes.size()); const int num_ghost = 0; if ( stretched_ba.intersects(Box(iv, iv), num_ghost) ) diff --git a/Source/RigidInjectedParticleContainer.H b/Source/RigidInjectedParticleContainer.H index f2440ad7a..8a9ac9e6e 100644 --- a/Source/RigidInjectedParticleContainer.H +++ b/Source/RigidInjectedParticleContainer.H @@ -43,10 +43,10 @@ public: amrex::Real dt) override; virtual void PushPX(WarpXParIter& pti, - amrex::Vector<amrex::Real>& xp, - amrex::Vector<amrex::Real>& yp, - amrex::Vector<amrex::Real>& zp, - amrex::Vector<amrex::Real>& giv, + amrex::Cuda::DeviceVector<amrex::Real>& xp, + amrex::Cuda::DeviceVector<amrex::Real>& yp, + amrex::Cuda::DeviceVector<amrex::Real>& zp, + amrex::Cuda::DeviceVector<amrex::Real>& giv, amrex::Real dt) override; virtual void PushP (int lev, amrex::Real dt, diff --git a/Source/RigidInjectedParticleContainer.cpp b/Source/RigidInjectedParticleContainer.cpp index 96bdc59a4..eb2c1c4a8 100644 --- a/Source/RigidInjectedParticleContainer.cpp +++ b/Source/RigidInjectedParticleContainer.cpp @@ -73,7 +73,7 @@ RigidInjectedParticleContainer::RemapParticles() // Note that the particles are already in the boosted frame. // This value is saved to advance the particles not injected yet - Vector<Real> xp, yp, zp; + Cuda::DeviceVector<Real> xp, yp, zp; for (WarpXParIter pti(*this, lev); pti.isValid(); ++pti) { @@ -134,7 +134,7 @@ RigidInjectedParticleContainer::BoostandRemapParticles() #pragma omp parallel #endif { - Vector<Real> xp, yp, zp; + Cuda::DeviceVector<Real> xp, yp, zp; for (WarpXParIter pti(*this, 0); pti.isValid(); ++pti) { @@ -205,8 +205,10 @@ RigidInjectedParticleContainer::BoostandRemapParticles() void RigidInjectedParticleContainer::PushPX(WarpXParIter& pti, - Vector<Real>& xp, Vector<Real>& yp, Vector<Real>& zp, - Vector<Real>& giv, + Cuda::DeviceVector<Real>& xp, + Cuda::DeviceVector<Real>& yp, + Cuda::DeviceVector<Real>& zp, + Cuda::DeviceVector<Real>& giv, Real dt) { @@ -231,15 +233,16 @@ RigidInjectedParticleContainer::PushPX(WarpXParIter& pti, auto& uypold = attribs[PIdx::uyold]; auto& uzpold = attribs[PIdx::uzold]; - warpx_copy_attribs(&np, xp.data(), yp.data(), zp.data(), - uxp.data(), uyp.data(), uzp.data(), - xpold.data(), ypold.data(), zpold.data(), - uxpold.data(), uypold.data(), uzpold.data()); + warpx_copy_attribs(&np, xp.dataPtr(), yp.dataPtr(), zp.dataPtr(), + uxp.dataPtr(), uyp.dataPtr(), uzp.dataPtr(), + xpold.dataPtr(), ypold.dataPtr(), zpold.dataPtr(), + uxpold.dataPtr(), uypold.dataPtr(), uzpold.dataPtr()); #endif // Save the position and momenta, making copies - Vector<Real> xp_save, yp_save, zp_save, uxp_save, uyp_save, uzp_save; + Cuda::DeviceVector<Real> xp_save, yp_save, zp_save; + RealVector uxp_save, uyp_save, uzp_save; if (!done_injecting_lev) { xp_save = xp; @@ -265,8 +268,12 @@ RigidInjectedParticleContainer::PushPX(WarpXParIter& pti, } } - warpx_particle_pusher(&np, xp.data(), yp.data(), zp.data(), - uxp.data(), uyp.data(), uzp.data(), giv.data(), + warpx_particle_pusher(&np, + xp.dataPtr(), + yp.dataPtr(), + zp.dataPtr(), + uxp.dataPtr(), uyp.dataPtr(), uzp.dataPtr(), + giv.dataPtr(), Exp.dataPtr(), Eyp.dataPtr(), Ezp.dataPtr(), Bxp.dataPtr(), Byp.dataPtr(), Bzp.dataPtr(), &this->charge, &this->mass, &dt, @@ -355,7 +362,7 @@ RigidInjectedParticleContainer::PushP (int lev, Real dt, #pragma omp parallel #endif { - Vector<Real> xp, yp, zp, giv; + Cuda::DeviceVector<Real> xp, yp, zp, giv; for (WarpXParIter pti(*this, lev); pti.isValid(); ++pti) { @@ -404,9 +411,12 @@ 
RigidInjectedParticleContainer::PushP (int lev, Real dt, const int l_lower_order_in_v = true; long lvect_fieldgathe = 64; warpx_geteb_energy_conserving( - &np, xp.data(), yp.data(), zp.data(), - Exp.data(),Eyp.data(),Ezp.data(), - Bxp.data(),Byp.data(),Bzp.data(), + &np, + xp.dataPtr(), + yp.dataPtr(), + zp.dataPtr(), + Exp.dataPtr(),Eyp.dataPtr(),Ezp.dataPtr(), + Bxp.dataPtr(),Byp.dataPtr(),Bzp.dataPtr(), ixyzmin_grid, &xyzmin_grid[0], &xyzmin_grid[1], &xyzmin_grid[2], &dx[0], &dx[1], &dx[2], @@ -425,8 +435,12 @@ RigidInjectedParticleContainer::PushP (int lev, Real dt, auto uyp_save = uyp; auto uzp_save = uzp; - warpx_particle_pusher_momenta(&np, xp.data(), yp.data(), zp.data(), - uxp.data(), uyp.data(), uzp.data(), giv.data(), + warpx_particle_pusher_momenta(&np, + xp.dataPtr(), + yp.dataPtr(), + zp.dataPtr(), + uxp.dataPtr(), uyp.dataPtr(), uzp.dataPtr(), + giv.dataPtr(), Exp.dataPtr(), Eyp.dataPtr(), Ezp.dataPtr(), Bxp.dataPtr(), Byp.dataPtr(), Bzp.dataPtr(), &this->charge, &this->mass, &dt, diff --git a/Source/WarpX.H b/Source/WarpX.H index ec41115cd..08fe657b4 100644 --- a/Source/WarpX.H +++ b/Source/WarpX.H @@ -86,6 +86,9 @@ public: static long noy; static long noz; + static bool use_fdtd_nci_corr; + static int l_lower_order_in_v; + static bool use_laser; static bool use_filter; static bool serialize_ics; @@ -105,6 +108,8 @@ public: static bool do_dynamic_scheduling; static bool refine_plasma; + static int sort_int; + // buffers static int n_field_gather_buffer; static int n_current_deposition_buffer; @@ -139,9 +144,13 @@ public: void EvolveB (int lev, amrex::Real dt); void EvolveF ( amrex::Real dt, DtType dt_type); void EvolveF (int lev, amrex::Real dt, DtType dt_type); + void EvolveB (int lev, PatchType patch_type, amrex::Real dt); + void EvolveE (int lev, PatchType patch_type, amrex::Real dt); + void EvolveF (int lev, PatchType patch_type, amrex::Real dt, DtType dt_type); void DampPML (); void DampPML (int lev); + void DampPML (int lev, PatchType patch_type); void PushParticlesandDepose (int lev, amrex::Real cur_time); void PushParticlesandDepose ( amrex::Real cur_time); @@ -248,11 +257,6 @@ private: void FillBoundaryE (int lev, PatchType patch_type); void FillBoundaryF (int lev, PatchType patch_type); - void EvolveB (int lev, PatchType patch_type, amrex::Real dt); - void EvolveE (int lev, PatchType patch_type, amrex::Real dt); - void EvolveF (int lev, PatchType patch_type, amrex::Real dt, DtType dt_type); - void DampPML (int lev, PatchType patch_type); - void OneStep_nosub (amrex::Real t); void OneStep_sub1 (amrex::Real t); @@ -350,10 +354,18 @@ private: const std::array<const amrex::MultiFab*, 3>& B, const std::array<amrex::Real,3>& dx); + static void ComputeDivB (amrex::MultiFab& divB, int dcomp, + const std::array<const amrex::MultiFab*, 3>& B, + const std::array<amrex::Real,3>& dx, int ngrow); + static void ComputeDivE (amrex::MultiFab& divE, int dcomp, const std::array<const amrex::MultiFab*, 3>& B, const std::array<amrex::Real,3>& dx); + static void ComputeDivE (amrex::MultiFab& divE, int dcomp, + const std::array<const amrex::MultiFab*, 3>& B, + const std::array<amrex::Real,3>& dx, int ngrow); + void SyncCurrent (const std::array<const amrex::MultiFab*,3>& fine, const std::array< amrex::MultiFab*,3>& crse, int ref_ratio); @@ -425,6 +437,13 @@ private: amrex::Vector<std::array< std::unique_ptr<amrex::MultiFab>, 3 > > current_buf; amrex::Vector<std::unique_ptr<amrex::MultiFab> > charge_buf; + amrex::Vector<std::array<std::unique_ptr<amrex::iMultiFab>, 3 > > 
current_fp_owner_masks; + amrex::Vector<std::array<std::unique_ptr<amrex::iMultiFab>, 3 > > current_cp_owner_masks; + + amrex::Vector<std::unique_ptr<amrex::iMultiFab> > rho_fp_owner_masks; + amrex::Vector<std::unique_ptr<amrex::iMultiFab> > rho_cp_owner_masks; + + // div E cleaning int do_dive_cleaning = 0; @@ -582,6 +601,7 @@ private: int insitu_int; int insitu_start; std::string insitu_config; + int insitu_pin_mesh; }; #endif diff --git a/Source/WarpX.cpp b/Source/WarpX.cpp index 44eff9cd4..1828ebcec 100644 --- a/Source/WarpX.cpp +++ b/Source/WarpX.cpp @@ -43,11 +43,16 @@ long WarpX::nox = 1; long WarpX::noy = 1; long WarpX::noz = 1; +bool WarpX::use_fdtd_nci_corr = false; +int WarpX::l_lower_order_in_v = true; + bool WarpX::use_laser = false; bool WarpX::use_filter = false; bool WarpX::serialize_ics = false; bool WarpX::refine_plasma = false; +int WarpX::sort_int = -1; + bool WarpX::do_boosted_frame_diagnostic = false; int WarpX::num_snapshots_lab = std::numeric_limits<int>::lowest(); Real WarpX::dt_snapshots_lab = std::numeric_limits<Real>::lowest(); @@ -91,6 +96,8 @@ int WarpX::n_current_deposition_buffer = -1; WarpX* WarpX::m_instance = nullptr; + + WarpX& WarpX::GetInstance () { @@ -169,6 +176,11 @@ WarpX::WarpX () current_buf.resize(nlevs_max); charge_buf.resize(nlevs_max); + current_fp_owner_masks.resize(nlevs_max); + current_cp_owner_masks.resize(nlevs_max); + rho_fp_owner_masks.resize(nlevs_max); + rho_cp_owner_masks.resize(nlevs_max); + pml.resize(nlevs_max); #ifdef WARPX_DO_ELECTROSTATIC @@ -335,6 +347,7 @@ WarpX::ReadParameters () pp.query("do_dive_cleaning", do_dive_cleaning); pp.query("n_field_gather_buffer", n_field_gather_buffer); pp.query("n_current_deposition_buffer", n_current_deposition_buffer); + pp.query("sort_int", sort_int); pp.query("do_pml", do_pml); pp.query("pml_ncell", pml_ncell); @@ -450,11 +463,13 @@ WarpX::ReadParameters () insitu_start = 0; insitu_int = 0; insitu_config = ""; + insitu_pin_mesh = 0; ParmParse pp("insitu"); pp.query("int", insitu_int); pp.query("start", insitu_start); pp.query("config", insitu_config); + pp.query("pin_mesh", insitu_pin_mesh); } } @@ -492,8 +507,14 @@ WarpX::ClearLevel (int lev) Efield_cax[lev][i].reset(); Bfield_cax[lev][i].reset(); current_buf[lev][i].reset(); + + current_fp_owner_masks[lev][i].reset(); + current_cp_owner_masks[lev][i].reset(); } + rho_fp_owner_masks[lev].reset(); + rho_cp_owner_masks[lev].reset(); + charge_buf[lev].reset(); current_buffer_masks[lev].reset(); @@ -550,7 +571,7 @@ WarpX::AllocLevelData (int lev, const BoxArray& ba, const DistributionMapping& d int ngy = (ngy_tmp % 2) ? ngy_tmp+1 : ngy_tmp; // Always even number int ngz_nonci = (ngz_tmp % 2) ? ngz_tmp+1 : ngz_tmp; // Always even number int ngz; - if (warpx_use_fdtd_nci_corr()) { + if (WarpX::use_fdtd_nci_corr) { int ng = ngz_tmp + (mypc->nstencilz_fdtd_nci_corr-1); ngz = (ng % 2) ? 
ng+1 : ng; } else { @@ -613,16 +634,24 @@ WarpX::AllocLevelData (int lev, const BoxArray& ba, const DistributionMapping& d current_fp[lev][1].reset( new MultiFab(amrex::convert(ba,jy_nodal_flag),dm,1,ngJ)); current_fp[lev][2].reset( new MultiFab(amrex::convert(ba,jz_nodal_flag),dm,1,ngJ)); - if (do_subcycling == 1 && lev == 0) { + const auto& period = Geom(lev).periodicity(); + current_fp_owner_masks[lev][0] = std::move(current_fp[lev][0]->OwnerMask(period)); + current_fp_owner_masks[lev][1] = std::move(current_fp[lev][1]->OwnerMask(period)); + current_fp_owner_masks[lev][2] = std::move(current_fp[lev][2]->OwnerMask(period)); + + if (do_dive_cleaning || plot_rho) + { + rho_fp[lev].reset(new MultiFab(amrex::convert(ba,IntVect::TheUnitVector()),dm,2,ngRho)); + rho_fp_owner_masks[lev] = std::move(rho_fp[lev]->OwnerMask(period)); + } + + if (do_subcycling == 1 && lev == 0) + { current_store[lev][0].reset( new MultiFab(amrex::convert(ba,jx_nodal_flag),dm,1,ngJ)); current_store[lev][1].reset( new MultiFab(amrex::convert(ba,jy_nodal_flag),dm,1,ngJ)); current_store[lev][2].reset( new MultiFab(amrex::convert(ba,jz_nodal_flag),dm,1,ngJ)); } - if (do_dive_cleaning || plot_rho){ - rho_fp[lev].reset(new MultiFab(amrex::convert(ba,IntVect::TheUnitVector()),dm,2,ngRho)); - } - if (do_dive_cleaning) { F_fp[lev].reset (new MultiFab(amrex::convert(ba,IntVect::TheUnitVector()),dm,1, ngF)); @@ -631,6 +660,7 @@ WarpX::AllocLevelData (int lev, const BoxArray& ba, const DistributionMapping& d else { rho_fp[lev].reset(new MultiFab(amrex::convert(ba,IntVect::TheUnitVector()),dm,2,ngRho)); + rho_fp_owner_masks[lev] = std::move(rho_fp[lev]->OwnerMask(period)); } #endif @@ -678,8 +708,14 @@ WarpX::AllocLevelData (int lev, const BoxArray& ba, const DistributionMapping& d current_cp[lev][1].reset( new MultiFab(amrex::convert(cba,jy_nodal_flag),dm,1,ngJ)); current_cp[lev][2].reset( new MultiFab(amrex::convert(cba,jz_nodal_flag),dm,1,ngJ)); + const auto& cperiod = Geom(lev).periodicity(); + current_cp_owner_masks[lev][0] = std::move(current_cp[lev][0]->OwnerMask(cperiod)); + current_cp_owner_masks[lev][1] = std::move(current_cp[lev][1]->OwnerMask(cperiod)); + current_cp_owner_masks[lev][2] = std::move(current_cp[lev][2]->OwnerMask(cperiod)); + if (do_dive_cleaning || plot_rho){ rho_cp[lev].reset(new MultiFab(amrex::convert(cba,IntVect::TheUnitVector()),dm,2,ngRho)); + rho_cp_owner_masks[lev] = std::move(rho_cp[lev]->OwnerMask(cperiod)); } if (do_dive_cleaning) { @@ -689,6 +725,7 @@ WarpX::AllocLevelData (int lev, const BoxArray& ba, const DistributionMapping& d else { rho_cp[lev].reset(new MultiFab(amrex::convert(cba,IntVect::TheUnitVector()),dm,2,ngRho)); + rho_cp_owner_masks[lev] = std::move(rho_cp[lev]->OwnerMask(cperiod)); } #endif } @@ -808,6 +845,26 @@ WarpX::ComputeDivB (MultiFab& divB, int dcomp, } void +WarpX::ComputeDivB (MultiFab& divB, int dcomp, + const std::array<const MultiFab*, 3>& B, + const std::array<Real,3>& dx, int ngrow) +{ +#ifdef _OPENMP +#pragma omp parallel +#endif + for (MFIter mfi(divB, true); mfi.isValid(); ++mfi) + { + Box bx = mfi.growntilebox(ngrow); + WRPX_COMPUTE_DIVB(bx.loVect(), bx.hiVect(), + BL_TO_FORTRAN_N_ANYD(divB[mfi],dcomp), + BL_TO_FORTRAN_ANYD((*B[0])[mfi]), + BL_TO_FORTRAN_ANYD((*B[1])[mfi]), + BL_TO_FORTRAN_ANYD((*B[2])[mfi]), + dx.data()); + } +} + +void WarpX::ComputeDivE (MultiFab& divE, int dcomp, const std::array<const MultiFab*, 3>& E, const std::array<Real,3>& dx) @@ -828,6 +885,26 @@ WarpX::ComputeDivE (MultiFab& divE, int dcomp, } void +WarpX::ComputeDivE (MultiFab& 
divE, int dcomp, + const std::array<const MultiFab*, 3>& E, + const std::array<Real,3>& dx, int ngrow) +{ +#ifdef _OPENMP +#pragma omp parallel +#endif + for (MFIter mfi(divE, true); mfi.isValid(); ++mfi) + { + Box bx = mfi.growntilebox(ngrow); + WRPX_COMPUTE_DIVE(bx.loVect(), bx.hiVect(), + BL_TO_FORTRAN_N_ANYD(divE[mfi],dcomp), + BL_TO_FORTRAN_ANYD((*E[0])[mfi]), + BL_TO_FORTRAN_ANYD((*E[1])[mfi]), + BL_TO_FORTRAN_ANYD((*E[2])[mfi]), + dx.data()); + } +} + +void WarpX::applyFilter (MultiFab& dstmf, const MultiFab& srcmf, int scomp, int dcomp, int ncomp) { ncomp = std::min(ncomp, srcmf.nComp()); diff --git a/Source/WarpXComm.cpp b/Source/WarpXComm.cpp index f225f1015..32b9e80f5 100644 --- a/Source/WarpXComm.cpp +++ b/Source/WarpXComm.cpp @@ -491,16 +491,16 @@ WarpX::SyncCurrent () for (int lev = 0; lev <= finest_level; ++lev) { const auto& period = Geom(lev).periodicity(); - current_fp[lev][0]->OverrideSync(period); - current_fp[lev][1]->OverrideSync(period); - current_fp[lev][2]->OverrideSync(period); + current_fp[lev][0]->OverrideSync(*current_fp_owner_masks[lev][0], period); + current_fp[lev][1]->OverrideSync(*current_fp_owner_masks[lev][1], period); + current_fp[lev][2]->OverrideSync(*current_fp_owner_masks[lev][2],period); } for (int lev = 1; lev <= finest_level; ++lev) { const auto& cperiod = Geom(lev-1).periodicity(); - current_cp[lev][0]->OverrideSync(cperiod); - current_cp[lev][1]->OverrideSync(cperiod); - current_cp[lev][2]->OverrideSync(cperiod); + current_cp[lev][0]->OverrideSync(*current_cp_owner_masks[lev][0], cperiod); + current_cp[lev][1]->OverrideSync(*current_cp_owner_masks[lev][1], cperiod); + current_cp[lev][2]->OverrideSync(*current_cp_owner_masks[lev][2], cperiod); } } @@ -645,12 +645,12 @@ WarpX::SyncRho (amrex::Vector<std::unique_ptr<amrex::MultiFab> >& rhof, for (int lev = 0; lev <= finest_level; ++lev) { const auto& period = Geom(lev).periodicity(); - rhof[lev]->OverrideSync(period); + rhof[lev]->OverrideSync(*rho_fp_owner_masks[lev], period); } for (int lev = 1; lev <= finest_level; ++lev) { const auto& cperiod = Geom(lev-1).periodicity(); - rhoc[lev]->OverrideSync(cperiod); + rhoc[lev]->OverrideSync(*rho_cp_owner_masks[lev], cperiod); } } diff --git a/Source/WarpXEvolve.cpp b/Source/WarpXEvolve.cpp index 7c1629e94..6d917e36d 100644 --- a/Source/WarpXEvolve.cpp +++ b/Source/WarpXEvolve.cpp @@ -155,6 +155,12 @@ WarpX::EvolveEM (int numsteps) mypc->Redistribute(); } + bool to_sort = (sort_int > 0) && ((step+1) % sort_int == 0); + if (to_sort) { + amrex::Print() << "re-sorting particles \n"; + mypc->SortParticlesByCell(); + } + amrex::Print()<< "STEP " << step+1 << " ends." 
<< " TIME = " << cur_time << " DT = " << dt[0] << "\n"; Real walltime_end_step = amrex::second(); @@ -492,9 +498,9 @@ WarpX::EvolveB (int lev, PatchType patch_type, amrex::Real dt) // Loop through the grids, and over the tiles within each grid #ifdef _OPENMP -#pragma omp parallel +#pragma omp parallel if (Gpu::notInLaunchRegion()) #endif - for ( MFIter mfi(*Bx,true); mfi.isValid(); ++mfi ) + for ( MFIter mfi(*Bx, TilingIfNotGPU()); mfi.isValid(); ++mfi ) { Real wt = amrex::second(); @@ -519,8 +525,12 @@ WarpX::EvolveB (int lev, PatchType patch_type, amrex::Real dt) if (cost) { Box cbx = mfi.tilebox(IntVect{AMREX_D_DECL(0,0,0)}); if (patch_type == PatchType::coarse) cbx.refine(rr); - wt = (amrex::second() - wt) / cbx.d_numPts(); - (*cost)[mfi].plus(wt, cbx); + wt = (amrex::second() - wt) / cbx.d_numPts();\ + FArrayBox* costfab = cost->fabPtr(mfi); + AMREX_LAUNCH_HOST_DEVICE_LAMBDA ( cbx, work_box, + { + costfab->plus(wt, work_box); + }); } } @@ -530,9 +540,9 @@ WarpX::EvolveB (int lev, PatchType patch_type, amrex::Real dt) const auto& pml_E = (patch_type == PatchType::fine) ? pml[lev]->GetE_fp() : pml[lev]->GetE_cp(); #ifdef _OPENMP -#pragma omp parallel +#pragma omp parallel if (Gpu::notInLaunchRegion()) #endif - for ( MFIter mfi(*pml_B[0],true); mfi.isValid(); ++mfi ) + for ( MFIter mfi(*pml_B[0], TilingIfNotGPU()); mfi.isValid(); ++mfi ) { const Box& tbx = mfi.tilebox(Bx_nodal_flag); const Box& tby = mfi.tilebox(By_nodal_flag); @@ -617,9 +627,9 @@ WarpX::EvolveE (int lev, PatchType patch_type, amrex::Real dt) // Loop through the grids, and over the tiles within each grid #ifdef _OPENMP -#pragma omp parallel +#pragma omp parallel if (Gpu::notInLaunchRegion()) #endif - for ( MFIter mfi(*Ex,true); mfi.isValid(); ++mfi ) + for ( MFIter mfi(*Ex, TilingIfNotGPU()); mfi.isValid(); ++mfi ) { Real wt = amrex::second(); @@ -662,7 +672,11 @@ WarpX::EvolveE (int lev, PatchType patch_type, amrex::Real dt) Box cbx = mfi.tilebox(IntVect{AMREX_D_DECL(0,0,0)}); if (patch_type == PatchType::coarse) cbx.refine(rr); wt = (amrex::second() - wt) / cbx.d_numPts(); - (*cost)[mfi].plus(wt, cbx); + FArrayBox* costfab = cost->fabPtr(mfi); + AMREX_LAUNCH_HOST_DEVICE_LAMBDA ( cbx, work_box, + { + costfab->plus(wt, work_box); + }); } } @@ -674,9 +688,9 @@ WarpX::EvolveE (int lev, PatchType patch_type, amrex::Real dt) const auto& pml_E = (patch_type == PatchType::fine) ? pml[lev]->GetE_fp() : pml[lev]->GetE_cp(); const auto& pml_F = (patch_type == PatchType::fine) ? pml[lev]->GetF_fp() : pml[lev]->GetF_cp(); #ifdef _OPENMP -#pragma omp parallel +#pragma omp parallel if (Gpu::notInLaunchRegion()) #endif - for ( MFIter mfi(*pml_E[0],true); mfi.isValid(); ++mfi ) + for ( MFIter mfi(*pml_E[0], TilingIfNotGPU()); mfi.isValid(); ++mfi ) { const Box& tex = mfi.tilebox(Ex_nodal_flag); const Box& tey = mfi.tilebox(Ey_nodal_flag); @@ -775,9 +789,9 @@ WarpX::EvolveF (int lev, PatchType patch_type, Real dt, DtType dt_type) const auto& pml_E = (patch_type == PatchType::fine) ? 
pml[lev]->GetE_fp() : pml[lev]->GetE_cp(); #ifdef _OPENMP -#pragma omp parallel +#pragma omp parallel if (Gpu::notInLaunchRegion()) #endif - for ( MFIter mfi(*pml_F,true); mfi.isValid(); ++mfi ) + for ( MFIter mfi(*pml_F, TilingIfNotGPU()); mfi.isValid(); ++mfi ) { const Box& bx = mfi.tilebox(); WRPX_PUSH_PML_F(bx.loVect(), bx.hiVect(), @@ -821,9 +835,9 @@ WarpX::DampPML (int lev, PatchType patch_type) : pml[lev]->GetMultiSigmaBox_cp(); #ifdef _OPENMP -#pragma omp parallel +#pragma omp parallel if (Gpu::notInLaunchRegion()) #endif - for ( MFIter mfi(*pml_E[0],true); mfi.isValid(); ++mfi ) + for ( MFIter mfi(*pml_E[0], TilingIfNotGPU()); mfi.isValid(); ++mfi ) { const Box& tex = mfi.tilebox(Ex_nodal_flag); const Box& tey = mfi.tilebox(Ey_nodal_flag); diff --git a/Source/WarpXIO.cpp b/Source/WarpXIO.cpp index 70985ac7e..93a359d7c 100644 --- a/Source/WarpXIO.cpp +++ b/Source/WarpXIO.cpp @@ -488,11 +488,11 @@ WarpX::UpdateInSitu () const Vector<const MultiFab*> srcmf(AMREX_SPACEDIM); PackPlotDataPtrs(srcmf, current_fp[lev]); int dcomp = 0; - amrex::average_edge_to_cellcenter(mf[lev], dcomp, srcmf); + amrex::average_edge_to_cellcenter(mf[lev], dcomp, srcmf, ngrow); #if (AMREX_SPACEDIM == 2) MultiFab::Copy(mf[lev], mf[lev], dcomp+1, dcomp+2, 1, ngrow); - amrex::average_node_to_cellcenter(mf[lev], dcomp+1, *current_fp[lev][1], 0, 1); + amrex::average_node_to_cellcenter(mf[lev], dcomp+1, *current_fp[lev][1], 0, 1, ngrow); #endif if (lev == 0) { @@ -503,10 +503,10 @@ WarpX::UpdateInSitu () const dcomp += 3; PackPlotDataPtrs(srcmf, Efield_aux[lev]); - amrex::average_edge_to_cellcenter(mf[lev], dcomp, srcmf); + amrex::average_edge_to_cellcenter(mf[lev], dcomp, srcmf, ngrow); #if (AMREX_SPACEDIM == 2) MultiFab::Copy(mf[lev], mf[lev], dcomp+1, dcomp+2, 1, ngrow); - amrex::average_node_to_cellcenter(mf[lev], dcomp+1, *Efield_aux[lev][1], 0, 1); + amrex::average_node_to_cellcenter(mf[lev], dcomp+1, *Efield_aux[lev][1], 0, 1, ngrow); #endif if (lev == 0) { @@ -517,10 +517,10 @@ WarpX::UpdateInSitu () const dcomp += 3; PackPlotDataPtrs(srcmf, Bfield_aux[lev]); - amrex::average_face_to_cellcenter(mf[lev], dcomp, srcmf); + amrex::average_face_to_cellcenter(mf[lev], dcomp, srcmf, ngrow); #if (AMREX_SPACEDIM == 2) MultiFab::Copy(mf[lev], mf[lev], dcomp+1, dcomp+2, 1, ngrow); - MultiFab::Copy(mf[lev], *Bfield_aux[lev][1], 0, dcomp+1, 1, ngrow); + MultiFab::Copy(mf[lev], *Bfield_aux[lev][1], 0, dcomp+1, 1, ngrow); #endif if (lev == 0) { @@ -532,16 +532,14 @@ WarpX::UpdateInSitu () const if (plot_part_per_cell) { - MultiFab temp_dat(grids[lev],mf[lev].DistributionMap(),1,0); - temp_dat.setVal(0); + MultiFab temp_dat(grids[lev], mf[lev].DistributionMap(), 1, ngrow); + temp_dat.setVal(0, ngrow); // MultiFab containing number of particles in each cell mypc->Increment(temp_dat, lev); - MultiFab::Copy(mf[lev], temp_dat, 0, dcomp, 1, 0); + MultiFab::Copy(mf[lev], temp_dat, 0, dcomp, 1, ngrow); if (lev == 0) - { varnames.push_back("part_per_cell"); - } dcomp += 1; } @@ -554,14 +552,13 @@ WarpX::UpdateInSitu () const #ifdef _OPENMP #pragma omp parallel #endif - for (MFIter mfi(mf[lev]); mfi.isValid(); ++mfi) { - (mf[lev])[mfi].setVal(static_cast<Real>(npart_in_grid[mfi.index()]), dcomp); - } - if (lev == 0) - { - varnames.push_back("part_per_grid"); - } - dcomp += 1; + for (MFIter mfi(mf[lev]); mfi.isValid(); ++mfi) + (mf[lev])[mfi].setVal(static_cast<Real>(npart_in_grid[mfi.index()]), dcomp); + + if (lev == 0) + varnames.push_back("part_per_grid"); + + dcomp += 1; } if (plot_part_per_proc) @@ -570,30 +567,30 @@ 
WarpX::UpdateInSitu () const #ifdef _OPENMP #pragma omp parallel reduction(+:n_per_proc) #endif - for (MFIter mfi(mf[lev]); mfi.isValid(); ++mfi) { + for (MFIter mfi(mf[lev]); mfi.isValid(); ++mfi) n_per_proc += npart_in_grid[mfi.index()]; - } - mf[lev].setVal(static_cast<Real>(n_per_proc), dcomp,1); + + mf[lev].setVal(static_cast<Real>(n_per_proc), dcomp, ngrow); + if (lev == 0) - { varnames.push_back("part_per_proc"); - } + dcomp += 1; } } if (plot_proc_number) { + Real procid = static_cast<Real>(ParallelDescriptor::MyProc()); #ifdef _OPENMP #pragma omp parallel #endif - for (MFIter mfi(mf[lev]); mfi.isValid(); ++mfi) { - (mf[lev])[mfi].setVal(static_cast<Real>(ParallelDescriptor::MyProc()), dcomp); - } + for (MFIter mfi(mf[lev]); mfi.isValid(); ++mfi) + (mf[lev])[mfi].setVal(procid, dcomp); + if (lev == 0) - { varnames.push_back("proc_number"); - } + dcomp += 1; } @@ -601,56 +598,56 @@ WarpX::UpdateInSitu () const { ComputeDivB(mf[lev], dcomp, {Bfield_aux[lev][0].get(),Bfield_aux[lev][1].get(),Bfield_aux[lev][2].get()}, - WarpX::CellSize(lev)); + WarpX::CellSize(lev), ngrow); if (lev == 0) - { varnames.push_back("divB"); - } + dcomp += 1; } if (plot_dive) { const BoxArray& ba = amrex::convert(boxArray(lev),IntVect::TheUnitVector()); - MultiFab dive(ba,DistributionMap(lev),1,0); + MultiFab dive(ba, DistributionMap(lev), 1, ngrow); + ComputeDivE(dive, 0, {Efield_aux[lev][0].get(), Efield_aux[lev][1].get(), Efield_aux[lev][2].get()}, - WarpX::CellSize(lev)); - amrex::average_node_to_cellcenter(mf[lev], dcomp, dive, 0, 1); + WarpX::CellSize(lev), ngrow); + + amrex::average_node_to_cellcenter(mf[lev], dcomp, dive, 0, 1, ngrow); + if (lev == 0) - { varnames.push_back("divE"); - } + dcomp += 1; } if (plot_rho) { - amrex::average_node_to_cellcenter(mf[lev], dcomp, *rho_fp[lev], 0, 1); + amrex::average_node_to_cellcenter(mf[lev], dcomp, *rho_fp[lev], 0, 1, ngrow); if (lev == 0) - { varnames.push_back("rho"); - } + dcomp += 1; } if (plot_F) { - amrex::average_node_to_cellcenter(mf[lev], dcomp, *F_fp[lev], 0, 1); + amrex::average_node_to_cellcenter(mf[lev], dcomp, *F_fp[lev], 0, 1, ngrow); + if (lev == 0) - { varnames.push_back("F"); - } + dcomp += 1; } if (plot_finepatch) { PackPlotDataPtrs(srcmf, Efield_fp[lev]); - amrex::average_edge_to_cellcenter(mf[lev], dcomp, srcmf); + amrex::average_edge_to_cellcenter(mf[lev], dcomp, srcmf, ngrow); #if (AMREX_SPACEDIM == 2) MultiFab::Copy(mf[lev], mf[lev], dcomp+1, dcomp+2, 1, ngrow); - amrex::average_node_to_cellcenter(mf[lev], dcomp+1, *Efield_fp[lev][1], 0, 1); + amrex::average_node_to_cellcenter(mf[lev], dcomp+1, *Efield_fp[lev][1], 0, 1, ngrow); #endif if (lev == 0) { @@ -661,7 +658,7 @@ WarpX::UpdateInSitu () const dcomp += 3; PackPlotDataPtrs(srcmf, Bfield_fp[lev]); - amrex::average_face_to_cellcenter(mf[lev], dcomp, srcmf); + amrex::average_face_to_cellcenter(mf[lev], dcomp, srcmf, ngrow); #if (AMREX_SPACEDIM == 2) MultiFab::Copy(mf[lev], mf[lev], dcomp+1, dcomp+2, 1, ngrow); MultiFab::Copy(mf[lev], *Bfield_fp[lev][1], 0, dcomp+1, 1, ngrow); @@ -686,10 +683,10 @@ WarpX::UpdateInSitu () const { std::array<std::unique_ptr<MultiFab>, 3> E = getInterpolatedE(lev); PackPlotDataPtrs(srcmf, E); - amrex::average_edge_to_cellcenter(mf[lev], dcomp, srcmf); + amrex::average_edge_to_cellcenter(mf[lev], dcomp, srcmf, ngrow); #if (AMREX_SPACEDIM == 2) MultiFab::Copy(mf[lev], mf[lev], dcomp+1, dcomp+2, 1, ngrow); - amrex::average_node_to_cellcenter(mf[lev], dcomp+1, *E[1], 0, 1); + amrex::average_node_to_cellcenter(mf[lev], dcomp+1, *E[1], 0, 1, ngrow); #endif } 
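The ``UpdateInSitu`` hunks above thread an ``ngrow`` argument through the cell-centering helpers so that the ghost cells of the in-situ ``MultiFab`` are filled along with the valid region. A minimal sketch of the pattern, using only the signatures visible in this diff (``mf``, ``Efield_aux``, ``dcomp`` and ``lev`` are assumed to be set up as in the surrounding code; the value of ``ngrow`` is illustrative):

::

    const int ngrow = 1;  // number of ghost cells to fill
    amrex::Vector<const amrex::MultiFab*> srcmf(AMREX_SPACEDIM);
    PackPlotDataPtrs(srcmf, Efield_aux[lev]);
    // Average edge-centered E onto cell centers, ghost cells included
    amrex::average_edge_to_cellcenter(mf[lev], dcomp, srcmf, ngrow);
#if (AMREX_SPACEDIM == 2)
    // In 2-D the y-component is node-centered and is averaged separately
    amrex::average_node_to_cellcenter(mf[lev], dcomp+1, *Efield_aux[lev][1], 0, 1, ngrow);
#endif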
if (lev == 0) @@ -709,7 +706,7 @@ WarpX::UpdateInSitu () const { std::array<std::unique_ptr<MultiFab>, 3> B = getInterpolatedB(lev); PackPlotDataPtrs(srcmf, B); - amrex::average_face_to_cellcenter(mf[lev], dcomp, srcmf); + amrex::average_face_to_cellcenter(mf[lev], dcomp, srcmf, ngrow); #if (AMREX_SPACEDIM == 2) MultiFab::Copy(mf[lev], mf[lev], dcomp+1, dcomp+2, 1, ngrow); MultiFab::Copy(mf[lev], *B[1], 0, dcomp+1, 1, ngrow); diff --git a/Source/WarpXInitData.cpp b/Source/WarpXInitData.cpp index a6b63a92d..496b14e7a 100644 --- a/Source/WarpXInitData.cpp +++ b/Source/WarpXInitData.cpp @@ -5,7 +5,6 @@ #include <WarpX.H> #include <WarpX_f.H> -#include <WarpXWrappers.h> #ifdef BL_USE_SENSEI_INSITU #include <AMReX_AmrMeshInSituBridge.H> @@ -34,7 +33,7 @@ WarpX::InitData () ComputePMLFactors(); - if (warpx_use_fdtd_nci_corr()) { + if (WarpX::use_fdtd_nci_corr) { WarpX::InitNCICorrector(); } @@ -51,6 +50,7 @@ WarpX::InitData () insitu_bridge = new amrex::AmrMeshInSituBridge; insitu_bridge->setEnabled(insitu_int > 0 ? 1 : 0); insitu_bridge->setConfig(insitu_config); + insitu_bridge->setPinMesh(insitu_pin_mesh); if (insitu_bridge->initialize()) { amrex::ErrorStream() @@ -153,7 +153,7 @@ WarpX::ComputePMLFactors () void WarpX::InitNCICorrector () { - if (warpx_use_fdtd_nci_corr()) + if (WarpX::use_fdtd_nci_corr) { mypc->fdtd_nci_stencilz_ex.resize(max_level+1); mypc->fdtd_nci_stencilz_by.resize(max_level+1); @@ -161,7 +161,6 @@ WarpX::InitNCICorrector () { const Geometry& gm = Geom(lev); const Real* dx = gm.CellSize(); - const int l_lower_order_in_v = warpx_l_lower_order_in_v(); amrex::Real dz, cdtodz; if (AMREX_SPACEDIM == 3){ dz = dx[2]; @@ -172,7 +171,7 @@ WarpX::InitNCICorrector () WRPX_PXR_NCI_CORR_INIT( (mypc->fdtd_nci_stencilz_ex)[lev].data(), (mypc->fdtd_nci_stencilz_by)[lev].data(), mypc->nstencilz_fdtd_nci_corr, cdtodz, - l_lower_order_in_v); + WarpX::l_lower_order_in_v); } } } diff --git a/Source/WarpXMove.cpp b/Source/WarpXMove.cpp index 401211554..908c70573 100644 --- a/Source/WarpXMove.cpp +++ b/Source/WarpXMove.cpp @@ -40,8 +40,8 @@ WarpX::MoveWindow (bool move_j) Real new_hi[AMREX_SPACEDIM]; const Real* current_lo = geom[0].ProbLo(); const Real* current_hi = geom[0].ProbHi(); - const Real* dx = geom[0].CellSize(); - int num_shift_base = static_cast<int>((moving_window_x - current_lo[dir]) / dx[dir]); + const Real* cdx = geom[0].CellSize(); + int num_shift_base = static_cast<int>((moving_window_x - current_lo[dir]) / cdx[dir]); if (num_shift_base == 0) return 0; @@ -51,8 +51,8 @@ WarpX::MoveWindow (bool move_j) new_lo[i] = current_lo[i]; new_hi[i] = current_hi[i]; } - new_lo[dir] = current_lo[dir] + num_shift_base * dx[dir]; - new_hi[dir] = current_hi[dir] + num_shift_base * dx[dir]; + new_lo[dir] = current_lo[dir] + num_shift_base * cdx[dir]; + new_hi[dir] = current_hi[dir] + num_shift_base * cdx[dir]; RealBox new_box(new_lo, new_hi); Geometry::ProbDomain(new_box); @@ -181,6 +181,7 @@ WarpX::MoveWindow (bool move_j) void WarpX::shiftMF (MultiFab& mf, const Geometry& geom, int num_shift, int dir) { + BL_PROFILE("WarpX::shiftMF()"); const BoxArray& ba = mf.boxArray(); const DistributionMapping& dm = mf.DistributionMap(); const int nc = mf.nComp(); @@ -221,25 +222,24 @@ WarpX::shiftMF (MultiFab& mf, const Geometry& geom, int num_shift, int dir) #endif for (MFIter mfi(tmpmf); mfi.isValid(); ++mfi ) { - FArrayBox& srcfab = tmpmf[mfi]; - - Box outbox = mfi.fabbox(); - outbox &= adjBox; - if (outbox.ok()) { // outbox is the region that the window moved into - srcfab.setVal(0.0, outbox, 0, 
nc); - } - - FArrayBox& dstfab = mf[mfi]; - dstfab.setVal(0.0); - - Box dstBox = dstfab.box(); - - if (num_shift > 0) { - dstBox.growHi(dir, -num_shift); - } else { - dstBox.growLo(dir, num_shift); - } - - dstfab.copy(srcfab, amrex::shift(dstBox,dir,num_shift), 0, dstBox, 0, nc); + FArrayBox* dstfab = mf.fabPtr(mfi); + FArrayBox* srcfab = tmpmf.fabPtr(mfi); + Box outbox = mfi.fabbox(); + outbox &= adjBox; + AMREX_LAUNCH_HOST_DEVICE_LAMBDA(outbox, toutbox, + { + srcfab->setVal(0.0, toutbox, 0, nc); + }); + Box dstBox = mf[mfi].box(); + if (num_shift > 0) { + dstBox.growHi(dir, -num_shift); + } else { + dstBox.growLo(dir, num_shift); + } + AMREX_LAUNCH_HOST_DEVICE_LAMBDA(dstBox, tdstBox, + { + dstfab->setVal(0.0, tdstBox, 0, nc); + dstfab->copy(*srcfab, amrex::shift(tdstBox,dir,num_shift), 0, tdstBox, 0, nc); + }); } } diff --git a/Source/WarpXPML.H b/Source/WarpXPML.H index 0cf367284..7a86bf0da 100644 --- a/Source/WarpXPML.H +++ b/Source/WarpXPML.H @@ -62,6 +62,13 @@ namespace amrex { virtual void destroy (SigmaBox* fab) const final { delete fab; } +#ifdef AMREX_USE_GPU + virtual SigmaBox* createHostAlias (SigmaBox const& src) const final + { + return const_cast<SigmaBox*>(&src); + } + virtual void destroyHostAlias (SigmaBox* fab) const final {} +#endif virtual FabFactory<SigmaBox>* clone () const { return new FabFactory<SigmaBox>(*this); } diff --git a/Source/WarpXParticleContainer.H b/Source/WarpXParticleContainer.H index a2d5c0fac..b879f49d9 100644 --- a/Source/WarpXParticleContainer.H +++ b/Source/WarpXParticleContainer.H @@ -36,27 +36,27 @@ public: WarpXParIter (ContainerType& pc, int level); #if (AMREX_SPACEDIM == 2) - void GetPosition (amrex::Vector<amrex::Real>& x, - amrex::Vector<amrex::Real>& y, - amrex::Vector<amrex::Real>& z) const; - void SetPosition (const amrex::Vector<amrex::Real>& x, - const amrex::Vector<amrex::Real>& y, - const amrex::Vector<amrex::Real>& z); + void GetPosition (amrex::Cuda::DeviceVector<amrex::Real>& x, + amrex::Cuda::DeviceVector<amrex::Real>& y, + amrex::Cuda::DeviceVector<amrex::Real>& z) const; + void SetPosition (const amrex::Cuda::DeviceVector<amrex::Real>& x, + const amrex::Cuda::DeviceVector<amrex::Real>& y, + const amrex::Cuda::DeviceVector<amrex::Real>& z); #endif - const std::array<amrex::Vector<amrex::Real>, PIdx::nattribs>& GetAttribs () const { + const std::array<RealVector, PIdx::nattribs>& GetAttribs () const { return GetStructOfArrays().GetRealData(); } - std::array<amrex::Vector<amrex::Real>, PIdx::nattribs>& GetAttribs () { + std::array<RealVector, PIdx::nattribs>& GetAttribs () { return GetStructOfArrays().GetRealData(); } - const amrex::Vector<amrex::Real>& GetAttribs (int comp) const { + const RealVector& GetAttribs (int comp) const { return GetStructOfArrays().GetRealData(comp); } - amrex::Vector<amrex::Real>& GetAttribs (int comp) { + RealVector& GetAttribs (int comp) { return GetStructOfArrays().GetRealData(comp); } }; @@ -141,6 +141,33 @@ public: bool local = false); std::unique_ptr<amrex::MultiFab> GetChargeDensity(int lev, bool local = false); + virtual void DepositCharge(WarpXParIter& pti, + RealVector& wp, + amrex::MultiFab* rhomf, + amrex::MultiFab* crhomf, + int icomp, + const long np_current, + const long np, + int thread_num, + int lev ); + + virtual void DepositCurrent(WarpXParIter& pti, + RealVector& wp, + RealVector& uxp, + RealVector& uyp, + RealVector& uzp, + amrex::MultiFab& jx, + amrex::MultiFab& jy, + amrex::MultiFab& jz, + amrex::MultiFab* cjx, + amrex::MultiFab* cjy, + amrex::MultiFab* cjz, + const long 
np_current, + const long np, + int thread_num, + int lev, + amrex::Real dt ); + /// /// This returns the total charge for all the particles in this ParticleContainer. /// This is needed when solving Poisson's equation with periodic boundary conditions. @@ -182,6 +209,14 @@ protected: bool deposit_on_main_grid = false; static int do_not_push; + + amrex::Vector<std::unique_ptr<amrex::FArrayBox> > local_rho; + amrex::Vector<std::unique_ptr<amrex::FArrayBox> > local_jx; + amrex::Vector<std::unique_ptr<amrex::FArrayBox> > local_jy; + amrex::Vector<std::unique_ptr<amrex::FArrayBox> > local_jz; + + amrex::Vector<amrex::Cuda::DeviceVector<amrex::Real> > m_xp, m_yp, m_zp, m_giv; + }; #endif diff --git a/Source/WarpXParticleContainer.cpp b/Source/WarpXParticleContainer.cpp index ad6979462..a9699071c 100644 --- a/Source/WarpXParticleContainer.cpp +++ b/Source/WarpXParticleContainer.cpp @@ -18,14 +18,14 @@ WarpXParIter::WarpXParIter (ContainerType& pc, int level) #if (AMREX_SPACEDIM == 2) void -WarpXParIter::GetPosition (Vector<Real>& x, Vector<Real>& y, Vector<Real>& z) const +WarpXParIter::GetPosition (Cuda::DeviceVector<Real>& x, Cuda::DeviceVector<Real>& y, Cuda::DeviceVector<Real>& z) const { amrex::ParIter<0,0,PIdx::nattribs>::GetPosition(x, z); y.resize(x.size(), std::numeric_limits<Real>::quiet_NaN()); } void -WarpXParIter::SetPosition (const Vector<Real>& x, const Vector<Real>& y, const Vector<Real>& z) +WarpXParIter::SetPosition (const Cuda::DeviceVector<Real>& x, const Cuda::DeviceVector<Real>& y, const Cuda::DeviceVector<Real>& z) { amrex::ParIter<0,0,PIdx::nattribs>::SetPosition(x, z); } @@ -40,6 +40,30 @@ WarpXParticleContainer::WarpXParticleContainer (AmrCore* amr_core, int ispecies) } SetParticleSize(); ReadParameters(); + + // Initialize temporary local arrays for charge/current deposition + int num_threads = 1; + #ifdef _OPENMP + #pragma omp parallel + #pragma omp single + num_threads = omp_get_num_threads(); + #endif + local_rho.resize(num_threads); + local_jx.resize(num_threads); + local_jy.resize(num_threads); + local_jz.resize(num_threads); + m_xp.resize(num_threads); + m_yp.resize(num_threads); + m_zp.resize(num_threads); + m_giv.resize(num_threads); + for (int i = 0; i < num_threads; ++i) + { + local_rho[i].reset(nullptr); + local_jx[i].reset(nullptr); + local_jy[i].reset(nullptr); + local_jz[i].reset(nullptr); + } + } void @@ -50,10 +74,14 @@ WarpXParticleContainer::ReadParameters () { ParmParse pp("particles"); - do_tiling = true; // because the default in amrex is false +#ifdef AMREX_USE_GPU + do_tiling = false; // By default, tiling is off on GPU +#else + do_tiling = true; +#endif pp.query("do_tiling", do_tiling); pp.query("do_not_push", do_not_push); - + initialized = true; } } @@ -73,7 +101,7 @@ WarpXParticleContainer::AddOneParticle (int lev, int grid, int tile, const std::array<Real,PIdx::nattribs>& attribs) { auto& particle_tile = GetParticles(lev)[std::make_pair(grid,tile)]; - AddOneParticle(particle_tile, x, y, z, attribs); + AddOneParticle(particle_tile, x, y, z, attribs); } void @@ -92,7 +120,7 @@ WarpXParticleContainer::AddOneParticle (ParticleTileType& particle_tile, p.pos(0) = x; p.pos(1) = z; #endif - + particle_tile.push_back(p); particle_tile.push_back_real(attribs); } @@ -153,16 +181,341 @@ WarpXParticleContainer::AddNParticles (int lev, particle_tile.push_back_real(PIdx::ux, vx + ibegin, vx + iend); particle_tile.push_back_real(PIdx::uy, vy + ibegin, vy + iend); particle_tile.push_back_real(PIdx::uz, vz + ibegin, vz + iend); - + for (int comp = PIdx::uz+1; 
comp < PIdx::nattribs; ++comp) { particle_tile.push_back_real(comp, np, 0.0); } - } + } Redistribute(); } + +void +WarpXParticleContainer::DepositCurrent(WarpXParIter& pti, + RealVector& wp, RealVector& uxp, + RealVector& uyp, RealVector& uzp, + MultiFab& jx, MultiFab& jy, MultiFab& jz, + MultiFab* cjx, MultiFab* cjy, MultiFab* cjz, + const long np_current, const long np, + int thread_num, int lev, Real dt ) +{ + Real *jx_ptr, *jy_ptr, *jz_ptr; + const int *jxntot, *jyntot, *jzntot; + const std::array<Real,3>& xyzmin_tile = WarpX::LowerCorner(pti.tilebox(), lev); + const std::array<Real,3>& dx = WarpX::CellSize(lev); + const std::array<Real,3>& cdx = WarpX::CellSize(std::max(lev-1,0)); + const std::array<Real, 3>& xyzmin = xyzmin_tile; + const long lvect = 8; + + BL_PROFILE_VAR_NS("PICSAR::CurrentDeposition", blp_pxr_cd); + BL_PROFILE_VAR_NS("PPC::Evolve::Accumulate", blp_accumulate); + + Box tbx = convert(pti.tilebox(), WarpX::jx_nodal_flag); + Box tby = convert(pti.tilebox(), WarpX::jy_nodal_flag); + Box tbz = convert(pti.tilebox(), WarpX::jz_nodal_flag); + + // WarpX assumes the same number of guard cells for Jx, Jy, Jz + long ngJ = jx.nGrow(); + + // Deposit current for particles that are not in the current buffers + if (np_current > 0) + { + tbx.grow(ngJ); + tby.grow(ngJ); + tbz.grow(ngJ); + + local_jx[thread_num]->resize(tbx); + local_jy[thread_num]->resize(tby); + local_jz[thread_num]->resize(tbz); + + jx_ptr = local_jx[thread_num]->dataPtr(); + jy_ptr = local_jy[thread_num]->dataPtr(); + jz_ptr = local_jz[thread_num]->dataPtr(); + + FArrayBox* local_jx_ptr = local_jx[thread_num].get(); + AMREX_LAUNCH_HOST_DEVICE_LAMBDA(tbx, b, + { + local_jx_ptr->setVal(0.0, b, 0, 1); + }); + + FArrayBox* local_jy_ptr = local_jy[thread_num].get(); + AMREX_LAUNCH_HOST_DEVICE_LAMBDA(tby, b, + { + local_jy_ptr->setVal(0.0, b, 0, 1); + }); + + FArrayBox* local_jz_ptr = local_jz[thread_num].get(); + AMREX_LAUNCH_HOST_DEVICE_LAMBDA(tbz, b, + { + local_jz_ptr->setVal(0.0, b, 0, 1); + }); + + jxntot = local_jx[thread_num]->length(); + jyntot = local_jy[thread_num]->length(); + jzntot = local_jz[thread_num]->length(); + + BL_PROFILE_VAR_START(blp_pxr_cd); + warpx_current_deposition( + jx_ptr, &ngJ, jxntot, + jy_ptr, &ngJ, jyntot, + jz_ptr, &ngJ, jzntot, + &np_current, + m_xp[thread_num].dataPtr(), + m_yp[thread_num].dataPtr(), + m_zp[thread_num].dataPtr(), + uxp.dataPtr(), uyp.dataPtr(), uzp.dataPtr(), + m_giv[thread_num].dataPtr(), + wp.dataPtr(), &this->charge, + &xyzmin[0], &xyzmin[1], &xyzmin[2], + &dt, &dx[0], &dx[1], &dx[2], + &WarpX::nox,&WarpX::noy,&WarpX::noz, + &lvect,&WarpX::current_deposition_algo); + BL_PROFILE_VAR_STOP(blp_pxr_cd); + + BL_PROFILE_VAR_START(blp_accumulate); + + FArrayBox const* local_jx_const_ptr = local_jx[thread_num].get(); + FArrayBox* global_jx_ptr = jx.fabPtr(pti); + AMREX_LAUNCH_HOST_DEVICE_LAMBDA(tbx, thread_bx, + { + global_jx_ptr->atomicAdd(*local_jx_const_ptr, thread_bx, thread_bx, 0, 0, 1); + }); + + FArrayBox const* local_jy_const_ptr = local_jy[thread_num].get(); + FArrayBox* global_jy_ptr = jy.fabPtr(pti); + AMREX_LAUNCH_HOST_DEVICE_LAMBDA(tby, thread_bx, + { + global_jy_ptr->atomicAdd(*local_jy_const_ptr, thread_bx, thread_bx, 0, 0, 1); + }); + + FArrayBox const* local_jz_const_ptr = local_jz[thread_num].get(); + FArrayBox* global_jz_ptr = jz.fabPtr(pti); + AMREX_LAUNCH_HOST_DEVICE_LAMBDA(tbz, thread_bx, + { + global_jz_ptr->atomicAdd(*local_jz_const_ptr, thread_bx, thread_bx, 0, 0, 1); + }); + + BL_PROFILE_VAR_STOP(blp_accumulate); + } + + // Deposit current for
particles that are in the current buffers + if (np_current < np) + { + const IntVect& ref_ratio = WarpX::RefRatio(lev-1); + const Box& ctilebox = amrex::coarsen(pti.tilebox(),ref_ratio); + const std::array<Real,3>& cxyzmin_tile = WarpX::LowerCorner(ctilebox, lev-1); + + tbx = amrex::convert(ctilebox, WarpX::jx_nodal_flag); + tby = amrex::convert(ctilebox, WarpX::jy_nodal_flag); + tbz = amrex::convert(ctilebox, WarpX::jz_nodal_flag); + tbx.grow(ngJ); + tby.grow(ngJ); + tbz.grow(ngJ); + + local_jx[thread_num]->resize(tbx); + local_jy[thread_num]->resize(tby); + local_jz[thread_num]->resize(tbz); + + jx_ptr = local_jx[thread_num]->dataPtr(); + jy_ptr = local_jy[thread_num]->dataPtr(); + jz_ptr = local_jz[thread_num]->dataPtr(); + + FArrayBox* local_jx_ptr = local_jx[thread_num].get(); + AMREX_LAUNCH_HOST_DEVICE_LAMBDA(tbx, b, + { + local_jx_ptr->setVal(0.0, b, 0, 1); + }); + + FArrayBox* local_jy_ptr = local_jy[thread_num].get(); + AMREX_LAUNCH_HOST_DEVICE_LAMBDA(tby, b, + { + local_jy_ptr->setVal(0.0, b, 0, 1); + }); + + FArrayBox* local_jz_ptr = local_jz[thread_num].get(); + AMREX_LAUNCH_HOST_DEVICE_LAMBDA(tbz, b, + { + local_jz_ptr->setVal(0.0, b, 0, 1); + }); + jxntot = local_jx[thread_num]->length(); + jyntot = local_jy[thread_num]->length(); + jzntot = local_jz[thread_num]->length(); + + long ncrse = np - np_current; + BL_PROFILE_VAR_START(blp_pxr_cd); + warpx_current_deposition( + jx_ptr, &ngJ, jxntot, + jy_ptr, &ngJ, jyntot, + jz_ptr, &ngJ, jzntot, + &ncrse, + m_xp[thread_num].dataPtr() +np_current, + m_yp[thread_num].dataPtr() +np_current, + m_zp[thread_num].dataPtr() +np_current, + uxp.dataPtr()+np_current, + uyp.dataPtr()+np_current, + uzp.dataPtr()+np_current, + m_giv[thread_num].dataPtr()+np_current, + wp.dataPtr()+np_current, &this->charge, + &cxyzmin_tile[0], &cxyzmin_tile[1], &cxyzmin_tile[2], + &dt, &cdx[0], &cdx[1], &cdx[2], + &WarpX::nox,&WarpX::noy,&WarpX::noz, + &lvect,&WarpX::current_deposition_algo); + BL_PROFILE_VAR_STOP(blp_pxr_cd); + + BL_PROFILE_VAR_START(blp_accumulate); + + FArrayBox const* local_jx_const_ptr = local_jx[thread_num].get(); + FArrayBox* global_jx_ptr = cjx->fabPtr(pti); + AMREX_LAUNCH_HOST_DEVICE_LAMBDA(tbx, thread_bx, + { + global_jx_ptr->atomicAdd(*local_jx_const_ptr, thread_bx, thread_bx, 0, 0, 1); + }); + + FArrayBox const* local_jy_const_ptr = local_jy[thread_num].get(); + FArrayBox* global_jy_ptr = cjy->fabPtr(pti); + AMREX_LAUNCH_HOST_DEVICE_LAMBDA(tby, thread_bx, + { + global_jy_ptr->atomicAdd(*local_jy_const_ptr, thread_bx, thread_bx, 0, 0, 1); + }); + + FArrayBox const* local_jz_const_ptr = local_jz[thread_num].get(); + FArrayBox* global_jz_ptr = cjz->fabPtr(pti); + AMREX_LAUNCH_HOST_DEVICE_LAMBDA(tbz, thread_bx, + { + global_jz_ptr->atomicAdd(*local_jz_const_ptr, thread_bx, thread_bx, 0, 0, 1); + }); + + BL_PROFILE_VAR_STOP(blp_accumulate); + } +}; + + +void +WarpXParticleContainer::DepositCharge ( WarpXParIter& pti, RealVector& wp, + MultiFab* rhomf, MultiFab* crhomf, int icomp, + const long np_current, + const long np, int thread_num, int lev ) +{ + + BL_PROFILE_VAR_NS("PICSAR::ChargeDeposition", blp_pxr_chd); + BL_PROFILE_VAR_NS("PPC::Evolve::Accumulate", blp_accumulate); + + const std::array<Real,3>& xyzmin_tile = WarpX::LowerCorner(pti.tilebox(), lev); + const long lvect = 8; + + long ngRho = rhomf->nGrow(); + Real* data_ptr; + Box tile_box = convert(pti.tilebox(), IntVect::TheUnitVector()); + const int *rholen; + + const std::array<Real,3>& dx = WarpX::CellSize(lev); + const std::array<Real,3>& cdx = 
WarpX::CellSize(std::max(lev-1,0)); + + // Deposit charge for particles that are not in the current buffers + if (np_current > 0) + { + const std::array<Real, 3>& xyzmin = xyzmin_tile; + tile_box.grow(ngRho); + local_rho[thread_num]->resize(tile_box); + FArrayBox* local_rho_ptr = local_rho[thread_num].get(); + AMREX_LAUNCH_HOST_DEVICE_LAMBDA(tile_box, b, + { + local_rho_ptr->setVal(0.0, b, 0, 1); + }); + + data_ptr = local_rho[thread_num]->dataPtr(); + rholen = local_rho[thread_num]->length(); +#if (AMREX_SPACEDIM == 3) + const long nx = rholen[0]-1-2*ngRho; + const long ny = rholen[1]-1-2*ngRho; + const long nz = rholen[2]-1-2*ngRho; +#else + const long nx = rholen[0]-1-2*ngRho; + const long ny = 0; + const long nz = rholen[1]-1-2*ngRho; +#endif + BL_PROFILE_VAR_START(blp_pxr_chd); + warpx_charge_deposition(data_ptr, &np_current, + m_xp[thread_num].dataPtr(), + m_yp[thread_num].dataPtr(), + m_zp[thread_num].dataPtr(), + wp.dataPtr(), + &this->charge, + &xyzmin[0], &xyzmin[1], &xyzmin[2], + &dx[0], &dx[1], &dx[2], &nx, &ny, &nz, + &ngRho, &ngRho, &ngRho, + &WarpX::nox,&WarpX::noy,&WarpX::noz, + &lvect, &WarpX::charge_deposition_algo); + BL_PROFILE_VAR_STOP(blp_pxr_chd); + + const int ncomp = 1; + FArrayBox const* local_fab = local_rho[thread_num].get(); + FArrayBox* global_fab = rhomf->fabPtr(pti); + BL_PROFILE_VAR_START(blp_accumulate); + AMREX_LAUNCH_HOST_DEVICE_LAMBDA(tile_box, tbx, + { + global_fab->atomicAdd(*local_fab, tbx, tbx, 0, icomp, ncomp); + }); + BL_PROFILE_VAR_STOP(blp_accumulate); + } + + // Deposit charge for particles that are in the current buffers + if (np_current < np) + { + const IntVect& ref_ratio = WarpX::RefRatio(lev-1); + const Box& ctilebox = amrex::coarsen(pti.tilebox(), ref_ratio); + const std::array<Real,3>& cxyzmin_tile = WarpX::LowerCorner(ctilebox, lev-1); + + tile_box = amrex::convert(ctilebox, IntVect::TheUnitVector()); + tile_box.grow(ngRho); + local_rho[thread_num]->resize(tile_box); + FArrayBox* local_rho_ptr = local_rho[thread_num].get(); + AMREX_LAUNCH_HOST_DEVICE_LAMBDA(tile_box, b, + { + local_rho_ptr->setVal(0.0, b, 0, 1); + }); + + data_ptr = local_rho[thread_num]->dataPtr(); + rholen = local_rho[thread_num]->length(); +#if (AMREX_SPACEDIM == 3) + const long nx = rholen[0]-1-2*ngRho; + const long ny = rholen[1]-1-2*ngRho; + const long nz = rholen[2]-1-2*ngRho; +#else + const long nx = rholen[0]-1-2*ngRho; + const long ny = 0; + const long nz = rholen[1]-1-2*ngRho; +#endif + + long ncrse = np - np_current; + BL_PROFILE_VAR_START(blp_pxr_chd); + warpx_charge_deposition(data_ptr, &ncrse, + m_xp[thread_num].dataPtr() + np_current, + m_yp[thread_num].dataPtr() + np_current, + m_zp[thread_num].dataPtr() + np_current, + wp.dataPtr() + np_current, + &this->charge, + &cxyzmin_tile[0], &cxyzmin_tile[1], &cxyzmin_tile[2], + &cdx[0], &cdx[1], &cdx[2], &nx, &ny, &nz, + &ngRho, &ngRho, &ngRho, + &WarpX::nox,&WarpX::noy,&WarpX::noz, + &lvect, &WarpX::charge_deposition_algo); + BL_PROFILE_VAR_STOP(blp_pxr_chd); + + const int ncomp = 1; + FArrayBox const* local_fab = local_rho[thread_num].get(); + FArrayBox* global_fab = crhomf->fabPtr(pti); + BL_PROFILE_VAR_START(blp_accumulate); + AMREX_LAUNCH_HOST_DEVICE_LAMBDA(tile_box, tbx, + { + global_fab->atomicAdd(*local_fab, tbx, tbx, 0, icomp, ncomp); + }); + BL_PROFILE_VAR_STOP(blp_accumulate); + } +}; + void WarpXParticleContainer::DepositCharge (Vector<std::unique_ptr<MultiFab> >& rho, bool local) { @@ -172,32 +525,32 @@ WarpXParticleContainer::DepositCharge (Vector<std::unique_ptr<MultiFab> >& rho, // each level 
deposits its own particles const int ng = rho[0]->nGrow(); - for (int lev = 0; lev < num_levels; ++lev) { + for (int lev = 0; lev < num_levels; ++lev) { rho[lev]->setVal(0.0, ng); const auto& gm = m_gdb->Geom(lev); const auto& ba = m_gdb->ParticleBoxArray(lev); const auto& dm = m_gdb->DistributionMap(lev); - + const Real* dx = gm.CellSize(); const Real* plo = gm.ProbLo(); BoxArray nba = ba; nba.surroundingNodes(); - + for (WarpXParIter pti(*this, lev); pti.isValid(); ++pti) { const Box& box = nba[pti]; - + auto& wp = pti.GetAttribs(PIdx::w); const auto& particles = pti.GetArrayOfStructs(); int nstride = particles.dataShape().first; const long np = pti.numParticles(); - + FArrayBox& rhofab = (*rho[lev])[pti]; - - WRPX_DEPOSIT_CIC(particles.data(), nstride, np, - wp.data(), &this->charge, - rhofab.dataPtr(), box.loVect(), box.hiVect(), + + WRPX_DEPOSIT_CIC(particles.dataPtr(), nstride, np, + wp.dataPtr(), &this->charge, + rhofab.dataPtr(), box.loVect(), box.hiVect(), plo, dx, &ng); } @@ -211,12 +564,12 @@ WarpXParticleContainer::DepositCharge (Vector<std::unique_ptr<MultiFab> >& rho, const DistributionMapping& fine_dm = rho[lev+1]->DistributionMap(); BoxArray coarsened_fine_BA = fine_BA; coarsened_fine_BA.coarsen(m_gdb->refRatio(lev)); - + MultiFab coarsened_fine_data(coarsened_fine_BA, fine_dm, 1, 0); coarsened_fine_data.setVal(0.0); - + IntVect ratio(AMREX_D_DECL(2, 2, 2)); // FIXME - + for (MFIter mfi(coarsened_fine_data); mfi.isValid(); ++mfi) { const Box& bx = mfi.validbox(); const Box& crse_box = coarsened_fine_data[mfi].box(); @@ -225,7 +578,7 @@ WarpXParticleContainer::DepositCharge (Vector<std::unique_ptr<MultiFab> >& rho, coarsened_fine_data[mfi].dataPtr(), crse_box.loVect(), crse_box.hiVect(), (*rho[lev+1])[mfi].dataPtr(), fine_box.loVect(), fine_box.hiVect()); } - + rho[lev]->copy(coarsened_fine_data, m_gdb->Geom(lev).periodicity(), FabArrayBase::ADD); } } @@ -250,19 +603,19 @@ WarpXParticleContainer::GetChargeDensity (int lev, bool local) #pragma omp parallel #endif { - Vector<Real> xp, yp, zp; + Cuda::DeviceVector<Real> xp, yp, zp; FArrayBox local_rho; for (WarpXParIter pti(*this, lev); pti.isValid(); ++pti) { const Box& box = pti.validbox(); - + auto& wp = pti.GetAttribs(PIdx::w); - + const long np = pti.numParticles(); pti.GetPosition(xp, yp, zp); - + const std::array<Real,3>& xyzmin_tile = WarpX::LowerCorner(pti.tilebox(), lev); const std::array<Real,3>& xyzmin_grid = WarpX::LowerCorner(box, lev); @@ -298,26 +651,26 @@ WarpXParticleContainer::GetChargeDensity (int lev, bool local) long nyg = ng; long nzg = ng; long lvect = 8; - + warpx_charge_deposition(data_ptr, - &np, xp.data(), yp.data(), zp.data(), wp.data(), - &this->charge, &xyzmin[0], &xyzmin[1], &xyzmin[2], + &np, + xp.dataPtr(), + yp.dataPtr(), + zp.dataPtr(), wp.dataPtr(), + &this->charge, &xyzmin[0], &xyzmin[1], &xyzmin[2], &dx[0], &dx[1], &dx[2], &nx, &ny, &nz, &nxg, &nyg, &nzg, &WarpX::nox,&WarpX::noy,&WarpX::noz, &lvect, &WarpX::charge_deposition_algo); - + #ifdef _OPENMP - const Box& fabbox = rhofab.box(); - const int ncomp = 1; - amrex_atomic_accumulate_fab(BL_TO_FORTRAN_3D(local_rho), - BL_TO_FORTRAN_3D(rhofab), ncomp); + rhofab.atomicAdd(local_rho); #endif } - + } if (!local) rho->SumBoundary(gm.periodicity()); - + return rho; } @@ -327,7 +680,7 @@ Real WarpXParticleContainer::sumParticleCharge(bool local) { for (int lev = 0; lev < finestLevel(); ++lev) { - + #ifdef _OPENMP #pragma omp parallel reduction(+:total_charge) #endif @@ -356,7 +709,7 @@ std::array<Real, 3>
WarpXParticleContainer::meanParticleVelocity(bool local) { amrex::Real inv_clight_sq = 1.0/PhysConst::c/PhysConst::c; for (int lev = 0; lev <= finestLevel(); ++lev) { - + #ifdef _OPENMP #pragma omp parallel reduction(+:vx_total, vy_total, vz_total, np_total) #endif @@ -365,9 +718,9 @@ std::array<Real, 3> WarpXParticleContainer::meanParticleVelocity(bool local) { auto& ux = pti.GetAttribs(PIdx::ux); auto& uy = pti.GetAttribs(PIdx::uy); auto& uz = pti.GetAttribs(PIdx::uz); - + np_total += pti.numParticles(); - + for (unsigned long i = 0; i < ux.size(); i++) { Real usq = (ux[i]*ux[i] + uy[i]*uy[i] + uz[i]*uz[i])*inv_clight_sq; Real gaminv = 1.0/std::sqrt(1.0 + usq); @@ -377,14 +730,14 @@ std::array<Real, 3> WarpXParticleContainer::meanParticleVelocity(bool local) { } } } - + if (!local) { ParallelDescriptor::ReduceRealSum(vx_total); ParallelDescriptor::ReduceRealSum(vy_total); ParallelDescriptor::ReduceRealSum(vz_total); ParallelDescriptor::ReduceLongSum(np_total); } - + std::array<Real, 3> mean_v; if (np_total > 0) { mean_v[0] = vx_total / np_total; @@ -415,7 +768,7 @@ Real WarpXParticleContainer::maxParticleVelocity(bool local) { } } } - + if (!local) ParallelDescriptor::ReduceRealMax(max_v); return max_v; } @@ -427,23 +780,23 @@ WarpXParticleContainer::PushXES (Real dt) int num_levels = finestLevel() + 1; - for (int lev = 0; lev < num_levels; ++lev) { + for (int lev = 0; lev < num_levels; ++lev) { const auto& gm = m_gdb->Geom(lev); const RealBox& prob_domain = gm.ProbDomain(); for (WarpXParIter pti(*this, lev); pti.isValid(); ++pti) { auto& particles = pti.GetArrayOfStructs(); - int nstride = particles.dataShape().first; + int nstride = particles.dataShape().first; const long np = pti.numParticles(); - - auto& attribs = pti.GetAttribs(); + + auto& attribs = pti.GetAttribs(); auto& uxp = attribs[PIdx::ux]; auto& uyp = attribs[PIdx::uy]; auto& uzp = attribs[PIdx::uz]; - - WRPX_PUSH_LEAPFROG_POSITIONS(particles.data(), nstride, np, - uxp.data(), uyp.data(), + + WRPX_PUSH_LEAPFROG_POSITIONS(particles.dataPtr(), nstride, np, + uxp.dataPtr(), uyp.dataPtr(), #if AMREX_SPACEDIM == 3 - uzp.data(), + uzp.dataPtr(), #endif &dt, prob_domain.lo(), prob_domain.hi()); @@ -474,7 +827,7 @@ WarpXParticleContainer::PushX (int lev, Real dt) #pragma omp parallel #endif { - Vector<Real> xp, yp, zp, giv; + Cuda::DeviceVector<Real> xp, yp, zp, giv; for (WarpXParIter pti(*this, lev); pti.isValid(); ++pti) { @@ -485,26 +838,30 @@ WarpXParticleContainer::PushX (int lev, Real dt) auto& uxp = attribs[PIdx::ux]; auto& uyp = attribs[PIdx::uy]; auto& uzp = attribs[PIdx::uz]; - + const long np = pti.numParticles(); - + giv.resize(np); - + // // copy data from particle container to temp arrays // BL_PROFILE_VAR_START(blp_copy); pti.GetPosition(xp, yp, zp); BL_PROFILE_VAR_STOP(blp_copy); - + // // Particle Push // BL_PROFILE_VAR_START(blp_pxr_pp); - warpx_particle_pusher_positions(&np, xp.data(), yp.data(), zp.data(), - uxp.data(), uyp.data(), uzp.data(), giv.data(), &dt); + warpx_particle_pusher_positions(&np, + xp.dataPtr(), + yp.dataPtr(), + zp.dataPtr(), + uxp.dataPtr(), uyp.dataPtr(), uzp.dataPtr(), + giv.dataPtr(), &dt); BL_PROFILE_VAR_STOP(blp_pxr_pp); - + // // copy particle data back // @@ -515,9 +872,12 @@ WarpXParticleContainer::PushX (int lev, Real dt) if (cost) { const Box& tbx = pti.tilebox(); wt = (amrex::second() - wt) / tbx.d_numPts(); - (*cost)[pti].plus(wt, tbx); + FArrayBox* costfab = cost->fabPtr(pti); + AMREX_LAUNCH_HOST_DEVICE_LAMBDA ( tbx, work_box, + { + costfab->plus(wt, work_box); + }); } } } } - 
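The per-thread `local_jx`/`local_jy`/`local_jz` and `local_rho` buffers introduced above all follow the same accumulation idiom: each thread deposits into a private FArrayBox over the grown tile box, then adds it atomically into the global MultiFab. A minimal hedged sketch of that idiom (the function and variable names here are illustrative, not part of the diff)::

    #include <AMReX_FArrayBox.H>
    #include <AMReX_MultiFab.H>

    // Sketch only: accumulate a thread-local deposition buffer into the
    // global fab of the current grid. atomicAdd makes the update safe when
    // several threads (or GPU blocks) write to overlapping guard cells.
    void accumulate_local (amrex::FArrayBox const* local_fab, // per-thread scratch
                           amrex::FArrayBox* global_fab,      // e.g. jx.fabPtr(pti)
                           amrex::Box const& tb)              // grown tile box
    {
        AMREX_LAUNCH_HOST_DEVICE_LAMBDA (tb, b,
        {
            // source component 0 -> destination component 0, one component
            global_fab->atomicAdd(*local_fab, b, b, 0, 0, 1);
        });
    }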
diff --git a/Source/WarpXRegrid.cpp b/Source/WarpXRegrid.cpp index d21e2ad9c..8d7873041 100644 --- a/Source/WarpXRegrid.cpp +++ b/Source/WarpXRegrid.cpp @@ -40,6 +40,7 @@ WarpX::RemakeLevel (int lev, Real time, const BoxArray& ba, const DistributionMa // Fine patch + const auto& period = Geom(lev).periodicity(); for (int idim=0; idim < 3; ++idim) { { @@ -60,8 +61,8 @@ WarpX::RemakeLevel (int lev, Real time, const BoxArray& ba, const DistributionMa const IntVect& ng = current_fp[lev][idim]->nGrowVect(); auto pmf = std::unique_ptr<MultiFab>(new MultiFab(current_fp[lev][idim]->boxArray(), dm, 1, ng)); - // pmf->Redistribute(*current_fp[lev][idim], 0, 0, 1, ng); current_fp[lev][idim] = std::move(pmf); + current_fp_owner_masks[lev][idim] = std::move(current_fp[lev][idim]->OwnerMask(period)); } if (current_store[lev][idim]) { @@ -86,8 +87,8 @@ WarpX::RemakeLevel (int lev, Real time, const BoxArray& ba, const DistributionMa const IntVect& ng = rho_fp[lev]->nGrowVect(); auto pmf = std::unique_ptr<MultiFab>(new MultiFab(rho_fp[lev]->boxArray(), dm, nc, ng)); - // pmf->Redistribute(*rho_fp[lev], 0, 0, nc, ng); rho_fp[lev] = std::move(pmf); + rho_fp_owner_masks[lev] = std::move(rho_fp[lev]->OwnerMask(period)); } // Aux patch @@ -120,6 +121,7 @@ WarpX::RemakeLevel (int lev, Real time, const BoxArray& ba, const DistributionMa // Coarse patch if (lev > 0) { + const auto& cperiod = Geom(lev-1).periodicity(); for (int idim=0; idim < 3; ++idim) { { @@ -138,10 +140,11 @@ WarpX::RemakeLevel (int lev, Real time, const BoxArray& ba, const DistributionMa } { const IntVect& ng = current_cp[lev][idim]->nGrowVect(); - auto pmf = std::unique_ptr<MultiFab>(new MultiFab(current_cp[lev][idim]->boxArray(), - dm, 1, ng)); - // pmf->Redistribute(*current_cp[lev][idim], 0, 0, 1, ng); + auto pmf = std::unique_ptr<MultiFab>( new MultiFab(current_cp[lev][idim]->boxArray(), + dm, 1, ng)); current_cp[lev][idim] = std::move(pmf); + current_cp_owner_masks[lev][idim] = std::move( + current_cp[lev][idim]->OwnerMask(cperiod)); } } @@ -158,8 +161,8 @@ WarpX::RemakeLevel (int lev, Real time, const BoxArray& ba, const DistributionMa const IntVect& ng = rho_cp[lev]->nGrowVect(); auto pmf = std::unique_ptr<MultiFab>(new MultiFab(rho_cp[lev]->boxArray(), dm, nc, ng)); - // pmf->Redistribute(*rho_cp[lev], 0, 0, nc, ng); rho_cp[lev] = std::move(pmf); + rho_cp_owner_masks[lev] = std::move(rho_cp[lev]->OwnerMask(cperiod)); } } diff --git a/Source/WarpXUtil.cpp b/Source/WarpXUtil.cpp index eb0eec92b..4a884330a 100644 --- a/Source/WarpXUtil.cpp +++ b/Source/WarpXUtil.cpp @@ -38,9 +38,8 @@ void ReadBoostedFrameParameters(Real& gamma_boost, Real& beta_boost, void ConvertLabParamsToBoost() { - - Real gamma_boost, beta_boost; - int max_level; + Real gamma_boost = 1., beta_boost = 0.; + int max_level = 0; Vector<int> boost_direction {0,0,0}; ReadBoostedFrameParameters(gamma_boost, beta_boost, boost_direction); diff --git a/Source/WarpXWrappers.cpp b/Source/WarpXWrappers.cpp index 198cd818d..6ee06a7d5 100644 --- a/Source/WarpXWrappers.cpp +++ b/Source/WarpXWrappers.cpp @@ -56,14 +56,12 @@ extern "C" bool warpx_use_fdtd_nci_corr() { - auto & mypc = WarpX::GetInstance().GetPartContainer(); - return mypc.Use_fdtd_nci_corr(); + return WarpX::use_fdtd_nci_corr; } int warpx_l_lower_order_in_v() { - auto & mypc = WarpX::GetInstance().GetPartContainer(); - return mypc.L_lower_order_in_v(); + return WarpX::l_lower_order_in_v; } int warpx_nComps() diff --git a/Source/WarpX_f.H b/Source/WarpX_f.H index e709c1332..578785a14 100644 --- a/Source/WarpX_f.H +++ 
b/Source/WarpX_f.H @@ -180,6 +180,22 @@ extern "C" void parse_function_laser( const long* np, amrex::Real* Xp, amrex::Real* Yp, amrex::Real* t, amrex::Real* amplitude, const int parser_instance_number ); + void calculate_laser_plane_coordinates( const long* np, + amrex::Real* xp, amrex::Real* yp, amrex::Real* zp, + amrex::Real* plane_Xp, amrex::Real* plane_Yp, + amrex::Real* u_Xx, amrex::Real* u_Xy, amrex::Real* u_Xz, + amrex::Real* u_Yx, amrex::Real* u_Yy, amrex::Real* u_Yz, + amrex::Real* positionx, amrex::Real* positiony, amrex::Real* positionz ); + + void update_laser_particle( const long* np, + amrex::Real* xp, amrex::Real* yp, amrex::Real* zp, + amrex::Real* uxp, amrex::Real* uyp, amrex::Real* uzp, + amrex::Real* giv, amrex::Real* wp, amrex::Real* amplitude_E, + amrex::Real* p_Xx, amrex::Real* p_Xy, amrex::Real* p_Xz, + amrex::Real* nvecx, amrex::Real* nvecy, amrex::Real* nvecz, + amrex::Real* mobility, amrex::Real* dt, const amrex::Real* c, + const amrex::Real* beta_boost, const amrex::Real* gamma_boost ); + // Maxwell solver void warpx_push_evec( diff --git a/Source/WarpX_fft.F90 b/Source/WarpX_fft.F90 index 8d697d184..e605b2be0 100644 --- a/Source/WarpX_fft.F90 +++ b/Source/WarpX_fft.F90 @@ -198,7 +198,7 @@ contains call c_f_pointer(fft_data(21), rhof, shp) fft_data(22) = fftw_alloc_complex(sz) call c_f_pointer(fft_data(22), rhooldf, shp) - +!$acc enter data create (exf,eyf,ezf,bxf,byf,bzf,jxf,jyf,jzf,rhof,rhooldf) if (ndata < 22) then call amrex_abort("size of fft_data is too small") end if
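For context, a hedged sketch of how the two prototypes added to WarpX_f.H above could be driven from C++ (the real call site, in the laser particle container, is not part of this diff; the geometry values below are purely illustrative)::

    #include <AMReX_REAL.H>
    #include "WarpX_f.H"

    // Sketch only: project laser "antenna" particles onto the emission
    // plane, then push them. All arrays are assumed device-accessible
    // when the Fortran routines are compiled with OpenACC.
    void push_laser_particles (long np,
                               amrex::Real* xp, amrex::Real* yp, amrex::Real* zp,
                               amrex::Real* uxp, amrex::Real* uyp, amrex::Real* uzp,
                               amrex::Real* giv, amrex::Real* wp,
                               amrex::Real* plane_Xp, amrex::Real* plane_Yp,
                               amrex::Real* amplitude_E, amrex::Real dt)
    {
        // Illustrative geometry: polarization along x, propagation along z,
        // antenna at the origin; WarpX derives these from the laser input block.
        amrex::Real u_Xx = 1., u_Xy = 0., u_Xz = 0.;
        amrex::Real u_Yx = 0., u_Yy = 1., u_Yz = 0.;
        amrex::Real pos_x = 0., pos_y = 0., pos_z = 0.;
        amrex::Real p_Xx = 1., p_Xy = 0., p_Xz = 0.;
        amrex::Real nvecx = 0., nvecy = 0., nvecz = 1.;
        amrex::Real mobility = 0.;   // set from the laser amplitude in WarpX
        amrex::Real c = 299792458., beta_boost = 0., gamma_boost = 1.;

        // Coordinates of each particle in the plane of the laser.
        calculate_laser_plane_coordinates(&np, xp, yp, zp, plane_Xp, plane_Yp,
                                          &u_Xx, &u_Xy, &u_Xz, &u_Yx, &u_Yy, &u_Yz,
                                          &pos_x, &pos_y, &pos_z);

        // (amplitude_E would be filled here, e.g. by warpx_gaussian_laser.)

        // Convert the E amplitude into antenna velocities and push positions.
        update_laser_particle(&np, xp, yp, zp, uxp, uyp, uzp, giv, wp, amplitude_E,
                              &p_Xx, &p_Xy, &p_Xz, &nvecx, &nvecy, &nvecz,
                              &mobility, &dt, &c, &beta_boost, &gamma_boost);
    }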
diff --git a/Source/WarpX_laser.F90 b/Source/WarpX_laser.F90 index a99cec8c7..b08413019 100644 --- a/Source/WarpX_laser.F90 +++ b/Source/WarpX_laser.F90 @@ -1,3 +1,10 @@ +#if defined(AMREX_USE_GPU) && defined(__PGIC__) && (__PGIC__ < 18 || (__PGIC__ == 18 && __PGIC_MINOR__ <= 7)) +#define USE_ACC_LASER_SUBROUTINE +! Define a separate subroutine that contains the OpenACC pragmas +! This is because Fortran subroutines containing OpenACC pragmas +! ignore the `bind(C,` statement, and so cannot directly be called from C++ +! (at least until pgi version 18.7) +#endif module warpx_laser_module @@ -27,6 +34,31 @@ contains stc_exponent, stcfactor complex*16, parameter :: j=cmplx(0., 1.) +#ifdef USE_ACC_LASER_SUBROUTINE + call warpx_gaussian_laser_acc( np, Xp, Yp, t, & + wavelength, e_max, waist, duration, t_peak, f, amplitude, & + zeta, beta, phi2, theta_stc ) + + end subroutine warpx_gaussian_laser + + subroutine warpx_gaussian_laser_acc( np, Xp, Yp, t, & + wavelength, e_max, waist, duration, t_peak, f, amplitude, & + zeta, beta, phi2, theta_stc ) + + integer(c_long), intent(in) :: np + real(amrex_real), intent(in) :: Xp(np),Yp(np) + real(amrex_real), intent(in) :: e_max, t, t_peak, wavelength, duration, f, waist + real(amrex_real), intent(in) :: zeta, beta, phi2, theta_stc + real(amrex_real), intent(inout) :: amplitude(np) + + integer(c_long) :: i + real(amrex_real) :: k0, oscillation_phase, inv_tau2 + complex*16 :: diffract_factor, exp_argument, prefactor, & + inv_complex_waist_2, stretch_factor, & + stc_exponent, stcfactor + complex*16, parameter :: j=cmplx(0., 1.) +#endif + ! This function uses the complex expression of a Gaussian laser ! (Including Gouy phase and laser oscillations) @@ -38,13 +70,13 @@ contains ! laser diffraction, and phase front curvature diffract_factor = 1 + j * f * 2./(k0*waist**2) inv_complex_waist_2 = 1./( waist**2 * diffract_factor ) - + ! Time stretching due to STCs and phi2 complex envelope ! (1 if zeta=0, beta=0, phi2=0) - stretch_factor = 1. + \ - 4*(zeta + beta*f)**2 * (inv_tau2*inv_complex_waist_2) + \ + stretch_factor = 1. + & + 4*(zeta + beta*f)**2 * (inv_tau2*inv_complex_waist_2) + & 2*j*(phi2 - beta**2*k0*f) * inv_tau2 - + ! Amplitude and monochromatic oscillations prefactor = e_max * exp( j * oscillation_phase ) @@ -58,21 +90,29 @@ contains #endif ! Loop through the macroparticles to calculate the proper amplitude + !$acc parallel deviceptr(amplitude, Xp, Yp) + !$acc loop do i = 1, np ! Exp argument for the temporal gaussian envelope + STCs - stc_exponent = 1./stretch_factor * inv_tau2 * \ - (t - t_peak - beta*k0*(Xp(i)*cos(theta_stc) + Yp(i)*sin(theta_stc)) -\ - 2*j*(Xp(i)*cos(theta_stc) + Yp(i)*sin(theta_stc))*( zeta - beta*f ) *\ - inv_complex_waist_2)**2 - ! stcfactor = everything but complex transverse envelope + stc_exponent = 1./stretch_factor * inv_tau2 * & + (t - t_peak - beta*k0*(Xp(i)*cos(theta_stc) + Yp(i)*sin(theta_stc)) - & + 2*j*(Xp(i)*cos(theta_stc) + Yp(i)*sin(theta_stc))*( zeta - beta*f ) * & + inv_complex_waist_2)**2 + ! stcfactor = everything but complex transverse envelope stcfactor = prefactor * exp( - stc_exponent ) ! Exp argument for transverse envelope exp_argument = - ( Xp(i)*Xp(i) + Yp(i)*Yp(i) ) * inv_complex_waist_2 ! stcfactor + transverse envelope amplitude(i) = DREAL( stcfactor * exp( exp_argument ) ) - enddo + enddo + !$acc end loop + !$acc end parallel +#ifdef USE_ACC_LASER_SUBROUTINE + end subroutine warpx_gaussian_laser_acc +#else end subroutine warpx_gaussian_laser +#endif ! Harris function for the laser temporal profile subroutine warpx_harris_laser( np, Xp, Yp, t, & wavelength, e_max, waist, duration, f, amplitude ) @@ -88,6 +128,25 @@ contains real(amrex_real) :: space_envelope, time_envelope, arg_osc, arg_env real(amrex_real) :: omega0, zR, wz, inv_Rz, oscillations, inv_wz_2 +#ifdef USE_ACC_LASER_SUBROUTINE + call warpx_harris_laser_acc( np, Xp, Yp, t, & + wavelength, e_max, waist, duration, f, amplitude ) + + end subroutine warpx_harris_laser + + subroutine warpx_harris_laser_acc( np, Xp, Yp, t, & + wavelength, e_max, waist, duration, f, amplitude ) + + integer(c_long), intent(in) :: np + real(amrex_real), intent(in) :: Xp(np),Yp(np) + real(amrex_real), intent(in) :: e_max, t, wavelength, duration, f, waist + real(amrex_real), intent(inout) :: amplitude(np) + + integer(c_long) :: i + real(amrex_real) :: space_envelope, time_envelope, arg_osc, arg_env + real(amrex_real) :: omega0, zR, wz, inv_Rz, oscillations, inv_wz_2 +#endif + ! This function uses the Harris function as the temporal profile of the pulse omega0 = 2*pi*clight/wavelength zR = pi * waist**2 / wavelength @@ -110,14 +169,22 @@ contains end if ! Loop through the macroparticles to calculate the proper amplitude + !$acc parallel deviceptr(amplitude, Xp, Yp) + !$acc loop do i = 1, np space_envelope = exp(- ( Xp(i)*Xp(i) + Yp(i)*Yp(i) ) * inv_wz_2) arg_osc = omega0*t - omega0/clight*(Xp(i)*Xp(i) + Yp(i)*Yp(i)) * inv_Rz / 2 oscillations = cos(arg_osc) amplitude(i) = e_max * time_envelope * space_envelope * oscillations enddo + !$acc end loop + !$acc end parallel +#ifdef USE_ACC_LASER_SUBROUTINE + end subroutine warpx_harris_laser_acc +#else end subroutine warpx_harris_laser +#endif ! 
Parse function from the input script for the laser temporal profile subroutine parse_function_laser( np, Xp, Yp, t, amplitude, parser_instance_number ) bind(C, name="parse_function_laser") @@ -136,4 +203,135 @@ contains enddo end subroutine parse_function_laser + + subroutine calculate_laser_plane_coordinates(np, xp, yp, zp, & + plane_Xp, plane_Yp, u_Xx, u_Xy, u_Xz, u_Yx, u_Yy, u_Yz, & + positionx, positiony, positionz ) & + bind(C, name="calculate_laser_plane_coordinates") + integer(c_long), intent(in) :: np + real(amrex_real), intent(in) :: xp(np), yp(np), zp(np) + real(amrex_real), intent(in) :: u_Xx, u_Xy, u_Xz + real(amrex_real), intent(in) :: u_Yx, u_Yy, u_Yz + real(amrex_real), intent(in) :: positionx, positiony, positionz + real(amrex_real), intent(out) :: plane_Xp(np), plane_Yp(np) + integer :: ip + +#ifdef USE_ACC_LASER_SUBROUTINE + call calculate_laser_plane_coordinates_acc(np, xp, yp, zp, & + plane_Xp, plane_Yp, u_Xx, u_Xy, u_Xz, u_Yx, u_Yy, u_Yz, & + positionx, positiony, positionz ) + + end subroutine calculate_laser_plane_coordinates + + subroutine calculate_laser_plane_coordinates_acc(np, xp, yp, zp, & + plane_Xp, plane_Yp, u_Xx, u_Xy, u_Xz, u_Yx, u_Yy, u_Yz, & + positionx, positiony, positionz ) + integer(c_long), intent(in) :: np + real(amrex_real), intent(in) :: xp(np), yp(np), zp(np) + real(amrex_real), intent(in) :: u_Xx, u_Xy, u_Xz + real(amrex_real), intent(in) :: u_Yx, u_Yy, u_Yz + real(amrex_real), intent(in) :: positionx, positiony, positionz + real(amrex_real), intent(out) :: plane_Xp(np), plane_Yp(np) + integer :: ip +#endif + + !$acc parallel deviceptr(xp, yp, zp, plane_Xp, plane_Yp) + !$acc loop + do ip = 1, np +#if (BL_SPACEDIM == 3) + plane_Xp(ip) = u_Xx*(xp(ip) - positionx) & + + u_Xy*(yp(ip) - positiony) & + + u_Xz*(zp(ip) - positionz) + plane_Yp(ip) = u_Yx*(xp(ip) - positionx) & + + u_Yy*(yp(ip) - positiony) & + + u_Yz*(zp(ip) - positionz) +#elif (AMREX_SPACEDIM == 2) + plane_Xp(ip) = u_Xx*(xp(ip) - positionx) & + + u_Xz*(zp(ip) - positionz) + plane_Yp(ip) = 0 +#endif + enddo + !$acc end loop + !$acc end parallel + +#ifdef USE_ACC_LASER_SUBROUTINE + end subroutine calculate_laser_plane_coordinates_acc +#else + end subroutine calculate_laser_plane_coordinates +#endif + + subroutine update_laser_particle( np, & + xp, yp, zp, uxp, uyp, uzp, giv, wp, amplitude_E, p_Xx, p_Xy, p_Xz, & + nvecx, nvecy, nvecz, mobility, dt, c, beta_boost, gamma_boost ) & + bind(C, name="update_laser_particle") + + integer(c_long), intent(in) :: np + real(amrex_real), intent(in) :: amplitude_E(np), wp(np) + real(amrex_real), intent(inout) :: xp(np), yp(np), zp(np) + real(amrex_real), intent(out) :: uxp(np), uyp(np), uzp(np), giv(np) + real(amrex_real), intent(in) :: p_Xx, p_Xy, p_Xz + real(amrex_real), intent(in) :: nvecx, nvecy, nvecz + real(amrex_real), intent(in) :: c, beta_boost, gamma_boost, mobility, dt + +#ifdef USE_ACC_LASER_SUBROUTINE + call update_laser_particle_acc( np, & + xp, yp, zp, uxp, uyp, uzp, giv, wp, amplitude_E, p_Xx, p_Xy, p_Xz, & + nvecx, nvecy, nvecz, mobility, dt, c, beta_boost, gamma_boost ) + + end subroutine update_laser_particle + + subroutine update_laser_particle_acc( np, & + xp, yp, zp, uxp, uyp, uzp, giv, wp, amplitude_E, p_Xx, p_Xy, p_Xz, & + nvecx, nvecy, nvecz, mobility, dt, c, beta_boost, gamma_boost ) + + integer(c_long), intent(in) :: np + real(amrex_real), intent(in) :: amplitude_E(np), wp(np) + real(amrex_real), intent(inout) :: xp(np), yp(np), zp(np) + real(amrex_real), intent(out) :: uxp(np), uyp(np), uzp(np), giv(np) + real(amrex_real), 
intent(in) :: p_Xx, p_Xy, p_Xz + real(amrex_real), intent(in) :: nvecx, nvecy, nvecz + real(amrex_real), intent(in) :: c, beta_boost, gamma_boost, mobility, dt +#endif + + real(amrex_real) :: vx, vy, vz, v_over_c, sign_charge, gamma + integer :: ip + + !$acc parallel deviceptr(amplitude_E, wp, xp, yp, zp, uxp, uyp, uzp, giv) + !$acc loop + do ip = 1, np + ! Calculate the velocity according to the amplitude of E + sign_charge = SIGN( 1.0_amrex_real, wp(ip) ) + v_over_c = sign_charge * mobility * amplitude_E(ip) + ! The velocity is along the laser polarization p_X + vx = c * v_over_c * p_Xx + vy = c * v_over_c * p_Xy + vz = c * v_over_c * p_Xz + ! When running in the boosted frame, there is an additional velocity along nvec + if (gamma_boost > 1.) then + vx = vx - c * beta_boost * nvecx + vy = vy - c * beta_boost * nvecy + vz = vz - c * beta_boost * nvecz + endif + ! Get the corresponding momenta + gamma = gamma_boost/sqrt(1 - v_over_c*v_over_c) + giv(ip) = 1./gamma + uxp(ip) = gamma * vx + uyp(ip) = gamma * vy + uzp(ip) = gamma * vz + ! Push the particle positions + xp(ip) = xp(ip) + vx * dt +#if (BL_SPACEDIM == 3) + yp(ip) = yp(ip) + vy * dt +#endif + zp(ip) = zp(ip) + vz * dt + enddo + !$acc end loop + !$acc end parallel + +#ifdef USE_ACC_LASER_SUBROUTINE + end subroutine update_laser_particle_acc +#else + end subroutine update_laser_particle +#endif + end module warpx_laser_module diff --git a/Source/WarpX_picsar.F90 b/Source/WarpX_picsar.F90 index 91a2f08e7..13168ceca 100644 --- a/Source/WarpX_picsar.F90 +++ b/Source/WarpX_picsar.F90 @@ -294,7 +294,8 @@ subroutine warpx_charge_deposition(rho,np,xp,yp,zp,w,q,xmin,ymin,zmin,dx,dy,dz,n jy,jy_nguards,jy_nvalid, & jz,jz_nguards,jz_nvalid, & np,xp,yp,zp,uxp,uyp,uzp,gaminv,w,q, & - xmin,zmin,dt,dx,dz,nox,noz,lvect) + xmin,zmin,dt,dx,dz,nox,noz,lvect, & + current_depo_algo) #endif end subroutine warpx_current_deposition
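The new script Tools/parallel_postproc.py below distributes the lab-frame snapshots over MPI ranks, computes the mean beam energy and the normalized laser amplitude a0 per snapshot, and has rank 0 plot their history. A typical invocation (the rank count is illustrative)::

    mpirun -n 8 python parallel_postproc.py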
diff --git a/Tools/parallel_postproc.py b/Tools/parallel_postproc.py new file mode 100644 index 000000000..0942118ac --- /dev/null +++ b/Tools/parallel_postproc.py @@ -0,0 +1,83 @@ +import matplotlib +matplotlib.use('Agg') +from mpi4py import MPI +import glob, read_raw_data +import numpy as np +import scipy.constants as scc +import matplotlib.pyplot as plt + +species = 'electrons' +fieldname = 'Ey' +lambda0 = .81e-6 + +# --- custom functions --- # +# ------------------------ # +omega0 = 2*np.pi*scc.c/lambda0 +# Read field fieldname and return normalized max +def get_a0(res_dir, snapshot): + header = res_dir + '/Header' + print( snapshot ) + allrd, info = read_raw_data.read_lab_snapshot(snapshot, header) + F = allrd[ fieldname ] + return info['z'][-1], np.max(np.abs(F)) * scc.e/(scc.m_e*omega0*scc.c) +# Convert elements of a list to numpy arrays +def convert_to_np_array(list_in): + list_out = [] + for elem in list_in: + list_out.append( np.array( elem ) ) + return list_out + +# --- MPI parallelization --- # +# --------------------------- # +# Get ordered list of snapshot files +res_dir = './lab_frame_data/' +file_list = glob.glob(res_dir + '/snapshot?????') +file_list.sort() +# Total number of files +nfiles = len(file_list) +# Each file has a unique number +number_list = range(nfiles) +comm_world = MPI.COMM_WORLD +me = comm_world.Get_rank() +nrank = comm_world.Get_size() +# List of files to process for current proc (integer division for slice bounds) +my_list = file_list[ (me*nfiles)//nrank : ((me+1)*nfiles)//nrank ] +# List of file numbers for current proc +my_number_list = number_list[ (me*nfiles)//nrank : ((me+1)*nfiles)//nrank ] + +# --- Run parallel analysis --- # +# ----------------------------- # +# Each MPI rank reads roughly (nb snapshots)/(nb ranks) snapshots. +# Works with any number of snapshots; per-snapshot results are +# collected in lists so that nothing is lost when a rank owns +# several snapshots. +zwin_list = []; a0_list = []; uzmean_list = [] +for filename in my_list: + zwin, a0 = get_a0( res_dir, filename ) + uzlab = read_raw_data.get_particle_field(filename, species, 'uz')/scc.c + select_particles = (uzlab > 5.) + uzlab = uzlab[ select_particles ] + zwin_list.append( zwin ) + a0_list.append( a0 ) + uzmean_list.append( np.mean(uzlab) ) + +# --- gather and rank 0 plots --- # +# ------------------------------- # +# Gather per-snapshot quantities to rank 0 to plot their history. +UZMEAN = comm_world.gather(uzmean_list, root=0) +ZWIN = comm_world.gather(zwin_list, root=0) +A0 = comm_world.gather(a0_list, root=0) +# Rank 0 does the plot. +if me == 0: + # Concatenate the per-rank lists, then convert to numpy arrays + UZMEAN, ZWIN, A0 = convert_to_np_array( + [sum(UZMEAN, []), sum(ZWIN, []), sum(A0, [])]) + # Plot and save + fig = plt.figure() + plt.subplot(2,1,1) + plt.plot(ZWIN*1.e3, UZMEAN) + plt.xlabel('z (mm)') + plt.ylabel('uz') + plt.title( 'beam energy' ) + plt.grid() + plt.subplot(2,1,2) + plt.plot(ZWIN*1.e3, A0) + plt.ylabel('a0') + plt.title( 'laser amplitude' ) + plt.grid() + fig.savefig('./image.png', bbox_inches='tight') + plt.close()
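For a quick serial look at a single snapshot, the same reader the script relies on can be used directly; a hedged sketch with illustrative paths::

    import read_raw_data
    # Inspect one lab-frame snapshot without MPI.
    snapshot = './lab_frame_data/snapshot00010'
    header = './lab_frame_data/Header'
    allrd, info = read_raw_data.read_lab_snapshot(snapshot, header)
    print( info['z'][0], info['z'][-1] )  # longitudinal extent of the window
    print( allrd['Ey'].shape )            # raw lab-frame field array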