aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--Docs/source/building/building.rst1
-rw-r--r--Docs/source/building/spack.rst47
-rw-r--r--Docs/source/building/summitdev.rst4
-rw-r--r--Docs/source/conf.py2
-rw-r--r--Docs/source/running_cpp/parameters.rst14
-rw-r--r--Docs/source/visualization/sensei.rst13
-rw-r--r--Examples/Physics_applications/laser_acceleration/laser_acceleration_PICMI.py17
-rw-r--r--Examples/Tests/Langmuir/inputs.multi.2d.rt2
-rw-r--r--Examples/Tests/Langmuir/inputs.rt2
-rw-r--r--Examples/Tests/Langmuir/langmuir_PICMI_rt.py48
-rwxr-xr-xExamples/Tests/PML/analysis_pml_ckc.py34
-rwxr-xr-xExamples/Tests/PML/analysis_pml_yee.py34
-rw-r--r--Examples/Tests/gpu_test/inputs69
-rwxr-xr-xExamples/Tests/gpu_test/script.sh48
-rw-r--r--GNUmakefile2
-rw-r--r--Python/pywarpx/Bucket.py4
-rw-r--r--Python/pywarpx/WarpX.py2
-rw-r--r--Python/pywarpx/picmi.py90
-rw-r--r--README.md1
-rw-r--r--Regression/WarpX-tests.ini33
-rw-r--r--Regression/prepare_file_travis.py19
-rw-r--r--Source/LaserParticleContainer.cpp254
-rw-r--r--Source/Make.WarpX16
-rw-r--r--Source/Make.package7
-rw-r--r--Source/ParticleContainer.H10
-rw-r--r--Source/ParticleContainer.cpp12
-rw-r--r--Source/PhysicalParticleContainer.H19
-rw-r--r--Source/PhysicalParticleContainer.cpp729
-rw-r--r--Source/RigidInjectedParticleContainer.H8
-rw-r--r--Source/RigidInjectedParticleContainer.cpp48
-rw-r--r--Source/WarpX.H30
-rw-r--r--Source/WarpX.cpp89
-rw-r--r--Source/WarpXComm.cpp16
-rw-r--r--Source/WarpXEvolve.cpp44
-rw-r--r--Source/WarpXIO.cpp95
-rw-r--r--Source/WarpXInitData.cpp9
-rw-r--r--Source/WarpXMove.cpp48
-rw-r--r--Source/WarpXPML.H7
-rw-r--r--Source/WarpXParticleContainer.H55
-rw-r--r--Source/WarpXParticleContainer.cpp480
-rw-r--r--Source/WarpXRegrid.cpp15
-rw-r--r--Source/WarpXUtil.cpp5
-rw-r--r--Source/WarpXWrappers.cpp6
-rw-r--r--Source/WarpX_f.H16
-rw-r--r--Source/WarpX_fft.F902
-rw-r--r--Source/WarpX_laser.F90218
-rw-r--r--Source/WarpX_picsar.F903
-rw-r--r--Tools/parallel_postproc.py83
48 files changed, 2053 insertions, 757 deletions
diff --git a/Docs/source/building/building.rst b/Docs/source/building/building.rst
index 5913f283d..33357a4b6 100644
--- a/Docs/source/building/building.rst
+++ b/Docs/source/building/building.rst
@@ -107,3 +107,4 @@ Advanced building instructions
spectral
cori
summitdev
+ spack
diff --git a/Docs/source/building/spack.rst b/Docs/source/building/spack.rst
new file mode 100644
index 000000000..d54c6cc10
--- /dev/null
+++ b/Docs/source/building/spack.rst
@@ -0,0 +1,47 @@
+Building WarpX with Spack
+===============================
+
+WarpX can be installed using Spack. From the Spack web page: "Spack is a package management tool designed to support multiple
+versions and configurations of software on a wide variety of platforms and environments."
+
+Spack is available from `github <https://github.com/spack/spack>`__. Spack only needs to be cloned and can be used right away - there are no installation
+steps. The spack command, "spack/bin/spack", can be used directly or "spack/bin" can be added to your execute path.
+
+WarpX is built with the single command
+
+::
+
+ spack install warpx
+
+This will build the 3-D version of WarpX using the master branch.
+At the very end of the output from build sequence, Spack tells you where the WarpX executable has been placed.
+Alternatively, the "spack load" command can be configured so that "spack load warpx" will put the executable in your execute path.
+
+To build using the dev branch, the command is
+
+::
+
+ spack install warpx@dev
+
+
+Other variants of WarpX can be installed, for example
+
+::
+
+ spack install warpx dims=2
+
+will build the 2-D version.
+
+::
+
+ spack install warpx debug=True
+
+will build with debugging turned on.
+
+::
+
+ spack install warpx %intel
+
+will build using the intel compiler (instead of gcc).
+
+The Python verson of WarpX is not yet available with Spack.
diff --git a/Docs/source/building/summitdev.rst b/Docs/source/building/summitdev.rst
index e194db73c..c5a3a7034 100644
--- a/Docs/source/building/summitdev.rst
+++ b/Docs/source/building/summitdev.rst
@@ -13,7 +13,7 @@ correct branch:
git clone https://github.com/ECP-WarpX/WarpX.git
cd WarpX
- git checkout gpu
+ git checkout dev
cd ..
git clone https://bitbucket.org/berkeleylab/picsar.git
@@ -33,4 +33,4 @@ Then, use the following set of commands to compile:
module load pgi
module load cuda
- make
+ make -j 4 USE_GPU=TRUE COMP=pgi
diff --git a/Docs/source/conf.py b/Docs/source/conf.py
index 1d4b97c49..ce6f436d3 100644
--- a/Docs/source/conf.py
+++ b/Docs/source/conf.py
@@ -88,6 +88,8 @@ todo_include_todos = False
#
html_theme = 'sphinx_rtd_theme'
+numfig = True
+
# Theme options are theme-specific and customize the look and feel of a theme
# further. For a list of options available for each theme, see the
# documentation.
diff --git a/Docs/source/running_cpp/parameters.rst b/Docs/source/running_cpp/parameters.rst
index 3c579d6fd..e0a0ac2b2 100644
--- a/Docs/source/running_cpp/parameters.rst
+++ b/Docs/source/running_cpp/parameters.rst
@@ -376,11 +376,10 @@ Numerics and algorithms
- ``2``: Direct deposition, vectorized
- ``3``: Direct deposition, non-optimized
- .. warning ::
+ .. warning::
- The vectorized Esirkepov deposition
- (``algo.current_deposition=0``) is currently not functional in WarpX.
- All the other methods (``1``, ``2`` and ``3``) are functional.
+ On GPU, use ``algo.current_deposition=0`` for Esirkepov
+ or ``3`` for direct deposition.
* ``algo.charge_deposition`` (`integer`)
The algorithm for the charge density deposition:
@@ -393,6 +392,10 @@ Numerics and algorithms
- ``0``: Vectorized version
- ``1``: Non-optimized version
+ .. warning::
+
+ The vectorized version does not run on GPU. Use
+ ``algo.field_gather=1`` when running on GPU.
* ``algo.particle_pusher`` (`integer`)
The algorithm for the particle pusher:
@@ -470,6 +473,9 @@ Diagnostics and output
Only used when mesh refinement is activated and ``warpx.plot_raw_fields`` is ``1``.
Whether to output the data of the coarse patch, in the plot files.
+* ``amr.plot_file`` (`string`)
+ Root for output file names. Supports sub-directories. Default `plotfiles/plt`
+
Checkpoints and restart
-----------------------
WarpX supports checkpoints/restart via AMReX.
diff --git a/Docs/source/visualization/sensei.rst b/Docs/source/visualization/sensei.rst
index 4881f9d78..59f879e47 100644
--- a/Docs/source/visualization/sensei.rst
+++ b/Docs/source/visualization/sensei.rst
@@ -233,19 +233,6 @@ To access the SENSEI modulefiles on cori first add the SENSEI install to the sea
module use /usr/common/software/sensei/modulefiles
-Examples
-===================
-
-2D LPA Example
---------------
-
-* :download:`input file<./inputs.2d>`
-* :download:`xml file<./ez2d.xml>`
-* :download:`session file<./ez2d.session>`
-* :E field screen shot at time step 40
-.. figure:: ez2d_00040.png
- :alt: picture
-
3D LPA Example
--------------
This section shows an example of using SENSEI and three different back ends on
diff --git a/Examples/Physics_applications/laser_acceleration/laser_acceleration_PICMI.py b/Examples/Physics_applications/laser_acceleration/laser_acceleration_PICMI.py
index 77cd2e176..867e7a951 100644
--- a/Examples/Physics_applications/laser_acceleration/laser_acceleration_PICMI.py
+++ b/Examples/Physics_applications/laser_acceleration/laser_acceleration_PICMI.py
@@ -95,6 +95,20 @@ solver = picmi.ElectromagneticSolver(grid=grid, method='CKC', cfl=1.)
##########################
+# diagnostics
+##########################
+
+field_diag1 = picmi.FieldDiagnostic(grid = grid,
+ period = 100,
+ warpx_plot_raw_fields = 1,
+ warpx_plot_raw_fields_guards = 1,
+ warpx_plot_finepatch = 1,
+ warpx_plot_crsepatch = 1)
+
+part_diag1 = picmi.ParticleDiagnostic(period = 100,
+ species = [electrons])
+
+##########################
# simulation setup
##########################
@@ -102,7 +116,6 @@ sim = picmi.Simulation(solver = solver,
max_steps = max_steps,
verbose = 1,
cfl = 1.0,
- warpx_plot_int = 100,
warpx_current_deposition_algo = 3,
warpx_charge_deposition_algo = 0,
warpx_field_gathering_algo = 0,
@@ -112,6 +125,8 @@ sim.add_species(electrons, layout=picmi.GriddedLayout(grid=grid, n_macroparticle
sim.add_laser(laser, injection_method=laser_antenna)
+sim.add_diagnostic(field_diag1)
+sim.add_diagnostic(part_diag1)
##########################
# simulation run
diff --git a/Examples/Tests/Langmuir/inputs.multi.2d.rt b/Examples/Tests/Langmuir/inputs.multi.2d.rt
index 6f301a7c2..ce82fd1c7 100644
--- a/Examples/Tests/Langmuir/inputs.multi.2d.rt
+++ b/Examples/Tests/Langmuir/inputs.multi.2d.rt
@@ -25,7 +25,7 @@ warpx.serialize_ics = 1
warpx.verbose = 1
# Algorithms
-algo.current_deposition = 3
+algo.current_deposition = 0
algo.charge_deposition = 0
algo.field_gathering = 1
algo.particle_pusher = 0
diff --git a/Examples/Tests/Langmuir/inputs.rt b/Examples/Tests/Langmuir/inputs.rt
index ec265bca7..3ec8fc47d 100644
--- a/Examples/Tests/Langmuir/inputs.rt
+++ b/Examples/Tests/Langmuir/inputs.rt
@@ -25,7 +25,7 @@ warpx.serialize_ics = 1
warpx.verbose = 1
# Algorithms
-algo.current_deposition = 3
+algo.current_deposition = 0
algo.charge_deposition = 0
algo.field_gathering = 1
algo.particle_pusher = 0
diff --git a/Examples/Tests/Langmuir/langmuir_PICMI_rt.py b/Examples/Tests/Langmuir/langmuir_PICMI_rt.py
new file mode 100644
index 000000000..dab414b3b
--- /dev/null
+++ b/Examples/Tests/Langmuir/langmuir_PICMI_rt.py
@@ -0,0 +1,48 @@
+# --- Simple example of Langmuir oscillations in a uniform plasma
+
+import numpy as np
+from pywarpx import picmi
+
+nx = 64
+ny = 64
+nz = 64
+
+xmin = -20.e-6
+ymin = -20.e-6
+zmin = -20.e-6
+xmax = +20.e-6
+ymax = +20.e-6
+zmax = +20.e-6
+
+uniform_plasma = picmi.UniformDistribution(density=1.e25, upper_bound=[0., None, None], directed_velocity=[0.1, 0., 0.])
+
+electrons = picmi.Species(particle_type='electron', name='electrons', initial_distribution=uniform_plasma)
+
+grid = picmi.Cartesian3DGrid(number_of_cells = [nx, ny, nz],
+ lower_bound = [xmin, ymin, zmin],
+ upper_bound = [xmax, ymax, zmax],
+ lower_boundary_conditions = ['periodic', 'periodic', 'periodic'],
+ upper_boundary_conditions = ['periodic', 'periodic', 'periodic'],
+ moving_window_velocity = [0., 0., 0.],
+ warpx_max_grid_size=32, warpx_coord_sys=0)
+
+solver = picmi.ElectromagneticSolver(grid=grid, cfl=1.)
+
+sim = picmi.Simulation(solver = solver,
+ max_steps = 40,
+ verbose = 1,
+ warpx_plot_int = 40,
+ warpx_current_deposition_algo = 3,
+ warpx_charge_deposition_algo = 0,
+ warpx_field_gathering_algo = 0,
+ warpx_particle_pusher_algo = 0)
+
+sim.add_species(electrons, layout=picmi.GriddedLayout(n_macroparticle_per_cell=[2,2,2], grid=grid))
+
+# write_inputs will create an inputs file that can be used to run
+# with the compiled version.
+sim.write_input_file(file_name='inputs_from_PICMI')
+
+# Alternatively, sim.step will run WarpX, controlling it from Python
+sim.step()
+
diff --git a/Examples/Tests/PML/analysis_pml_ckc.py b/Examples/Tests/PML/analysis_pml_ckc.py
new file mode 100755
index 000000000..90d13cecc
--- /dev/null
+++ b/Examples/Tests/PML/analysis_pml_ckc.py
@@ -0,0 +1,34 @@
+#! /usr/bin/env python
+
+import sys
+import yt ; yt.funcs.mylog.setLevel(0)
+import numpy as np
+import scipy.constants as scc
+
+filename = sys.argv[1]
+
+############################
+### INITIAL LASER ENERGY ###
+############################
+energy_start = 9.1301289517e-08
+
+##########################
+### FINAL LASER ENERGY ###
+##########################
+ds = yt.load( filename )
+all_data_level_0 = ds.covering_grid(level=0,left_edge=ds.domain_left_edge, dims=ds.domain_dimensions)
+Bx = all_data_level_0['boxlib', 'Bx'].v.squeeze()
+By = all_data_level_0['boxlib', 'By'].v.squeeze()
+Bz = all_data_level_0['boxlib', 'Bz'].v.squeeze()
+Ex = all_data_level_0['boxlib', 'Ex'].v.squeeze()
+Ey = all_data_level_0['boxlib', 'Ey'].v.squeeze()
+Ez = all_data_level_0['boxlib', 'Ez'].v.squeeze()
+energyE = np.sum(scc.epsilon_0/2*(Ex**2+Ey**2+Ez**2))
+energyB = np.sum(1./scc.mu_0/2*(Bx**2+By**2+Bz**2))
+energy_end = energyE + energyB
+
+Reflectivity = energy_end/energy_start
+Reflectivity_theory = 1.8015e-06
+
+assert( abs(Reflectivity-Reflectivity_theory) < 5./100 * Reflectivity_theory )
+
diff --git a/Examples/Tests/PML/analysis_pml_yee.py b/Examples/Tests/PML/analysis_pml_yee.py
new file mode 100755
index 000000000..6234cd5d2
--- /dev/null
+++ b/Examples/Tests/PML/analysis_pml_yee.py
@@ -0,0 +1,34 @@
+#! /usr/bin/env python
+
+import sys
+import yt ; yt.funcs.mylog.setLevel(0)
+import numpy as np
+import scipy.constants as scc
+
+filename = sys.argv[1]
+
+############################
+### INITIAL LASER ENERGY ###
+############################
+energy_start = 9.1301289517e-08
+
+##########################
+### FINAL LASER ENERGY ###
+##########################
+ds = yt.load( filename )
+all_data_level_0 = ds.covering_grid(level=0,left_edge=ds.domain_left_edge, dims=ds.domain_dimensions)
+Bx = all_data_level_0['boxlib', 'Bx'].v.squeeze()
+By = all_data_level_0['boxlib', 'By'].v.squeeze()
+Bz = all_data_level_0['boxlib', 'Bz'].v.squeeze()
+Ex = all_data_level_0['boxlib', 'Ex'].v.squeeze()
+Ey = all_data_level_0['boxlib', 'Ey'].v.squeeze()
+Ez = all_data_level_0['boxlib', 'Ez'].v.squeeze()
+energyE = np.sum(scc.epsilon_0/2*(Ex**2+Ey**2+Ez**2))
+energyB = np.sum(1./scc.mu_0/2*(Bx**2+By**2+Bz**2))
+energy_end = energyE + energyB
+
+Reflectivity = energy_end/energy_start
+Reflectivity_theory = 5.683000058954201e-07
+
+assert( abs(Reflectivity-Reflectivity_theory) < 5./100 * Reflectivity_theory )
+
diff --git a/Examples/Tests/gpu_test/inputs b/Examples/Tests/gpu_test/inputs
new file mode 100644
index 000000000..18bb80d26
--- /dev/null
+++ b/Examples/Tests/gpu_test/inputs
@@ -0,0 +1,69 @@
+# Maximum number of time steps
+max_step = 10
+
+# number of grid points
+amr.n_cell = 64 64 64
+
+# Maximum allowable size of each subdomain in the problem domain;
+# this is used to decompose the domain for parallel calculations.
+amr.max_grid_size = 64
+
+# Maximum level in hierarchy (for now must be 0, i.e., one level in total)
+amr.max_level = 0
+
+# Geometry
+geometry.coord_sys = 0 # 0: Cartesian
+geometry.is_periodic = 1 1 1 # Is periodic?
+geometry.prob_lo = -20.e-6 -20.e-6 -20.e-6 # physical domain
+geometry.prob_hi = 20.e-6 20.e-6 20.e-6
+
+warpx.serialize_ics = 1
+warpx.do_pml = 0
+
+# Verbosity
+warpx.verbose = 1
+
+# Algorithms
+algo.current_deposition = 3
+algo.charge_deposition = 0
+algo.field_gathering = 1
+algo.particle_pusher = 0
+
+interpolation.nox = 1
+interpolation.noy = 1
+interpolation.noz = 1
+
+particles.do_tiling = 0
+
+# CFL
+warpx.cfl = 1.0
+
+amr.plot_int = -10
+
+particles.nspecies = 1
+particles.species_names = electrons
+
+electrons.charge = -q_e
+electrons.mass = m_e
+electrons.injection_style = "NUniformPerCell"
+electrons.num_particles_per_cell_each_dim = 2 2 2
+electrons.profile = constant
+electrons.density = 1.e25 # number of electrons per m^3
+electrons.momentum_distribution_type = "gaussian"
+electrons.ux_th = 0.01 # uth the std of the (unitless) momentum
+electrons.uy_th = 0.01 # uth the std of the (unitless) momentum
+electrons.uz_th = 0.01 # uth the std of the (unitless) momentum
+electrons.uz_m = 10. # Mean momentum along z (unitless)
+
+# Laser
+warpx.use_laser = 1
+laser.profile = Gaussian
+laser.position = 0. 0. 0.e-6 # This point is on the laser plane
+laser.direction = 0. 0. 1. # The plane normal direction
+laser.polarization = 1. 0. 0. # The main polarization vector
+laser.e_max = 16.e12 # Maximum amplitude of the laser field (in V/m)
+laser.profile_waist = 3.e-6 # The waist of the laser (in meters)
+laser.profile_duration = 15.e-15 # The duration of the laser (in seconds)
+laser.profile_t_peak = 30.e-15 # The time at which the laser reaches its peak (in seconds)
+laser.profile_focal_distance = 100.e-6 # Focal distance from the antenna (in meters)
+laser.wavelength = 0.8e-6 # The wavelength of the laser (in meters)
diff --git a/Examples/Tests/gpu_test/script.sh b/Examples/Tests/gpu_test/script.sh
new file mode 100755
index 000000000..cd6b0eadd
--- /dev/null
+++ b/Examples/Tests/gpu_test/script.sh
@@ -0,0 +1,48 @@
+#!/bin/bash
+#BSUB -P GEN109
+#BSUB -W 0:10
+#BSUB -nnodes 1
+#BSUB -J WarpX
+#BSUB -o WarpXo.%J
+#BSUB -e WarpXe.%J
+
+module load pgi
+module load cuda/9.1.85
+module list
+set -x
+
+omp=1
+export OMP_NUM_THREADS=${omp}
+#EXE="../main3d.pgi.DEBUG.TPROF.MPI.ACC.CUDA.ex"
+EXE="../main3d.pgi.TPROF.MPI.ACC.CUDA.ex"
+#JSRUN="jsrun -n 4 -a 1 -g 1 -c 1 --bind=packed:${omp} "
+#JSRUN="jsrun -n 1 -a 4 -g 4 -c 4 --bind=packed:${omp} "
+JSRUN="jsrun -n 1 -a 1 -g 1 -c 1 --bind=packed:${omp} "
+
+rundir="${LSB_JOBNAME}-${LSB_JOBID}"
+mkdir $rundir
+cp $0 $rundir
+cp inputs $rundir
+cd $rundir
+
+# 1. Run normally
+${JSRUN} --smpiargs="-gpu" ${EXE} inputs
+
+# 2. Run under cuda-memcheck
+# ${JSRUN} --smpiargs="-gpu" cuda-memcheck ${EXE} inputs &> memcheck.txt
+
+# 3. Run under nvprof and direct all stdout and stderr to nvprof.txt
+#${JSRUN} --smpiargs="-gpu" nvprof --profile-child-processes ${EXE} inputs &> nvprof.txt
+
+# 4. Run under nvprof and store performance data in a nvvp file
+# Can be converted to text using nvprof -i nvprof-timeline-%p.nvvp
+#${JSRUN} --smpiargs="-gpu" nvprof --profile-child-processes -o nvprof-timeline-%p.nvvp ${EXE} inputs
+
+# COLLECT PERFORMANCE METRICS - THIS IS MUCH SLOWER. Set nsteps=2 in the inputs files
+# 5. Run under nvprof and collect metrics for a subset of kernels
+#${JSRUN} --smpiargs="-gpu" nvprof --profile-child-processes --kernels '(deposit_current|gather_\w+_field|push_\w+_boris)' --analysis-metrics -o nvprof-metrics-kernel-%p.nvvp ${EXE} inputs
+
+# 6. Run under nvprof and collect metrics for all kernels -- much slower!
+#${JSRUN} --smpiargs="-gpu" nvprof --profile-child-processes --analysis-metrics -o nvprof-metrics-%p.nvvp ${EXE} inputs
+
+cp ../WarpX*.${LSB_JOBID} .
diff --git a/GNUmakefile b/GNUmakefile
index 01b9f03ef..06f4c6229 100644
--- a/GNUmakefile
+++ b/GNUmakefile
@@ -10,6 +10,7 @@ DIM = 3
COMP = gcc
#COMP = intel
+#COMP = pgi
TINY_PROFILE = TRUE
#PROFILE = TRUE
@@ -19,6 +20,7 @@ TINY_PROFILE = TRUE
STORE_OLD_PARTICLE_ATTRIBS = FALSE
USE_OMP = TRUE
+USE_GPU = FALSE
EBASE = main
diff --git a/Python/pywarpx/Bucket.py b/Python/pywarpx/Bucket.py
index c73b3dac9..66494a700 100644
--- a/Python/pywarpx/Bucket.py
+++ b/Python/pywarpx/Bucket.py
@@ -20,6 +20,10 @@ class Bucket(object):
except KeyError:
return object.__getattr__(self, name)
+ def check_consistency(self, vname, value, errmsg):
+ if vname in self.argvattrs:
+ assert (self.argvattrs[vname] is None) or (self.argvattrs[vname] == value), Exception(errmsg)
+
def attrlist(self):
"Concatenate the attributes into a string"
result = []
diff --git a/Python/pywarpx/WarpX.py b/Python/pywarpx/WarpX.py
index 9c4a4b380..f58d4f111 100644
--- a/Python/pywarpx/WarpX.py
+++ b/Python/pywarpx/WarpX.py
@@ -70,7 +70,7 @@ class WarpX(Bucket):
argv = self.create_argv_list()
with open(filename, 'w') as ff:
- for k, v in kw.iteritems():
+ for k, v in kw.items():
ff.write('{0} = {1}\n'.format(k, v))
for arg in argv:
diff --git a/Python/pywarpx/picmi.py b/Python/pywarpx/picmi.py
index 1d02f8b29..81b1543bb 100644
--- a/Python/pywarpx/picmi.py
+++ b/Python/pywarpx/picmi.py
@@ -462,6 +462,9 @@ class Simulation(picmistandard.PICMI_Simulation):
self.lasers[i].initialize_inputs()
self.laser_injection_methods[i].initialize_inputs(self.lasers[i])
+ for diagnostic in self.diagnostics:
+ diagnostic.initialize_inputs()
+
def initialize_warpx(self):
if self.warpx_initialized:
return
@@ -492,3 +495,90 @@ class Simulation(picmistandard.PICMI_Simulation):
if self.warpx_initialized:
self.warpx_initialized = False
pywarpx.warpx.finalize()
+
+
+# ----------------------------
+# Simulation frame diagnostics
+# ----------------------------
+
+
+class FieldDiagnostic(picmistandard.PICMI_FieldDiagnostic):
+ def init(self, kw):
+
+ self.plot_raw_fields = kw.pop('warpx_plot_raw_fields', None)
+ self.plot_raw_fields_guards = kw.pop('warpx_plot_raw_fields_guards', None)
+ self.plot_finepatch = kw.pop('warpx_plot_finepatch', None)
+ self.plot_crsepatch = kw.pop('warpx_plot_crsepatch', None)
+
+ def initialize_inputs(self):
+ # --- For now, the period must be the same as plot_int if set
+ pywarpx.amr.check_consistency('plot_int', self.period, 'The period must be the same for all simulation frame diagnostics')
+ pywarpx.amr.plot_int = self.period
+
+ if 'rho' in self.data_list:
+ pywarpx.warpx.plot_rho = 1
+ if 'dive' in self.data_list:
+ pywarpx.warpx.plot_dive = 1
+ if 'divb' in self.data_list:
+ pywarpx.warpx.plot_divb = 1
+ if 'F' in self.data_list:
+ pywarpx.warpx.plot_F = 1
+ if 'proc_number' in self.data_list:
+ pywarpx.warpx.plot_proc_number = 1
+
+ pywarpx.warpx.plot_raw_fields = self.plot_raw_fields
+ pywarpx.warpx.plot_raw_fields_guards = self.plot_raw_fields_guards
+
+ pywarpx.amr.check_consistency('plot_finepatch', self.plot_finepatch, 'The fine patch flag must be the same for all simulation frame field diagnostics')
+ pywarpx.amr.check_consistency('plot_crsepatch', self.plot_crsepatch, 'The coarse patch flag must be the same for all simulation frame field diagnostics')
+ pywarpx.warpx.plot_finepatch = self.plot_finepatch
+ pywarpx.warpx.plot_crsepatch = self.plot_crsepatch
+
+class ElectrostaticFieldDiagnostic(picmistandard.PICMI_ElectrostaticFieldDiagnostic):
+ def initialize_inputs(self):
+ # --- For now, the period must be the same as plot_int if set
+ pywarpx.amr.check_consistency('plot_int', self.period, 'The period must be the same for all simulation frame diagnostics')
+ pywarpx.amr.plot_int = self.period
+
+
+class ParticleDiagnostic(picmistandard.PICMI_ParticleDiagnostic):
+ def initialize_inputs(self):
+ # --- For now, the period must be the same as plot_int if set
+ pywarpx.amr.check_consistency('plot_int', self.period, 'The period must be the same for all simulation frame diagnostics')
+ pywarpx.amr.plot_int = self.period
+
+ if 'part_per_cell' in self.data_list:
+ pywarpx.warpx.plot_part_per_cell = 1
+ if 'part_per_grid' in self.data_list:
+ pywarpx.warpx.plot_part_per_grid = 1
+ if 'part_per_proc' in self.data_list:
+ pywarpx.warpx.plot_part_per_proc = 1
+
+
+# ----------------------------
+# Lab frame diagnostics
+# ----------------------------
+
+
+class LabFrameFieldDiagnostic(picmistandard.PICMI_LabFrameFieldDiagnostic):
+ def initialize_inputs(self):
+
+ pywarpx.warpx.check_consistency('num_snapshots_lab', self.num_snapshots, 'The number of snapshots must be the same in all lab frame diagnostics')
+ pywarpx.warpx.check_consistency('dt_snapshots_lab', self.dt_snapshots, 'The time between snapshots must be the same in all lab frame diagnostics')
+
+ pywarpx.warpx.do_boosted_frame_diagnostic = 1
+ pywarpx.warpx.num_snapshots_lab = self.num_snapshots
+ pywarpx.warpx.dt_snapshots_lab = self.dt_snapshots
+ pywarpx.warpx.do_boosted_frame_fields = 1
+
+
+class LabFrameParticleDiagnostic(picmistandard.PICMI_LabFrameParticleDiagnostic):
+ def initialize_inputs(self):
+
+ pywarpx.warpx.check_consistency('num_snapshots_lab', self.num_snapshots, 'The number of snapshots must be the same in all lab frame diagnostics')
+ pywarpx.warpx.check_consistency('dt_snapshots_lab', self.dt_snapshots, 'The time between snapshots must be the same in all lab frame diagnostics')
+
+ pywarpx.warpx.do_boosted_frame_diagnostic = 1
+ pywarpx.warpx.num_snapshots_lab = self.num_snapshots
+ pywarpx.warpx.dt_snapshots_lab = self.dt_snapshots
+ pywarpx.warpx.do_boosted_frame_particles = 1
diff --git a/README.md b/README.md
index 87f53f969..d2d3bd5aa 100644
--- a/README.md
+++ b/README.md
@@ -6,3 +6,4 @@ WarpX is an advanced electromagnetic Particle-In-Cell code.
It supports many features including Perfectly-Matched Layers (PML), mesh refinement, and the boosted-frame technique.
In order to learn how to install and run the code, please see the online documentation: https://ecp-warpx.github.io/index.html
+
diff --git a/Regression/WarpX-tests.ini b/Regression/WarpX-tests.ini
index 14af09277..60647e89b 100644
--- a/Regression/WarpX-tests.ini
+++ b/Regression/WarpX-tests.ini
@@ -60,8 +60,9 @@ branch = master
[pml_x_yee]
buildDir = .
inputFile = Examples/Tests/PML/inputs2d
-runtime_params = warpx.do_dynamic_scheduling=0 warpx.maxwell_fdtd_solver=yee
+runtime_params = warpx.do_dynamic_scheduling=0 algo.maxwell_fdtd_solver=yee
dim = 2
+addToCompileString =
restartTest = 0
useMPI = 1
numprocs = 2
@@ -69,13 +70,14 @@ useOMP = 1
numthreads = 2
compileTest = 0
doVis = 0
-analysisRoutine = Examples/Tests/PML/analysis_pml.py
+analysisRoutine = Examples/Tests/PML/analysis_pml_yee.py
[pml_x_ckc]
buildDir = .
inputFile = Examples/Tests/PML/inputs2d
-runtime_params = warpx.do_dynamic_scheduling=0 warpx.maxwell_fdtd_solver=ckc
+runtime_params = warpx.do_dynamic_scheduling=0 algo.maxwell_fdtd_solver=ckc
dim = 2
+addToCompileString =
restartTest = 0
useMPI = 1
numprocs = 2
@@ -83,13 +85,14 @@ useOMP = 1
numthreads = 2
compileTest = 0
doVis = 0
-analysisRoutine = Examples/Tests/PML/analysis_pml.py
+analysisRoutine = Examples/Tests/PML/analysis_pml_ckc.py
[nci_corrector]
buildDir = .
inputFile = Examples/Modules/nci_corrector/inputs2d
runtime_params = warpx.do_dynamic_scheduling=0
dim = 2
+addToCompileString =
restartTest = 0
useMPI = 1
numprocs = 2
@@ -103,6 +106,7 @@ analysisRoutine = Examples/Modules/nci_corrector/ncicorr_analysis.py
buildDir = .
inputFile = Examples/Tests/Langmuir/inputs.rt
dim = 2
+addToCompileString =
restartTest = 0
useMPI = 1
numprocs = 2
@@ -120,6 +124,7 @@ analysisOutputImage = langmuir2d_analysis.png
buildDir = .
inputFile = Examples/Tests/Langmuir/inputs.rt
dim = 3
+addToCompileString =
restartTest = 0
useMPI = 1
numprocs = 4
@@ -137,6 +142,7 @@ analysisOutputImage = langmuir_x_analysis.png
buildDir = .
inputFile = Examples/Tests/Langmuir/inputs.rt
dim = 3
+addToCompileString =
restartTest = 0
useMPI = 1
numprocs = 4
@@ -154,6 +160,7 @@ analysisOutputImage = langmuir_y_analysis.png
buildDir = .
inputFile = Examples/Tests/Langmuir/inputs.rt
dim = 3
+addToCompileString =
restartTest = 0
useMPI = 1
numprocs = 4
@@ -171,6 +178,7 @@ analysisOutputImage = langmuir_z_analysis.png
buildDir = .
inputFile = Examples/Tests/Langmuir/inputs.multi.rt
dim = 3
+addToCompileString =
restartTest = 0
useMPI = 1
numprocs = 4
@@ -224,6 +232,7 @@ analysisOutputImage = langmuir_multi_2d_analysis.png
buildDir = .
inputFile = Examples/Modules/laser_injection/inputs.rt
dim = 3
+addToCompileString =
restartTest = 0
useMPI = 1
numprocs = 4
@@ -239,6 +248,7 @@ analysisOutputImage = laser_analysis.png
buildDir = .
inputFile = Examples/Modules/laser_injection/inputs.2d.rt
dim = 2
+addToCompileString =
restartTest = 0
useMPI = 1
numprocs = 2
@@ -246,6 +256,7 @@ useOMP = 1
numthreads = 2
compileTest = 0
doVis = 0
+runtime_params = warpx.do_dynamic_scheduling=0 warpx.serialize_ics=1
compareParticles = 0
[LaserAcceleration]
@@ -253,6 +264,7 @@ buildDir = .
inputFile = Examples/Physics_applications/laser_acceleration/inputs.3d
runtime_params = warpx.do_dynamic_scheduling=0 amr.n_cell=32 32 256 max_step=100 electrons.zmin=0.e-6 warpx.serialize_ics=1
dim = 3
+addToCompileString =
restartTest = 0
useMPI = 1
numprocs = 2
@@ -268,6 +280,7 @@ buildDir = .
inputFile = Examples/Tests/subcycling/inputs.2d
runtime_params = warpx.serialize_ics=1 warpx.do_dynamic_scheduling=0
dim = 2
+addToCompileString =
restartTest = 0
useMPI = 1
numprocs = 2
@@ -282,6 +295,7 @@ buildDir = .
inputFile = Examples/Physics_applications/laser_acceleration/inputs.2d
runtime_params = amr.max_level=1 max_step=100 warpx.serialize_ics=1
dim = 2
+addToCompileString =
restartTest = 0
useMPI = 1
numprocs = 2
@@ -297,6 +311,7 @@ buildDir = .
inputFile = Examples/Physics_applications/plasma_acceleration/inputs.2d
runtime_params = amr.max_level=1 amr.n_cell=32 512 max_step=100 plasma_e.zmin=-200.e-6 warpx.serialize_ics=1 warpx.do_dynamic_scheduling=0
dim = 2
+addToCompileString =
restartTest = 0
useMPI = 1
numprocs = 2
@@ -309,9 +324,10 @@ particleTypes = beam driver plasma_e
[Python_Langmuir]
buildDir = .
-inputFile = Examples/Tests/Langmuir/langmuir_PICMI.py
-customRunCmd = python langmuir_PICMI.py
+inputFile = Examples/Tests/Langmuir/langmuir_PICMI_rt.py
+customRunCmd = python langmuir_PICMI_rt.py
dim = 3
+addToCompileString =
restartTest = 0
useMPI = 1
numprocs = 2
@@ -327,6 +343,7 @@ outputFile = plotfiles/plt00040
buildDir = .
inputFile = Examples/Physics_applications/uniform_plasma/inputs.3d
dim = 3
+addToCompileString =
restartTest = 1
restartFileNum = 6
useMPI = 1
@@ -342,6 +359,7 @@ particleTypes = electrons
buildDir = tests/CurrentDeposition
inputFile = inputs
dim = 3
+addToCompileString =
restartTest = 0
useMPI = 1
numprocs = 1
@@ -355,6 +373,7 @@ outputFile = plotfiles/plt00000
buildDir = tests/FieldGather
inputFile = inputs
dim = 3
+addToCompileString =
restartTest = 0
useMPI = 1
numprocs = 1
@@ -368,6 +387,7 @@ outputFile = plotfiles/plt00000
buildDir = tests/FieldSolver
inputFile = inputs
dim = 3
+addToCompileString =
restartTest = 0
useMPI = 1
numprocs = 1
@@ -381,6 +401,7 @@ outputFile = plotfiles/plt00000
buildDir = tests/ParticlePusher
inputFile = inputs
dim = 3
+addToCompileString =
restartTest = 0
useMPI = 1
numprocs = 1
diff --git a/Regression/prepare_file_travis.py b/Regression/prepare_file_travis.py
index 6d0e0dfc8..82b3c8e0e 100644
--- a/Regression/prepare_file_travis.py
+++ b/Regression/prepare_file_travis.py
@@ -1,22 +1,35 @@
# This script modifies `WarpX-test.ini` (which is used for nightly builds)
# and creates the file `travis-test.ini` (which is used for continous
# integration on TravisCI (https://travis-ci.org/)
-# The subtests that are selected are controlled by the environement WARPX_DIM
+# The subtests that are selected are controlled by WARPX_TEST_DIM
+# The architecture (CPU/GPU) is selected by WARPX_TEST_ARCH
import re
import os
# Get relevant environment variables
-dim = os.environ.get('WARPX_DIM', None)
+dim = os.environ.get('WARPX_TEST_DIM', None)
+arch = os.environ.get('WARPX_TEST_ARCH', 'CPU')
+
+# Find the directory in which the tests should be run
+current_dir = os.getcwd()
+test_dir = re.sub('warpx/Regression', '', current_dir )
with open('WarpX-tests.ini') as f:
text = f.read()
# Replace default folder name
-text = re.sub('/home/regtester/AMReX_RegTesting/', '/home/travis/', text)
+text = re.sub('/home/regtester/AMReX_RegTesting', test_dir, text)
# Add doComparison = 0 for each test
text = re.sub( '\[(?P<name>.*)\]\nbuildDir = ',
'[\g<name>]\ndoComparison = 0\nbuildDir = ', text )
+# Change compile options when running on GPU
+if arch == 'GPU':
+ text = re.sub( 'addToCompileString =',
+ 'addToCompileString = USE_GPU=TRUE USE_OMP=FALSE USE_ACC=TRUE', text)
+ text = re.sub( 'COMP\s*=.*', 'COMP = pgi', text )
+print('Compiling for %s' %arch)
+
# Use only 2 cores for compiling
text = re.sub( 'numMakeJobs = \d+', 'numMakeJobs = 2', text )
diff --git a/Source/LaserParticleContainer.cpp b/Source/LaserParticleContainer.cpp
index 8c5d799e3..cf669d32d 100644
--- a/Source/LaserParticleContainer.cpp
+++ b/Source/LaserParticleContainer.cpp
@@ -91,7 +91,7 @@ LaserParticleContainer::LaserParticleContainer (AmrCore* amr_core, int ispecies)
if (WarpX::gamma_boost > 1.) {
// Check that the laser direction is equal to the boost direction
- AMREX_ALWAYS_ASSERT_WITH_MESSAGE(
+ AMREX_ALWAYS_ASSERT_WITH_MESSAGE(
nvec[0]*WarpX::boost_direction[0]
+ nvec[1]*WarpX::boost_direction[1]
+ nvec[2]*WarpX::boost_direction[2] - 1. < 1.e-12,
@@ -110,22 +110,22 @@ LaserParticleContainer::LaserParticleContainer (AmrCore* amr_core, int ispecies)
p_X = { p_X[0]*s, p_X[1]*s, p_X[2]*s };
Real dp = std::inner_product(nvec.begin(), nvec.end(), p_X.begin(), 0.0);
- AMREX_ALWAYS_ASSERT_WITH_MESSAGE(std::abs(dp) < 1.0e-14,
+ AMREX_ALWAYS_ASSERT_WITH_MESSAGE(std::abs(dp) < 1.0e-14,
"Laser plane vector is not perpendicular to the main polarization vector");
p_Y = CrossProduct(nvec, p_X); // The second polarization vector
s = 1.0/std::sqrt(stc_direction[0]*stc_direction[0] + stc_direction[1]*stc_direction[1] + stc_direction[2]*stc_direction[2]);
- stc_direction = { stc_direction[0]*s, stc_direction[1]*s, stc_direction[2]*s };
+ stc_direction = { stc_direction[0]*s, stc_direction[1]*s, stc_direction[2]*s };
dp = std::inner_product(nvec.begin(), nvec.end(), stc_direction.begin(), 0.0);
- AMREX_ALWAYS_ASSERT_WITH_MESSAGE(std::abs(dp) < 1.0e-14,
+ AMREX_ALWAYS_ASSERT_WITH_MESSAGE(std::abs(dp) < 1.0e-14,
"stc_direction is not perpendicular to the laser plane vector");
-
+
// Get angle between p_X and stc_direction
// in 2d, stcs are in the simulation plane
#if AMREX_SPACEDIM == 3
- theta_stc = acos(stc_direction[0]*p_X[0] +
- stc_direction[1]*p_X[1] +
+ theta_stc = acos(stc_direction[0]*p_X[0] +
+ stc_direction[1]*p_X[1] +
stc_direction[2]*p_X[2]);
#else
theta_stc = 0.;
@@ -250,7 +250,7 @@ LaserParticleContainer::InitData (int lev)
BoxArray plane_ba { Box {IntVect(plane_lo[0],0), IntVect(plane_hi[0],0)} };
#endif
- Vector<Real> particle_x, particle_y, particle_z, particle_w;
+ RealVector particle_x, particle_y, particle_z, particle_w;
const DistributionMapping plane_dm {plane_ba, nprocs};
const Vector<int>& procmap = plane_dm.ProcessorMap();
@@ -263,7 +263,7 @@ LaserParticleContainer::InitData (int lev)
{
const Vector<Real>& pos = Transform(cell[0], cell[1]);
#if (AMREX_SPACEDIM == 3)
- const Real* x = pos.data();
+ const Real* x = pos.dataPtr();
#else
const Real x[2] = {pos[0], pos[2]};
#endif
@@ -281,15 +281,15 @@ LaserParticleContainer::InitData (int lev)
}
}
const int np = particle_z.size();
- Vector<Real> particle_ux(np, 0.0);
- Vector<Real> particle_uy(np, 0.0);
- Vector<Real> particle_uz(np, 0.0);
+ RealVector particle_ux(np, 0.0);
+ RealVector particle_uy(np, 0.0);
+ RealVector particle_uz(np, 0.0);
if (Verbose()) amrex::Print() << "Adding laser particles\n";
AddNParticles(lev,
- np, particle_x.data(), particle_y.data(), particle_z.data(),
- particle_ux.data(), particle_uy.data(), particle_uz.data(),
- 1, particle_w.data(), 1);
+ np, particle_x.dataPtr(), particle_y.dataPtr(), particle_z.dataPtr(),
+ particle_ux.dataPtr(), particle_uy.dataPtr(), particle_uz.dataPtr(),
+ 1, particle_w.dataPtr(), 1);
}
void
@@ -297,8 +297,8 @@ LaserParticleContainer::Evolve (int lev,
const MultiFab&, const MultiFab&, const MultiFab&,
const MultiFab&, const MultiFab&, const MultiFab&,
MultiFab& jx, MultiFab& jy, MultiFab& jz,
- MultiFab*, MultiFab*, MultiFab*,
- MultiFab* rho, MultiFab*,
+ MultiFab* cjx, MultiFab* cjy, MultiFab* cjz,
+ MultiFab* rho, MultiFab* crho,
const MultiFab*, const MultiFab*, const MultiFab*,
const MultiFab*, const MultiFab*, const MultiFab*,
Real t, Real dt)
@@ -307,11 +307,7 @@ LaserParticleContainer::Evolve (int lev,
BL_PROFILE_VAR_NS("Laser::Evolve::Copy", blp_copy);
BL_PROFILE_VAR_NS("PICSAR::LaserParticlePush", blp_pxr_pp);
BL_PROFILE_VAR_NS("PICSAR::LaserCurrentDepo", blp_pxr_cd);
-
- const std::array<Real,3>& dx = WarpX::CellSize(lev);
-
- // WarpX assumes the same number of guard cells for Jx, Jy, Jz
- long ngJ = jx.nGrow();
+ BL_PROFILE_VAR_NS("Laser::Evolve::Accumulate", blp_accumulate);
Real t_lab = t;
if (WarpX::gamma_boost > 1) {
@@ -329,8 +325,18 @@ LaserParticleContainer::Evolve (int lev,
#pragma omp parallel
#endif
{
- Vector<Real> xp, yp, zp, giv, plane_Xp, plane_Yp, amplitude_E;
- FArrayBox local_rho, local_jx, local_jy, local_jz;
+#ifdef _OPENMP
+ int thread_num = omp_get_thread_num();
+#else
+ int thread_num = 0;
+#endif
+
+ if (local_rho[thread_num] == nullptr) local_rho[thread_num].reset( new amrex::FArrayBox());
+ if (local_jx[thread_num] == nullptr) local_jx[thread_num].reset( new amrex::FArrayBox());
+ if (local_jy[thread_num] == nullptr) local_jy[thread_num].reset( new amrex::FArrayBox());
+ if (local_jz[thread_num] == nullptr) local_jz[thread_num].reset( new amrex::FArrayBox());
+
+ Cuda::DeviceVector<Real> plane_Xp, plane_Yp, amplitude_E;
for (WarpXParIter pti(*this, lev); pti.isValid(); ++pti)
{
@@ -346,13 +352,11 @@ LaserParticleContainer::Evolve (int lev,
auto& uzp = attribs[PIdx::uz];
const long np = pti.numParticles();
+ // For now, laser particles do not take the current buffers into account
+ const long np_current = np;
- // Data on the grid
- FArrayBox& jxfab = jx[pti];
- FArrayBox& jyfab = jy[pti];
- FArrayBox& jzfab = jz[pti];
+ m_giv[thread_num].resize(np);
- giv.resize(np);
plane_Xp.resize(np);
plane_Yp.resize(np);
amplitude_E.resize(np);
@@ -361,198 +365,78 @@ LaserParticleContainer::Evolve (int lev,
// copy data from particle container to temp arrays
//
BL_PROFILE_VAR_START(blp_copy);
- pti.GetPosition(xp, yp, zp);
+ pti.GetPosition(m_xp[thread_num], m_yp[thread_num], m_zp[thread_num]);
BL_PROFILE_VAR_STOP(blp_copy);
- for (int i = 0; i < np; ++i)
- {
- // Find the coordinates of the particles in the emission plane
-#if (AMREX_SPACEDIM == 3)
- plane_Xp[i] = u_X[0]*(xp[i] - position[0])
- + u_X[1]*(yp[i] - position[1])
- + u_X[2]*(zp[i] - position[2]);
- plane_Yp[i] = u_Y[0]*(xp[i] - position[0])
- + u_Y[1]*(yp[i] - position[1])
- + u_Y[2]*(zp[i] - position[2]);
-#elif (AMREX_SPACEDIM == 2)
- plane_Xp[i] = u_X[0]*(xp[i] - position[0])
- + u_X[2]*(zp[i] - position[2]);
- plane_Yp[i] = 0;
-#endif
- }
-
- const std::array<Real,3>& xyzmin_tile = WarpX::LowerCorner(pti.tilebox(), lev);
- const std::array<Real,3>& xyzmin_grid = WarpX::LowerCorner(box, lev);
-
- long lvect = 8;
-
- auto depositCharge = [&] (MultiFab* rhomf, int icomp)
- {
- long ngRho = rhomf->nGrow();
-
- Real* data_ptr;
- const int *rholen;
- FArrayBox& rhofab = (*rhomf)[pti];
- Box tile_box = convert(pti.tilebox(), IntVect::TheUnitVector());
- Box grown_box;
- const std::array<Real, 3>& xyzmin = xyzmin_tile;
- tile_box.grow(ngRho);
- local_rho.resize(tile_box);
- local_rho = 0.0;
- data_ptr = local_rho.dataPtr();
- rholen = local_rho.length();
-
-#if (AMREX_SPACEDIM == 3)
- const long nx = rholen[0]-1-2*ngRho;
- const long ny = rholen[1]-1-2*ngRho;
- const long nz = rholen[2]-1-2*ngRho;
-#else
- const long nx = rholen[0]-1-2*ngRho;
- const long ny = 0;
- const long nz = rholen[1]-1-2*ngRho;
-#endif
- warpx_charge_deposition(data_ptr, &np,
- xp.data(), yp.data(), zp.data(), wp.data(),
- &this->charge,
- &xyzmin[0], &xyzmin[1], &xyzmin[2],
- &dx[0], &dx[1], &dx[2], &nx, &ny, &nz,
- &ngRho, &ngRho, &ngRho,
- &WarpX::nox,&WarpX::noy,&WarpX::noz,
- &lvect, &WarpX::charge_deposition_algo);
-
- const int ncomp = 1;
- amrex_atomic_accumulate_fab(BL_TO_FORTRAN_3D(local_rho),
- BL_TO_FORTRAN_N_3D(rhofab,icomp), ncomp);
- };
-
- if (rho) depositCharge(rho,0);
+ if (rho) DepositCharge(pti, wp, rho, crho, 0, np_current, np, thread_num, lev);
//
// Particle Push
//
BL_PROFILE_VAR_START(blp_pxr_pp);
+ // Find the coordinates of the particles in the emission plane
+ calculate_laser_plane_coordinates( &np,
+ m_xp[thread_num].dataPtr(),
+ m_yp[thread_num].dataPtr(),
+ m_zp[thread_num].dataPtr(),
+ plane_Xp.dataPtr(), plane_Yp.dataPtr(),
+ &u_X[0], &u_X[1], &u_X[2], &u_Y[0], &u_Y[1], &u_Y[2],
+ &position[0], &position[1], &position[2] );
// Calculate the laser amplitude to be emitted,
// at the position of the emission plane
if (profile == laser_t::Gaussian) {
- warpx_gaussian_laser( &np, plane_Xp.data(), plane_Yp.data(),
+ warpx_gaussian_laser( &np, plane_Xp.dataPtr(), plane_Yp.dataPtr(),
&t_lab, &wavelength, &e_max, &profile_waist, &profile_duration,
- &profile_t_peak, &profile_focal_distance, amplitude_E.data(),
+ &profile_t_peak, &profile_focal_distance, amplitude_E.dataPtr(),
&zeta, &beta, &phi2, &theta_stc );
}
if (profile == laser_t::Harris) {
- warpx_harris_laser( &np, plane_Xp.data(), plane_Yp.data(),
+ warpx_harris_laser( &np, plane_Xp.dataPtr(), plane_Yp.dataPtr(),
&t, &wavelength, &e_max, &profile_waist, &profile_duration,
- &profile_focal_distance, amplitude_E.data() );
+ &profile_focal_distance, amplitude_E.dataPtr() );
}
if (profile == laser_t::parse_field_function) {
- parse_function_laser( &np, plane_Xp.data(), plane_Yp.data(), &t,
- amplitude_E.data(), parser_instance_number );
+ parse_function_laser( &np, plane_Xp.dataPtr(), plane_Yp.dataPtr(), &t,
+ amplitude_E.dataPtr(), parser_instance_number );
}
-
// Calculate the corresponding momentum and position for the particles
- for (int i = 0; i < np; ++i)
- {
- // Calculate the velocity according to the amplitude of E
- Real sign_charge = std::copysign( 1.0, wp[i] );
- Real v_over_c = sign_charge * mobility * amplitude_E[i];
- AMREX_ALWAYS_ASSERT_WITH_MESSAGE( v_over_c < 1,
- "The laser particles have to move unphysically in order to emit the laser.");
- // The velocity is along the laser polarization p_X
- Real vx = PhysConst::c * v_over_c * p_X[0];
- Real vy = PhysConst::c * v_over_c * p_X[1];
- Real vz = PhysConst::c * v_over_c * p_X[2];
- // When running in the boosted-frame, their is additional
- // velocity along nvec
- if (WarpX::gamma_boost > 1.) {
- vx -= PhysConst::c * WarpX::beta_boost * nvec[0];
- vy -= PhysConst::c * WarpX::beta_boost * nvec[1];
- vz -= PhysConst::c * WarpX::beta_boost * nvec[2];
- }
- // Get the corresponding momenta
- giv[i] = std::sqrt(1 - std::pow(v_over_c,2))/WarpX::gamma_boost;
- Real gamma = 1./giv[i];
- uxp[i] = gamma * vx;
- uyp[i] = gamma * vy;
- uzp[i] = gamma * vz;
- // Push the the particle positions
- xp[i] += vx * dt;
-#if (AMREX_SPACEDIM == 3)
- yp[i] += vy * dt;
-#endif
- zp[i] += vz * dt;
- }
-
+ update_laser_particle(
+ &np,
+ m_xp[thread_num].dataPtr(),
+ m_yp[thread_num].dataPtr(),
+ m_zp[thread_num].dataPtr(),
+ uxp.dataPtr(), uyp.dataPtr(), uzp.dataPtr(),
+ m_giv[thread_num].dataPtr(),
+ wp.dataPtr(), amplitude_E.dataPtr(), &p_X[0], &p_X[1], &p_X[2],
+ &nvec[0], &nvec[1], &nvec[2], &mobility, &dt,
+ &PhysConst::c, &WarpX::beta_boost, &WarpX::gamma_boost );
BL_PROFILE_VAR_STOP(blp_pxr_pp);
//
// Current Deposition
//
- BL_PROFILE_VAR_START(blp_pxr_cd);
- Real *jx_ptr, *jy_ptr, *jz_ptr;
- const int *jxntot, *jyntot, *jzntot;
- Box tbx = convert(pti.tilebox(), WarpX::jx_nodal_flag);
- Box tby = convert(pti.tilebox(), WarpX::jy_nodal_flag);
- Box tbz = convert(pti.tilebox(), WarpX::jz_nodal_flag);
-
- const std::array<Real, 3>& xyzmin = xyzmin_tile;
-
- tbx.grow(ngJ);
- tby.grow(ngJ);
- tbz.grow(ngJ);
-
- local_jx.resize(tbx);
- local_jy.resize(tby);
- local_jz.resize(tbz);
-
- local_jx = 0.0;
- local_jy = 0.0;
- local_jz = 0.0;
-
- jx_ptr = local_jx.dataPtr();
- jy_ptr = local_jy.dataPtr();
- jz_ptr = local_jz.dataPtr();
-
- jxntot = local_jx.length();
- jyntot = local_jy.length();
- jzntot = local_jz.length();
-
- warpx_current_deposition(
- jx_ptr, &ngJ, jxntot,
- jy_ptr, &ngJ, jyntot,
- jz_ptr, &ngJ, jzntot,
- &np, xp.data(), yp.data(), zp.data(),
- uxp.data(), uyp.data(), uzp.data(),
- giv.data(), wp.data(), &this->charge,
- &xyzmin[0], &xyzmin[1], &xyzmin[2],
- &dt, &dx[0], &dx[1], &dx[2],
- &WarpX::nox,&WarpX::noy,&WarpX::noz,
- &lvect,&WarpX::current_deposition_algo);
-
- const int ncomp = 1;
- amrex_atomic_accumulate_fab(BL_TO_FORTRAN_3D(local_jx),
- BL_TO_FORTRAN_3D(jxfab), ncomp);
- amrex_atomic_accumulate_fab(BL_TO_FORTRAN_3D(local_jy),
- BL_TO_FORTRAN_3D(jyfab), ncomp);
- amrex_atomic_accumulate_fab(BL_TO_FORTRAN_3D(local_jz),
- BL_TO_FORTRAN_3D(jzfab), ncomp);
-
- BL_PROFILE_VAR_STOP(blp_pxr_cd);
-
- if (rho) depositCharge(rho,1);
+ DepositCurrent(pti, wp, uxp, uyp, uzp, jx, jy, jz,
+ cjx, cjy, cjz, np_current, np, thread_num, lev, dt);
//
// copy particle data back
//
BL_PROFILE_VAR_START(blp_copy);
- pti.SetPosition(xp, yp, zp);
+ pti.SetPosition(m_xp[thread_num], m_yp[thread_num], m_zp[thread_num]);
BL_PROFILE_VAR_STOP(blp_copy);
+ if (rho) DepositCharge(pti, wp, rho, crho, 1, np_current, np, thread_num, lev);
+
if (cost) {
const Box& tbx = pti.tilebox();
wt = (amrex::second() - wt) / tbx.d_numPts();
- (*cost)[pti].plus(wt, tbx);
+ FArrayBox* costfab = cost->fabPtr(pti);
+ AMREX_LAUNCH_HOST_DEVICE_LAMBDA ( tbx, work_box,
+ {
+ costfab->plus(wt, work_box);
+ });
}
}
}
diff --git a/Source/Make.WarpX b/Source/Make.WarpX
index 9dfe667e0..e5da17e93 100644
--- a/Source/Make.WarpX
+++ b/Source/Make.WarpX
@@ -5,6 +5,13 @@ OPENBC_HOME ?= ../../../openbc_poisson
USE_MPI = TRUE
USE_PARTICLES = TRUE
+ifeq ($(USE_GPU),TRUE)
+ USE_OMP = FALSE
+ USE_CUDA = TRUE
+ USE_ACC = TRUE
+ NVCC_HOST_COMP = gcc
+endif
+
include $(AMREX_HOME)/Tools/GNUMake/Make.defs
ifndef USE_PYTHON_MAIN
@@ -55,8 +62,13 @@ ifeq ($(USE_PSATD),TRUE)
endif
USERSuffix += .PSATD
DEFINES += -DWARPX_USE_PSATD
- DEFINES += -DFFTW # PICSAR uses it
- LIBRARIES += -lfftw3_mpi -lfftw3 -lfftw3_omp
+ ifeq ($(USE_CUDA),TRUE)
+ DEFINES += -DFFTW -DCUDA_FFT=1 # PICSAR uses it
+ LIBRARIES += -lfftw3_mpi -lfftw3 -lfftw3_threads -lcufft
+ else
+ DEFINES += -DFFTW # PICSAR uses it
+ LIBRARIES += -lfftw3_mpi -lfftw3 -lfftw3_threads
+ endif
endif
ifeq ($(STORE_OLD_PARTICLE_ATTRIBS),TRUE)
diff --git a/Source/Make.package b/Source/Make.package
index f2adfc07c..f55e12297 100644
--- a/Source/Make.package
+++ b/Source/Make.package
@@ -1,8 +1,11 @@
ifneq ($(USE_PYTHON_MAIN),TRUE)
CEXE_sources += main.cpp
+else
+ CEXE_sources += WarpXWrappers.cpp
+ CEXE_headers += WarpXWrappers.h
endif
-CEXE_sources += WarpXWrappers.cpp WarpX_py.cpp
+CEXE_sources += WarpX_py.cpp
CEXE_sources += WarpX.cpp WarpXInitData.cpp WarpXEvolve.cpp WarpXIO.cpp WarpXProb.cpp WarpXRegrid.cpp
CEXE_sources += WarpXTagging.cpp WarpXComm.cpp WarpXMove.cpp WarpXBoostedFrameDiagnostic.cpp
@@ -10,7 +13,7 @@ CEXE_sources += WarpXTagging.cpp WarpXComm.cpp WarpXMove.cpp WarpXBoostedFrameDi
CEXE_sources += ParticleIO.cpp
CEXE_sources += ParticleContainer.cpp WarpXParticleContainer.cpp PhysicalParticleContainer.cpp LaserParticleContainer.cpp RigidInjectedParticleContainer.cpp
-CEXE_headers += WarpXWrappers.h WarpX_py.H
+CEXE_headers += WarpX_py.H
CEXE_headers += WarpX.H WarpX_f.H WarpXConst.H WarpXBoostedFrameDiagnostic.H
CEXE_sources += WarpXConst.cpp
diff --git a/Source/ParticleContainer.H b/Source/ParticleContainer.H
index 026ba90a5..9f732f355 100644
--- a/Source/ParticleContainer.H
+++ b/Source/ParticleContainer.H
@@ -139,6 +139,8 @@ public:
void WriteHeader (std::ostream& os) const;
+ void SortParticlesByCell ();
+
void Redistribute ();
void RedistributeLocal (const int num_ghost);
@@ -160,10 +162,6 @@ public:
return r;
}
- int L_lower_order_in_v() {return l_lower_order_in_v;}
-
- bool Use_fdtd_nci_corr() {return use_fdtd_nci_corr;}
-
void GetLabFrameData(const std::string& snapshot_name,
const int i_lab, const int direction,
const amrex::Real z_old, const amrex::Real z_new,
@@ -202,9 +200,5 @@ private:
// runtime parameters
int nspecies = 1; // physical particles only. If WarpX::use_laser, nspecies+1 == allcontainers.size().
-
- int l_lower_order_in_v = true;
-
- bool use_fdtd_nci_corr = false;
};
#endif /*WARPX_ParticleContainer_H_*/
diff --git a/Source/ParticleContainer.cpp b/Source/ParticleContainer.cpp
index 75c13d793..8aac692d6 100644
--- a/Source/ParticleContainer.cpp
+++ b/Source/ParticleContainer.cpp
@@ -69,8 +69,8 @@ MultiParticleContainer::ReadParameters ()
}
}
}
- pp.query("use_fdtd_nci_corr", use_fdtd_nci_corr);
- pp.query("l_lower_order_in_v", l_lower_order_in_v);
+ pp.query("use_fdtd_nci_corr", WarpX::use_fdtd_nci_corr);
+ pp.query("l_lower_order_in_v", WarpX::l_lower_order_in_v);
initialized = true;
}
}
@@ -261,6 +261,14 @@ MultiParticleContainer::GetChargeDensity (int lev, bool local)
}
void
+MultiParticleContainer::SortParticlesByCell ()
+{
+ for (auto& pc : allcontainers) {
+ pc->SortParticlesByCell();
+ }
+}
+
+void
MultiParticleContainer::Redistribute ()
{
for (auto& pc : allcontainers) {
diff --git a/Source/PhysicalParticleContainer.H b/Source/PhysicalParticleContainer.H
index 52e59713b..bd6015437 100644
--- a/Source/PhysicalParticleContainer.H
+++ b/Source/PhysicalParticleContainer.H
@@ -61,10 +61,10 @@ public:
amrex::Real dt) override;
virtual void PushPX(WarpXParIter& pti,
- amrex::Vector<amrex::Real>& xp,
- amrex::Vector<amrex::Real>& yp,
- amrex::Vector<amrex::Real>& zp,
- amrex::Vector<amrex::Real>& giv,
+ amrex::Cuda::DeviceVector<amrex::Real>& xp,
+ amrex::Cuda::DeviceVector<amrex::Real>& yp,
+ amrex::Cuda::DeviceVector<amrex::Real>& zp,
+ amrex::Cuda::DeviceVector<amrex::Real>& giv,
amrex::Real dt);
virtual void PushP (int lev, amrex::Real dt,
@@ -80,6 +80,10 @@ public:
// Inject particles in Box 'part_box'
virtual void AddParticles (int lev);
void AddPlasma(int lev, amrex::RealBox part_realbox = amrex::RealBox());
+ void AddPlasmaCPU (int lev, amrex::RealBox part_realbox);
+#ifdef AMREX_USE_GPU
+ void AddPlasmaGPU (int lev, amrex::RealBox part_realbox);
+#endif
void MapParticletoBoostedFrame(amrex::Real& x, amrex::Real& y, amrex::Real& z, std::array<amrex::Real, 3>& u);
@@ -105,9 +109,14 @@ protected:
bool boost_adjust_transverse_positions = false;
bool do_backward_propagation = false;
+ long NumParticlesToAdd (const amrex::Box& overlap_box,
+ const amrex::RealBox& overlap_realbox,
+ const amrex::RealBox& tile_real_box,
+ const amrex::RealBox& particle_real_box);
+
int GetRefineFac(const amrex::Real x, const amrex::Real y, const amrex::Real z);
std::unique_ptr<amrex::IArrayBox> m_refined_injection_mask = nullptr;
-
+
};
#endif
diff --git a/Source/PhysicalParticleContainer.cpp b/Source/PhysicalParticleContainer.cpp
index df0ee3b3c..c4fdd4464 100644
--- a/Source/PhysicalParticleContainer.cpp
+++ b/Source/PhysicalParticleContainer.cpp
@@ -10,6 +10,62 @@
using namespace amrex;
+long PhysicalParticleContainer::
+NumParticlesToAdd(const Box& overlap_box, const RealBox& overlap_realbox,
+ const RealBox& tile_realbox, const RealBox& particle_real_box)
+{
+ const int lev = 0;
+ const Geometry& geom = Geom(lev);
+ int num_ppc = plasma_injector->num_particles_per_cell;
+ const Real* dx = geom.CellSize();
+
+ long np = 0;
+ const auto& overlap_corner = overlap_realbox.lo();
+ for (IntVect iv = overlap_box.smallEnd(); iv <= overlap_box.bigEnd(); overlap_box.next(iv))
+ {
+ int fac;
+ if (injected) {
+#if ( AMREX_SPACEDIM == 3 )
+ Real x = overlap_corner[0] + (iv[0] + 0.5)*dx[0];
+ Real y = overlap_corner[1] + (iv[1] + 0.5)*dx[1];
+ Real z = overlap_corner[2] + (iv[2] + 0.5)*dx[2];
+#elif ( AMREX_SPACEDIM == 2 )
+ Real x = overlap_corner[0] + (iv[0] + 0.5)*dx[0];
+ Real y = 0;
+ Real z = overlap_corner[1] + (iv[1] + 0.5)*dx[1];
+#endif
+ fac = GetRefineFac(x, y, z);
+ } else {
+ fac = 1.0;
+ }
+
+ int ref_num_ppc = num_ppc * AMREX_D_TERM(fac, *fac, *fac);
+ for (int i_part=0; i_part<ref_num_ppc;i_part++) {
+ std::array<Real, 3> r;
+ plasma_injector->getPositionUnitBox(r, i_part, fac);
+#if ( AMREX_SPACEDIM == 3 )
+ Real x = overlap_corner[0] + (iv[0] + r[0])*dx[0];
+ Real y = overlap_corner[1] + (iv[1] + r[1])*dx[1];
+ Real z = overlap_corner[2] + (iv[2] + r[2])*dx[2];
+#elif ( AMREX_SPACEDIM == 2 )
+ Real x = overlap_corner[0] + (iv[0] + r[0])*dx[0];
+ Real y = 0;
+ Real z = overlap_corner[1] + (iv[1] + r[1])*dx[1];
+#endif
+ // If the new particle is not inside the tile box,
+ // go to the next generated particle.
+#if ( AMREX_SPACEDIM == 3 )
+ if(!tile_realbox.contains( RealVect{x, y, z} )) continue;
+#elif ( AMREX_SPACEDIM == 2 )
+ if(!tile_realbox.contains( RealVect{x, z} )) continue;
+#endif
+ ++np;
+ }
+ }
+
+ return np;
+}
+
PhysicalParticleContainer::PhysicalParticleContainer (AmrCore* amr_core, int ispecies,
const std::string& name)
: WarpXParticleContainer(amr_core, ispecies),
@@ -22,7 +78,7 @@ PhysicalParticleContainer::PhysicalParticleContainer (AmrCore* amr_core, int isp
ParmParse pp(species_name);
pp.query("boost_adjust_transverse_positions", boost_adjust_transverse_positions);
- pp.query("do_backward_propagation", do_backward_propagation);
+ pp.query("do_backward_propagation", do_backward_propagation);
}
void PhysicalParticleContainer::InitData()
@@ -174,9 +230,19 @@ PhysicalParticleContainer::AddParticles (int lev)
* but its boundaries need not be aligned with the actual cells of the simulation)
*/
void
-PhysicalParticleContainer::AddPlasma(int lev, RealBox part_realbox)
+PhysicalParticleContainer::AddPlasma (int lev, RealBox part_realbox)
{
- BL_PROFILE("PhysicalParticleContainer::AddPlasma");
+#ifdef AMREX_USE_GPU
+ AddPlasmaGPU(lev, part_realbox);
+#else
+ AddPlasmaCPU(lev, part_realbox);
+#endif
+}
+
+void
+PhysicalParticleContainer::AddPlasmaCPU (int lev, RealBox part_realbox)
+{
+ BL_PROFILE("PhysicalParticleContainer::AddPlasmaCPU");
// If no part_realbox is provided, initialize particles in the whole domain
const Geometry& geom = Geom(lev);
@@ -365,18 +431,264 @@ PhysicalParticleContainer::AddPlasma(int lev, RealBox part_realbox)
attribs[PIdx::uzold] = u[2];
#endif
- AddOneParticle(lev, grid_id, tile_id, x, y, z, attribs);
+ AddOneParticle(lev, grid_id, tile_id, x, y, z, attribs);
}
}
if (cost) {
- wt = (amrex::second() - wt) / tile_box.d_numPts();
- (*cost)[mfi].plus(wt, tile_box);
+ wt = (amrex::second() - wt) / tile_box.d_numPts();
+ FArrayBox* costfab = cost->fabPtr(mfi);
+ AMREX_LAUNCH_HOST_DEVICE_LAMBDA ( tile_box, work_box,
+ {
+ costfab->plus(wt, work_box);
+ });
}
}
}
}
+#ifdef AMREX_USE_GPU
+void
+PhysicalParticleContainer::AddPlasmaGPU (int lev, RealBox part_realbox)
+{
+ BL_PROFILE("PhysicalParticleContainer::AddPlasmaGPU");
+
+ // If no part_realbox is provided, initialize particles in the whole domain
+ const Geometry& geom = Geom(lev);
+ if (!part_realbox.ok()) part_realbox = geom.ProbDomain();
+
+ int num_ppc = plasma_injector->num_particles_per_cell;
+
+ const Real* dx = geom.CellSize();
+
+ Real scale_fac;
+#if AMREX_SPACEDIM==3
+ scale_fac = dx[0]*dx[1]*dx[2]/num_ppc;
+#elif AMREX_SPACEDIM==2
+ scale_fac = dx[0]*dx[1]/num_ppc;
+#endif
+
+#ifdef _OPENMP
+ // First touch all tiles in the map in serial
+ for (MFIter mfi = MakeMFIter(lev); mfi.isValid(); ++mfi) {
+ const int grid_id = mfi.index();
+ const int tile_id = mfi.LocalTileIndex();
+ GetParticles(lev)[std::make_pair(grid_id, tile_id)];
+ }
+#endif
+
+ MultiFab* cost = WarpX::getCosts(lev);
+
+ if ( (not m_refined_injection_mask) and WarpX::do_moving_window)
+ {
+ Box mask_box = geom.Domain();
+ mask_box.setSmall(WarpX::moving_window_dir, 0);
+ mask_box.setBig(WarpX::moving_window_dir, 0);
+ m_refined_injection_mask.reset( new IArrayBox(mask_box));
+ m_refined_injection_mask->setVal(-1);
+ }
+
+ MFItInfo info;
+ if (do_tiling) {
+ info.EnableTiling(tile_size);
+ }
+ info.SetDynamic(true);
+
+#ifdef _OPENMP
+#pragma omp parallel if (not WarpX::serialize_ics)
+#endif
+ {
+ std::array<Real,PIdx::nattribs> attribs;
+ attribs.fill(0.0);
+
+ // Loop through the tiles
+ for (MFIter mfi = MakeMFIter(lev, info); mfi.isValid(); ++mfi) {
+
+ Real wt = amrex::second();
+
+ const Box& tile_box = mfi.tilebox();
+ const RealBox tile_realbox = WarpX::getRealBox(tile_box, lev);
+
+ // Find the cells of part_box that overlap with tile_realbox
+ // If there is no overlap, just go to the next tile in the loop
+ RealBox overlap_realbox;
+ Box overlap_box;
+ Real ncells_adjust;
+ bool no_overlap = 0;
+
+ for (int dir=0; dir<AMREX_SPACEDIM; dir++) {
+ if ( tile_realbox.lo(dir) <= part_realbox.hi(dir) ) {
+ ncells_adjust = std::floor( (tile_realbox.lo(dir) - part_realbox.lo(dir))/dx[dir] );
+ overlap_realbox.setLo( dir, part_realbox.lo(dir) + std::max(ncells_adjust, 0.) * dx[dir]);
+ } else {
+ no_overlap = 1; break;
+ }
+ if ( tile_realbox.hi(dir) >= part_realbox.lo(dir) ) {
+ ncells_adjust = std::floor( (part_realbox.hi(dir) - tile_realbox.hi(dir))/dx[dir] );
+ overlap_realbox.setHi( dir, part_realbox.hi(dir) - std::max(ncells_adjust, 0.) * dx[dir]);
+ } else {
+ no_overlap = 1; break;
+ }
+ // Count the number of cells in this direction in overlap_realbox
+ overlap_box.setSmall( dir, 0 );
+ overlap_box.setBig( dir,
+ int( round((overlap_realbox.hi(dir)-overlap_realbox.lo(dir))/dx[dir] )) - 1);
+ }
+ if (no_overlap == 1) {
+ continue; // Go to the next tile
+ }
+
+ const int grid_id = mfi.index();
+ const int tile_id = mfi.LocalTileIndex();
+
+ Cuda::HostVector<ParticleType> host_particles;
+ std::array<Cuda::HostVector<Real>, PIdx::nattribs> host_attribs;
+
+ // Loop through the cells of overlap_box and inject
+ // the corresponding particles
+ const auto& overlap_corner = overlap_realbox.lo();
+ for (IntVect iv = overlap_box.smallEnd(); iv <= overlap_box.bigEnd(); overlap_box.next(iv))
+ {
+ int fac;
+ if (injected) {
+#if ( AMREX_SPACEDIM == 3 )
+ Real x = overlap_corner[0] + (iv[0] + 0.5)*dx[0];
+ Real y = overlap_corner[1] + (iv[1] + 0.5)*dx[1];
+ Real z = overlap_corner[2] + (iv[2] + 0.5)*dx[2];
+#elif ( AMREX_SPACEDIM == 2 )
+ Real x = overlap_corner[0] + (iv[0] + 0.5)*dx[0];
+ Real y = 0;
+ Real z = overlap_corner[1] + (iv[1] + 0.5)*dx[1];
+#endif
+ fac = GetRefineFac(x, y, z);
+ } else {
+ fac = 1.0;
+ }
+
+ int ref_num_ppc = num_ppc * AMREX_D_TERM(fac, *fac, *fac);
+ for (int i_part=0; i_part<ref_num_ppc;i_part++) {
+ std::array<Real, 3> r;
+ plasma_injector->getPositionUnitBox(r, i_part, fac);
+#if ( AMREX_SPACEDIM == 3 )
+ Real x = overlap_corner[0] + (iv[0] + r[0])*dx[0];
+ Real y = overlap_corner[1] + (iv[1] + r[1])*dx[1];
+ Real z = overlap_corner[2] + (iv[2] + r[2])*dx[2];
+#elif ( AMREX_SPACEDIM == 2 )
+ Real x = overlap_corner[0] + (iv[0] + r[0])*dx[0];
+ Real y = 0;
+ Real z = overlap_corner[1] + (iv[1] + r[1])*dx[1];
+#endif
+ // If the new particle is not inside the tile box,
+ // go to the next generated particle.
+#if ( AMREX_SPACEDIM == 3 )
+ if(!tile_realbox.contains( RealVect{x, y, z} )) continue;
+#elif ( AMREX_SPACEDIM == 2 )
+ if(!tile_realbox.contains( RealVect{x, z} )) continue;
+#endif
+
+ Real dens;
+ std::array<Real, 3> u;
+ if (WarpX::gamma_boost == 1.){
+ // Lab-frame simulation
+ // If the particle is not within the species's
+ // xmin, xmax, ymin, ymax, zmin, zmax, go to
+ // the next generated particle.
+ if (!plasma_injector->insideBounds(x, y, z)) continue;
+ plasma_injector->getMomentum(u, x, y, z);
+ dens = plasma_injector->getDensity(x, y, z);
+ } else {
+ // Boosted-frame simulation
+ Real c = PhysConst::c;
+ Real gamma_boost = WarpX::gamma_boost;
+ Real beta_boost = WarpX::beta_boost;
+ // Since the user provides the density distribution
+ // at t_lab=0 and in the lab-frame coordinates,
+ // we need to find the lab-frame position of this
+ // particle at t_lab=0, from its boosted-frame coordinates
+ // Assuming ballistic motion, this is given by:
+ // z0_lab = gamma*( z_boost*(1-beta*betaz_lab) - ct_boost*(betaz_lab-beta) )
+ // where betaz_lab is the speed of the particle in the lab frame
+ //
+ // In order for this equation to be solvable, betaz_lab
+ // is explicitly assumed to have no dependency on z0_lab
+ plasma_injector->getMomentum(u, x, y, 0.); // No z0_lab dependency
+ // At this point u is the lab-frame momentum
+ // => Apply the above formula for z0_lab
+ Real gamma_lab = std::sqrt( 1 + (u[0]*u[0] + u[1]*u[1] + u[2]*u[2])/(c*c) );
+ Real betaz_lab = u[2]/gamma_lab/c;
+ Real t = WarpX::GetInstance().gett_new(lev);
+ Real z0_lab = gamma_boost * ( z*(1-beta_boost*betaz_lab) - c*t*(betaz_lab-beta_boost) );
+ // If the particle is not within the lab-frame zmin, zmax, etc.
+ // go to the next generated particle.
+ if (!plasma_injector->insideBounds(x, y, z0_lab)) continue;
+ // call `getDensity` with lab-frame parameters
+ dens = plasma_injector->getDensity(x, y, z0_lab);
+ // At this point u and dens are the lab-frame quantities
+ // => Perform Lorentz transform
+ dens = gamma_boost * dens * ( 1 - beta_boost*betaz_lab );
+ u[2] = gamma_boost * ( u[2] -beta_boost*c*gamma_lab );
+ }
+ attribs[PIdx::w ] = dens * scale_fac / (AMREX_D_TERM(fac, *fac, *fac));
+ attribs[PIdx::ux] = u[0];
+ attribs[PIdx::uy] = u[1];
+ attribs[PIdx::uz] = u[2];
+
+#ifdef WARPX_STORE_OLD_PARTICLE_ATTRIBS
+ attribs[PIdx::xold] = x;
+ attribs[PIdx::yold] = y;
+ attribs[PIdx::zold] = z;
+
+ attribs[PIdx::uxold] = u[0];
+ attribs[PIdx::uyold] = u[1];
+ attribs[PIdx::uzold] = u[2];
+#endif
+
+ ParticleType p;
+ p.id() = ParticleType::NextID();
+ p.cpu() = ParallelDescriptor::MyProc();
+#if (AMREX_SPACEDIM == 3)
+ p.pos(0) = x;
+ p.pos(1) = y;
+ p.pos(2) = z;
+#elif (AMREX_SPACEDIM == 2)
+ p.pos(0) = x;
+ p.pos(1) = z;
+#endif
+
+ host_particles.push_back(p);
+ for (int kk = 0; kk < PIdx::nattribs; ++kk)
+ host_attribs[kk].push_back(attribs[kk]);
+ }
+ }
+
+ auto& particle_tile = GetParticles(lev)[std::make_pair(grid_id,tile_id)];
+ auto old_size = particle_tile.GetArrayOfStructs().size();
+ auto new_size = old_size + host_particles.size();
+ particle_tile.resize(new_size);
+
+ thrust::copy(host_particles.begin(),
+ host_particles.end(),
+ particle_tile.GetArrayOfStructs().begin() + old_size);
+
+ for (int kk = 0; kk < PIdx::nattribs; ++kk) {
+ thrust::copy(host_attribs[kk].begin(),
+ host_attribs[kk].end(),
+ particle_tile.GetStructOfArrays().GetRealData(kk).begin() + old_size);
+ }
+
+ if (cost) {
+ wt = (amrex::second() - wt) / tile_box.d_numPts();
+ FArrayBox* costfab = cost->fabPtr(mfi);
+ AMREX_LAUNCH_HOST_DEVICE_LAMBDA ( tile_box, work_box,
+ {
+ costfab->plus(wt, work_box);
+ });
+ }
+ }
+ }
+}
+#endif
+
#ifdef WARPX_DO_ELECTROSTATIC
void
PhysicalParticleContainer::
@@ -425,10 +737,10 @@ FieldGatherES (const amrex::Vector<std::array<std::unique_ptr<amrex::MultiFab>,
const FArrayBox& ezfab = (*E[lev][2])[pti];
#endif
- WRPX_INTERPOLATE_CIC(particles.data(), nstride, np,
- Exp.data(), Eyp.data(),
+ WRPX_INTERPOLATE_CIC(particles.dataPtr(), nstride, np,
+ Exp.dataPtr(), Eyp.dataPtr(),
#if AMREX_SPACEDIM == 3
- Ezp.data(),
+ Ezp.dataPtr(),
#endif
exfab.dataPtr(), eyfab.dataPtr(),
#if AMREX_SPACEDIM == 3
@@ -495,10 +807,10 @@ FieldGatherES (const amrex::Vector<std::array<std::unique_ptr<amrex::MultiFab>,
#endif
if (lev == 0) {
- WRPX_INTERPOLATE_CIC(particles.data(), nstride, np,
- Exp.data(), Eyp.data(),
+ WRPX_INTERPOLATE_CIC(particles.dataPtr(), nstride, np,
+ Exp.dataPtr(), Eyp.dataPtr(),
#if AMREX_SPACEDIM == 3
- Ezp.data(),
+ Ezp.dataPtr(),
#endif
exfab.dataPtr(), eyfab.dataPtr(),
#if AMREX_SPACEDIM == 3
@@ -515,10 +827,10 @@ FieldGatherES (const amrex::Vector<std::array<std::unique_ptr<amrex::MultiFab>,
const Box& coarse_box = coarsened_fine_BA[pti];
const Real* coarse_dx = Geom(0).CellSize();
- WRPX_INTERPOLATE_CIC_TWO_LEVELS(particles.data(), nstride, np,
- Exp.data(), Eyp.data(),
+ WRPX_INTERPOLATE_CIC_TWO_LEVELS(particles.dataPtr(), nstride, np,
+ Exp.dataPtr(), Eyp.dataPtr(),
#if AMREX_SPACEDIM == 3
- Ezp.data(),
+ Ezp.dataPtr(),
#endif
exfab.dataPtr(), eyfab.dataPtr(),
#if AMREX_SPACEDIM == 3
@@ -573,14 +885,14 @@ PhysicalParticleContainer::EvolveES (const Vector<std::array<std::unique_ptr<Mul
//
// Particle Push
//
- WRPX_PUSH_LEAPFROG(particles.data(), nstride, np,
- uxp.data(), uyp.data(),
+ WRPX_PUSH_LEAPFROG(particles.dataPtr(), nstride, np,
+ uxp.dataPtr(), uyp.dataPtr(),
#if AMREX_SPACEDIM == 3
- uzp.data(),
+ uzp.dataPtr(),
#endif
- Exp.data(), Eyp.data(),
+ Exp.dataPtr(), Eyp.dataPtr(),
#if AMREX_SPACEDIM == 3
- Ezp.data(),
+ Ezp.dataPtr(),
#endif
&this->charge, &this->mass, &dt,
prob_domain.lo(), prob_domain.hi());
@@ -607,7 +919,7 @@ PhysicalParticleContainer::FieldGather (int lev,
#pragma omp parallel
#endif
{
- Vector<Real> xp, yp, zp;
+ Cuda::DeviceVector<Real> xp, yp, zp;
for (WarpXParIter pti(*this, lev); pti.isValid(); ++pti)
{
@@ -653,12 +965,14 @@ PhysicalParticleContainer::FieldGather (int lev,
// Field Gather
//
const int ll4symtry = false;
- const int l_lower_order_in_v = warpx_l_lower_order_in_v();
long lvect_fieldgathe = 64;
warpx_geteb_energy_conserving(
- &np, xp.data(), yp.data(), zp.data(),
- Exp.data(),Eyp.data(),Ezp.data(),
- Bxp.data(),Byp.data(),Bzp.data(),
+ &np,
+ xp.dataPtr(),
+ yp.dataPtr(),
+ zp.dataPtr(),
+ Exp.dataPtr(),Eyp.dataPtr(),Ezp.dataPtr(),
+ Bxp.dataPtr(),Byp.dataPtr(),Bzp.dataPtr(),
ixyzmin,
&xyzmin[0], &xyzmin[1], &xyzmin[2],
&dx[0], &dx[1], &dx[2],
@@ -669,13 +983,17 @@ PhysicalParticleContainer::FieldGather (int lev,
BL_TO_FORTRAN_ANYD(bxfab),
BL_TO_FORTRAN_ANYD(byfab),
BL_TO_FORTRAN_ANYD(bzfab),
- &ll4symtry, &l_lower_order_in_v,
+ &ll4symtry, &WarpX::l_lower_order_in_v,
&lvect_fieldgathe, &WarpX::field_gathering_algo);
if (cost) {
const Box& tbx = pti.tilebox();
wt = (amrex::second() - wt) / tbx.d_numPts();
- (*cost)[pti].plus(wt, tbx);
+ FArrayBox* costfab = cost->fabPtr(pti);
+ AMREX_LAUNCH_HOST_DEVICE_LAMBDA ( tbx, work_box,
+ {
+ costfab->plus(wt, work_box);
+ });
}
}
}
@@ -696,20 +1014,15 @@ PhysicalParticleContainer::Evolve (int lev,
BL_PROFILE_VAR_NS("PPC::Evolve::Copy", blp_copy);
BL_PROFILE_VAR_NS("PICSAR::FieldGather", blp_pxr_fg);
BL_PROFILE_VAR_NS("PICSAR::ParticlePush", blp_pxr_pp);
- BL_PROFILE_VAR_NS("PICSAR::CurrentDeposition", blp_pxr_cd);
- BL_PROFILE_VAR_NS("PPC::Evolve::Accumulate", blp_accumulate);
BL_PROFILE_VAR_NS("PPC::Evolve::partition", blp_partition);
-
+
const std::array<Real,3>& dx = WarpX::CellSize(lev);
const std::array<Real,3>& cdx = WarpX::CellSize(std::max(lev-1,0));
const auto& mypc = WarpX::GetInstance().GetPartContainer();
const int nstencilz_fdtd_nci_corr = mypc.nstencilz_fdtd_nci_corr;
- // WarpX assumes the same number of guard cells for Jx, Jy, Jz
- long ngJ = jx.nGrow();
-
- BL_ASSERT(OnSameGrids(lev,Ex));
+ BL_ASSERT(OnSameGrids(lev,jx));
MultiFab* cost = WarpX::getCosts(lev);
@@ -719,17 +1032,26 @@ PhysicalParticleContainer::Evolve (int lev,
bool has_buffer = cEx || cjx;
#ifdef _OPENMP
-#pragma omp parallel
+#pragma omp parallel
#endif
{
- Vector<Real> xp, yp, zp, giv;
- FArrayBox local_rho, local_jx, local_jy, local_jz;
+#ifdef _OPENMP
+ int thread_num = omp_get_thread_num();
+#else
+ int thread_num = 0;
+#endif
+
+ if (local_rho[thread_num] == nullptr) local_rho[thread_num].reset( new amrex::FArrayBox());
+ if (local_jx[thread_num] == nullptr) local_jx[thread_num].reset( new amrex::FArrayBox());
+ if (local_jy[thread_num] == nullptr) local_jy[thread_num].reset( new amrex::FArrayBox());
+ if (local_jz[thread_num] == nullptr) local_jz[thread_num].reset( new amrex::FArrayBox());
+
FArrayBox filtered_Ex, filtered_Ey, filtered_Ez;
FArrayBox filtered_Bx, filtered_By, filtered_Bz;
std::vector<bool> inexflag;
Vector<long> pid;
- Vector<Real> tmp;
- Vector<ParticleType> particle_tmp;
+ RealVector tmp;
+ ParticleVector particle_tmp;
for (WarpXParIter pti(*this, lev); pti.isValid(); ++pti)
{
@@ -760,7 +1082,7 @@ PhysicalParticleContainer::Evolve (int lev,
FArrayBox const* byfab = &(By[pti]);
FArrayBox const* bzfab = &(Bz[pti]);
- if (warpx_use_fdtd_nci_corr())
+ if (WarpX::use_fdtd_nci_corr)
{
#if (AMREX_SPACEDIM == 2)
const Box& tbox = amrex::grow(pti.tilebox(),{static_cast<int>(WarpX::nox),
@@ -823,10 +1145,6 @@ PhysicalParticleContainer::Evolve (int lev,
#endif
}
- FArrayBox& jxfab = jx[pti];
- FArrayBox& jyfab = jy[pti];
- FArrayBox& jzfab = jz[pti];
-
Exp.assign(np,0.0);
Eyp.assign(np,0.0);
Ezp.assign(np,0.0);
@@ -834,7 +1152,7 @@ PhysicalParticleContainer::Evolve (int lev,
Byp.assign(np,WarpX::B_external[1]);
Bzp.assign(np,WarpX::B_external[2]);
- giv.resize(np);
+ m_giv[thread_num].resize(np);
long nfine_current = np;
long nfine_gather = np;
@@ -933,121 +1251,33 @@ PhysicalParticleContainer::Evolve (int lev,
// copy data from particle container to temp arrays
//
BL_PROFILE_VAR_START(blp_copy);
- pti.GetPosition(xp, yp, zp);
+ pti.GetPosition(m_xp[thread_num], m_yp[thread_num], m_zp[thread_num]);
BL_PROFILE_VAR_STOP(blp_copy);
- const std::array<Real,3>& xyzmin_tile = WarpX::LowerCorner(pti.tilebox(), lev);
- const std::array<Real,3>& xyzmin_grid = WarpX::LowerCorner(box, lev);
- const int* ixyzmin_grid = box.loVect();
-
- long lvect = 8;
-
- auto depositCharge = [&] (MultiFab* rhomf, MultiFab* crhomf, int icomp)
- {
- long ngRho = rhomf->nGrow();
- Real* data_ptr;
- Box tile_box = convert(pti.tilebox(), IntVect::TheUnitVector());
- const int *rholen;
-
- if (np_current > 0)
- {
- FArrayBox& rhofab = (*rhomf)[pti];
- const std::array<Real, 3>& xyzmin = xyzmin_tile;
- tile_box.grow(ngRho);
- local_rho.resize(tile_box);
- local_rho = 0.0;
- data_ptr = local_rho.dataPtr();
- rholen = local_rho.length();
-
-#if (AMREX_SPACEDIM == 3)
- const long nx = rholen[0]-1-2*ngRho;
- const long ny = rholen[1]-1-2*ngRho;
- const long nz = rholen[2]-1-2*ngRho;
-#else
- const long nx = rholen[0]-1-2*ngRho;
- const long ny = 0;
- const long nz = rholen[1]-1-2*ngRho;
-#endif
- warpx_charge_deposition(data_ptr, &np_current,
- xp.data(), yp.data(), zp.data(), wp.data(),
- &this->charge,
- &xyzmin[0], &xyzmin[1], &xyzmin[2],
- &dx[0], &dx[1], &dx[2], &nx, &ny, &nz,
- &ngRho, &ngRho, &ngRho,
- &WarpX::nox,&WarpX::noy,&WarpX::noz,
- &lvect, &WarpX::charge_deposition_algo);
-
- const int ncomp = 1;
- amrex_atomic_accumulate_fab(BL_TO_FORTRAN_3D(local_rho),
- BL_TO_FORTRAN_N_3D(rhofab,icomp), ncomp);
- }
-
- if (np_current < np)
- {
- const IntVect& ref_ratio = WarpX::RefRatio(lev-1);
- const Box& ctilebox = amrex::coarsen(pti.tilebox(), ref_ratio);
- const std::array<Real,3>& cxyzmin_tile = WarpX::LowerCorner(ctilebox, lev-1);
-
- tile_box = amrex::convert(ctilebox, IntVect::TheUnitVector());
- tile_box.grow(ngRho);
-
- local_rho.resize(tile_box);
-
- local_rho = 0.0;
-
- data_ptr = local_rho.dataPtr();
- rholen = local_rho.length();
-
-#if (AMREX_SPACEDIM == 3)
- const long nx = rholen[0]-1-2*ngRho;
- const long ny = rholen[1]-1-2*ngRho;
- const long nz = rholen[2]-1-2*ngRho;
-#else
- const long nx = rholen[0]-1-2*ngRho;
- const long ny = 0;
- const long nz = rholen[1]-1-2*ngRho;
-#endif
-
- long ncrse = np - nfine_current;
- warpx_charge_deposition(data_ptr, &ncrse,
- xp.data() + nfine_current,
- yp.data() + nfine_current,
- zp.data() + nfine_current,
- wp.data() + nfine_current,
- &this->charge,
- &cxyzmin_tile[0], &cxyzmin_tile[1], &cxyzmin_tile[2],
- &cdx[0], &cdx[1], &cdx[2], &nx, &ny, &nz,
- &ngRho, &ngRho, &ngRho,
- &WarpX::nox,&WarpX::noy,&WarpX::noz,
- &lvect, &WarpX::charge_deposition_algo);
-
- FArrayBox& crhofab = (*crhomf)[pti];
-
- const int ncomp = 1;
- amrex_atomic_accumulate_fab(BL_TO_FORTRAN_3D(local_rho),
- BL_TO_FORTRAN_N_3D(crhofab,icomp), ncomp);
- }
- };
-
- if (rho) depositCharge(rho, crho, 0);
-
+ if (rho) DepositCharge(pti, wp, rho, crho, 0, np_current, np, thread_num, lev);
+
if (! do_not_push)
{
//
// Field Gather of Aux Data (i.e., the full solution)
//
const int ll4symtry = false;
- const int l_lower_order_in_v = warpx_l_lower_order_in_v();
long lvect_fieldgathe = 64;
+ const std::array<Real,3>& xyzmin_grid = WarpX::LowerCorner(box, lev);
+ const int* ixyzmin_grid = box.loVect();
+
const long np_gather = (cEx) ? nfine_gather : np;
BL_PROFILE_VAR_START(blp_pxr_fg);
warpx_geteb_energy_conserving(
- &np_gather, xp.data(), yp.data(), zp.data(),
- Exp.data(),Eyp.data(),Ezp.data(),
- Bxp.data(),Byp.data(),Bzp.data(),
+ &np_gather,
+ m_xp[thread_num].dataPtr(),
+ m_yp[thread_num].dataPtr(),
+ m_zp[thread_num].dataPtr(),
+ Exp.dataPtr(),Eyp.dataPtr(),Ezp.dataPtr(),
+ Bxp.dataPtr(),Byp.dataPtr(),Bzp.dataPtr(),
ixyzmin_grid,
&xyzmin_grid[0], &xyzmin_grid[1], &xyzmin_grid[2],
&dx[0], &dx[1], &dx[2],
@@ -1058,7 +1288,7 @@ PhysicalParticleContainer::Evolve (int lev,
BL_TO_FORTRAN_ANYD(*bxfab),
BL_TO_FORTRAN_ANYD(*byfab),
BL_TO_FORTRAN_ANYD(*bzfab),
- &ll4symtry, &l_lower_order_in_v,
+ &ll4symtry, &WarpX::l_lower_order_in_v,
&lvect_fieldgathe, &WarpX::field_gathering_algo);
if (np_gather < np)
@@ -1075,7 +1305,7 @@ PhysicalParticleContainer::Evolve (int lev,
const FArrayBox* cbyfab = &(*cBy)[pti];
const FArrayBox* cbzfab = &(*cBz)[pti];
- if (warpx_use_fdtd_nci_corr())
+ if (WarpX::use_fdtd_nci_corr)
{
#if (AMREX_SPACEDIM == 2)
const Box& tbox = amrex::grow(cbox,{static_cast<int>(WarpX::nox),
@@ -1139,9 +1369,12 @@ PhysicalParticleContainer::Evolve (int lev,
long ncrse = np - nfine_gather;
warpx_geteb_energy_conserving(
- &ncrse, xp.data()+nfine_gather, yp.data()+nfine_gather, zp.data()+nfine_gather,
- Exp.data()+nfine_gather, Eyp.data()+nfine_gather, Ezp.data()+nfine_gather,
- Bxp.data()+nfine_gather, Byp.data()+nfine_gather, Bzp.data()+nfine_gather,
+ &ncrse,
+ m_xp[thread_num].dataPtr()+nfine_gather,
+ m_yp[thread_num].dataPtr()+nfine_gather,
+ m_zp[thread_num].dataPtr()+nfine_gather,
+ Exp.dataPtr()+nfine_gather, Eyp.dataPtr()+nfine_gather, Ezp.dataPtr()+nfine_gather,
+ Bxp.dataPtr()+nfine_gather, Byp.dataPtr()+nfine_gather, Bzp.dataPtr()+nfine_gather,
cixyzmin_grid,
&cxyzmin_grid[0], &cxyzmin_grid[1], &cxyzmin_grid[2],
&cdx[0], &cdx[1], &cdx[2],
@@ -1152,7 +1385,7 @@ PhysicalParticleContainer::Evolve (int lev,
BL_TO_FORTRAN_ANYD(*cbxfab),
BL_TO_FORTRAN_ANYD(*cbyfab),
BL_TO_FORTRAN_ANYD(*cbzfab),
- &ll4symtry, &l_lower_order_in_v,
+ &ll4symtry, &WarpX::l_lower_order_in_v,
&lvect_fieldgathe, &WarpX::field_gathering_algo);
}
@@ -1162,141 +1395,34 @@ PhysicalParticleContainer::Evolve (int lev,
// Particle Push
//
BL_PROFILE_VAR_START(blp_pxr_pp);
- PushPX(pti, xp, yp, zp, giv, dt);
+ PushPX(pti, m_xp[thread_num], m_yp[thread_num], m_zp[thread_num],
+ m_giv[thread_num], dt);
BL_PROFILE_VAR_STOP(blp_pxr_pp);
//
- // Current Deposition onto fine patch
+ // Current Deposition
//
-
- BL_PROFILE_VAR_START(blp_pxr_cd);
- Real *jx_ptr, *jy_ptr, *jz_ptr;
- const int *jxntot, *jyntot, *jzntot;
- Box tbx = convert(pti.tilebox(), WarpX::jx_nodal_flag);
- Box tby = convert(pti.tilebox(), WarpX::jy_nodal_flag);
- Box tbz = convert(pti.tilebox(), WarpX::jz_nodal_flag);
- Box gtbx, gtby, gtbz;
-
- const std::array<Real, 3>& xyzmin = xyzmin_tile;
-
- if (np_current > 0)
- {
- tbx.grow(ngJ);
- tby.grow(ngJ);
- tbz.grow(ngJ);
-
- local_jx.resize(tbx);
- local_jy.resize(tby);
- local_jz.resize(tbz);
-
- local_jx = 0.0;
- local_jy = 0.0;
- local_jz = 0.0;
-
- jx_ptr = local_jx.dataPtr();
- jy_ptr = local_jy.dataPtr();
- jz_ptr = local_jz.dataPtr();
-
- jxntot = local_jx.length();
- jyntot = local_jy.length();
- jzntot = local_jz.length();
-
- warpx_current_deposition(
- jx_ptr, &ngJ, jxntot,
- jy_ptr, &ngJ, jyntot,
- jz_ptr, &ngJ, jzntot,
- &np_current, xp.data(), yp.data(), zp.data(),
- uxp.data(), uyp.data(), uzp.data(),
- giv.data(), wp.data(), &this->charge,
- &xyzmin[0], &xyzmin[1], &xyzmin[2],
- &dt, &dx[0], &dx[1], &dx[2],
- &WarpX::nox,&WarpX::noy,&WarpX::noz,
- &lvect,&WarpX::current_deposition_algo);
-
- BL_PROFILE_VAR_STOP(blp_pxr_cd);
-
- BL_PROFILE_VAR_START(blp_accumulate);
- const int ncomp = 1;
- amrex_atomic_accumulate_fab(BL_TO_FORTRAN_3D(local_jx),
- BL_TO_FORTRAN_3D(jxfab), ncomp);
-
- amrex_atomic_accumulate_fab(BL_TO_FORTRAN_3D(local_jy),
- BL_TO_FORTRAN_3D(jyfab), ncomp);
-
- amrex_atomic_accumulate_fab(BL_TO_FORTRAN_3D(local_jz),
- BL_TO_FORTRAN_3D(jzfab), ncomp);
- BL_PROFILE_VAR_STOP(blp_accumulate);
- }
-
- if (np_current < np)
- {
- const IntVect& ref_ratio = WarpX::RefRatio(lev-1);
- const Box& ctilebox = amrex::coarsen(pti.tilebox(),ref_ratio);
- const std::array<Real,3>& cxyzmin_tile = WarpX::LowerCorner(ctilebox, lev-1);
-
- tbx = amrex::convert(ctilebox, WarpX::jx_nodal_flag);
- tby = amrex::convert(ctilebox, WarpX::jy_nodal_flag);
- tbz = amrex::convert(ctilebox, WarpX::jz_nodal_flag);
- tbx.grow(ngJ);
- tby.grow(ngJ);
- tbz.grow(ngJ);
-
- local_jx.resize(tbx);
- local_jy.resize(tby);
- local_jz.resize(tbz);
-
- local_jx = 0.0;
- local_jy = 0.0;
- local_jz = 0.0;
-
- jx_ptr = local_jx.dataPtr();
- jy_ptr = local_jy.dataPtr();
- jz_ptr = local_jz.dataPtr();
-
- jxntot = local_jx.length();
- jyntot = local_jy.length();
- jzntot = local_jz.length();
-
- long ncrse = np - nfine_current;
- warpx_current_deposition(
- jx_ptr, &ngJ, jxntot,
- jy_ptr, &ngJ, jyntot,
- jz_ptr, &ngJ, jzntot,
- &ncrse, xp.data()+nfine_current, yp.data()+nfine_current, zp.data()+nfine_current,
- uxp.data()+nfine_current, uyp.data()+nfine_current, uzp.data()+nfine_current,
- giv.data()+nfine_current, wp.data()+nfine_current, &this->charge,
- &cxyzmin_tile[0], &cxyzmin_tile[1], &cxyzmin_tile[2],
- &dt, &cdx[0], &cdx[1], &cdx[2],
- &WarpX::nox,&WarpX::noy,&WarpX::noz,
- &lvect,&WarpX::current_deposition_algo);
-
- FArrayBox& cjxfab = (*cjx)[pti];
- FArrayBox& cjyfab = (*cjy)[pti];
- FArrayBox& cjzfab = (*cjz)[pti];
-
- const int ncomp = 1;
- amrex_atomic_accumulate_fab(BL_TO_FORTRAN_3D(local_jx),
- BL_TO_FORTRAN_3D(cjxfab), ncomp);
- amrex_atomic_accumulate_fab(BL_TO_FORTRAN_3D(local_jy),
- BL_TO_FORTRAN_3D(cjyfab), ncomp);
- amrex_atomic_accumulate_fab(BL_TO_FORTRAN_3D(local_jz),
- BL_TO_FORTRAN_3D(cjzfab), ncomp);
- }
-
+ DepositCurrent(pti, wp, uxp, uyp, uzp, jx, jy, jz,
+ cjx, cjy, cjz, np_current, np, thread_num, lev, dt);
+
//
// copy particle data back
//
BL_PROFILE_VAR_START(blp_copy);
- pti.SetPosition(xp, yp, zp);
+ pti.SetPosition(m_xp[thread_num], m_yp[thread_num], m_zp[thread_num]);
BL_PROFILE_VAR_STOP(blp_copy);
}
-
- if (rho) depositCharge(rho, crho, 1);
+
+ if (rho) DepositCharge(pti, wp, rho, crho, 1, np_current, np, thread_num, lev);
if (cost) {
const Box& tbx = pti.tilebox();
wt = (amrex::second() - wt) / tbx.d_numPts();
- (*cost)[pti].plus(wt, tbx);
+ FArrayBox* costfab = cost->fabPtr(pti);
+ AMREX_LAUNCH_HOST_DEVICE_LAMBDA ( tbx, work_box,
+ {
+ costfab->plus(wt, work_box);
+ });
}
}
}
@@ -1304,8 +1430,10 @@ PhysicalParticleContainer::Evolve (int lev,
void
PhysicalParticleContainer::PushPX(WarpXParIter& pti,
- Vector<Real>& xp, Vector<Real>& yp, Vector<Real>& zp,
- Vector<Real>& giv,
+ Cuda::DeviceVector<Real>& xp,
+ Cuda::DeviceVector<Real>& yp,
+ Cuda::DeviceVector<Real>& zp,
+ Cuda::DeviceVector<Real>& giv,
Real dt)
{
@@ -1330,15 +1458,19 @@ PhysicalParticleContainer::PushPX(WarpXParIter& pti,
auto& uypold = attribs[PIdx::uyold];
auto& uzpold = attribs[PIdx::uzold];
- warpx_copy_attribs(&np, xp.data(), yp.data(), zp.data(),
- uxp.data(), uyp.data(), uzp.data(),
- xpold.data(), ypold.data(), zpold.data(),
- uxpold.data(), uypold.data(), uzpold.data());
+ warpx_copy_attribs(&np, xp.dataPtr(), yp.dataPtr(), zp.dataPtr(),
+ uxp.dataPtr(), uyp.dataPtr(), uzp.dataPtr(),
+ xpold.dataPtr(), ypold.dataPtr(), zpold.dataPtr(),
+ uxpold.dataPtr(), uypold.dataPtr(), uzpold.dataPtr());
#endif
- warpx_particle_pusher(&np, xp.data(), yp.data(), zp.data(),
- uxp.data(), uyp.data(), uzp.data(), giv.data(),
+ warpx_particle_pusher(&np,
+ xp.dataPtr(),
+ yp.dataPtr(),
+ zp.dataPtr(),
+ uxp.dataPtr(), uyp.dataPtr(), uzp.dataPtr(),
+ giv.dataPtr(),
Exp.dataPtr(), Eyp.dataPtr(), Ezp.dataPtr(),
Bxp.dataPtr(), Byp.dataPtr(), Bzp.dataPtr(),
&this->charge, &this->mass, &dt,
@@ -1351,6 +1483,8 @@ PhysicalParticleContainer::PushP (int lev, Real dt,
const MultiFab& Ex, const MultiFab& Ey, const MultiFab& Ez,
const MultiFab& Bx, const MultiFab& By, const MultiFab& Bz)
{
+ BL_PROFILE("PhysicalParticleContainer::PushP");
+
if (do_not_push) return;
const std::array<Real,3>& dx = WarpX::CellSize(lev);
@@ -1359,8 +1493,11 @@ PhysicalParticleContainer::PushP (int lev, Real dt,
#pragma omp parallel
#endif
{
- Vector<Real> xp, yp, zp, giv;
-
+#ifdef _OPENMP
+ int thread_num = omp_get_thread_num();
+#else
+ int thread_num = 0;
+#endif
for (WarpXParIter pti(*this, lev); pti.isValid(); ++pti)
{
const Box& box = pti.validbox();
@@ -1394,23 +1531,25 @@ PhysicalParticleContainer::PushP (int lev, Real dt,
Byp.assign(np,WarpX::B_external[1]);
Bzp.assign(np,WarpX::B_external[2]);
- giv.resize(np);
+ m_giv[thread_num].resize(np);
//
// copy data from particle container to temp arrays
//
- pti.GetPosition(xp, yp, zp);
+ pti.GetPosition(m_xp[thread_num], m_yp[thread_num], m_zp[thread_num]);
const std::array<Real,3>& xyzmin_grid = WarpX::LowerCorner(box, lev);
const int* ixyzmin_grid = box.loVect();
const int ll4symtry = false;
- const int l_lower_order_in_v = true;
long lvect_fieldgathe = 64;
warpx_geteb_energy_conserving(
- &np, xp.data(), yp.data(), zp.data(),
- Exp.data(),Eyp.data(),Ezp.data(),
- Bxp.data(),Byp.data(),Bzp.data(),
+ &np,
+ m_xp[thread_num].dataPtr(),
+ m_yp[thread_num].dataPtr(),
+ m_zp[thread_num].dataPtr(),
+ Exp.dataPtr(),Eyp.dataPtr(),Ezp.dataPtr(),
+ Bxp.dataPtr(),Byp.dataPtr(),Bzp.dataPtr(),
ixyzmin_grid,
&xyzmin_grid[0], &xyzmin_grid[1], &xyzmin_grid[2],
&dx[0], &dx[1], &dx[2],
@@ -1421,11 +1560,15 @@ PhysicalParticleContainer::PushP (int lev, Real dt,
BL_TO_FORTRAN_ANYD(bxfab),
BL_TO_FORTRAN_ANYD(byfab),
BL_TO_FORTRAN_ANYD(bzfab),
- &ll4symtry, &l_lower_order_in_v,
+ &ll4symtry, &WarpX::l_lower_order_in_v,
&lvect_fieldgathe, &WarpX::field_gathering_algo);
- warpx_particle_pusher_momenta(&np, xp.data(), yp.data(), zp.data(),
- uxp.data(), uyp.data(), uzp.data(), giv.data(),
+ warpx_particle_pusher_momenta(&np,
+ m_xp[thread_num].dataPtr(),
+ m_yp[thread_num].dataPtr(),
+ m_zp[thread_num].dataPtr(),
+ uxp.dataPtr(), uyp.dataPtr(), uzp.dataPtr(),
+ m_giv[thread_num].dataPtr(),
Exp.dataPtr(), Eyp.dataPtr(), Ezp.dataPtr(),
Bxp.dataPtr(), Byp.dataPtr(), Bzp.dataPtr(),
&this->charge, &this->mass, &dt,
@@ -1482,7 +1625,7 @@ void PhysicalParticleContainer::GetParticleSlice(const int direction, const Real
#pragma omp parallel
#endif
{
- Vector<Real> xp_new, yp_new, zp_new;
+ RealVector xp_new, yp_new, zp_new;
for (WarpXParIter pti(*this, lev); pti.isValid(); ++pti)
{
@@ -1615,7 +1758,7 @@ int PhysicalParticleContainer::GetRefineFac(const Real x, const Real y, const Re
stretched_boxes.push_back(bx);
}
- BoxArray stretched_ba(stretched_boxes.data(), stretched_boxes.size());
+ BoxArray stretched_ba(stretched_boxes.dataPtr(), stretched_boxes.size());
const int num_ghost = 0;
if ( stretched_ba.intersects(Box(iv, iv), num_ghost) )
diff --git a/Source/RigidInjectedParticleContainer.H b/Source/RigidInjectedParticleContainer.H
index f2440ad7a..8a9ac9e6e 100644
--- a/Source/RigidInjectedParticleContainer.H
+++ b/Source/RigidInjectedParticleContainer.H
@@ -43,10 +43,10 @@ public:
amrex::Real dt) override;
virtual void PushPX(WarpXParIter& pti,
- amrex::Vector<amrex::Real>& xp,
- amrex::Vector<amrex::Real>& yp,
- amrex::Vector<amrex::Real>& zp,
- amrex::Vector<amrex::Real>& giv,
+ amrex::Cuda::DeviceVector<amrex::Real>& xp,
+ amrex::Cuda::DeviceVector<amrex::Real>& yp,
+ amrex::Cuda::DeviceVector<amrex::Real>& zp,
+ amrex::Cuda::DeviceVector<amrex::Real>& giv,
amrex::Real dt) override;
virtual void PushP (int lev, amrex::Real dt,
diff --git a/Source/RigidInjectedParticleContainer.cpp b/Source/RigidInjectedParticleContainer.cpp
index 96bdc59a4..eb2c1c4a8 100644
--- a/Source/RigidInjectedParticleContainer.cpp
+++ b/Source/RigidInjectedParticleContainer.cpp
@@ -73,7 +73,7 @@ RigidInjectedParticleContainer::RemapParticles()
// Note that the particles are already in the boosted frame.
// This value is saved to advance the particles not injected yet
- Vector<Real> xp, yp, zp;
+ Cuda::DeviceVector<Real> xp, yp, zp;
for (WarpXParIter pti(*this, lev); pti.isValid(); ++pti)
{
@@ -134,7 +134,7 @@ RigidInjectedParticleContainer::BoostandRemapParticles()
#pragma omp parallel
#endif
{
- Vector<Real> xp, yp, zp;
+ Cuda::DeviceVector<Real> xp, yp, zp;
for (WarpXParIter pti(*this, 0); pti.isValid(); ++pti)
{
@@ -205,8 +205,10 @@ RigidInjectedParticleContainer::BoostandRemapParticles()
void
RigidInjectedParticleContainer::PushPX(WarpXParIter& pti,
- Vector<Real>& xp, Vector<Real>& yp, Vector<Real>& zp,
- Vector<Real>& giv,
+ Cuda::DeviceVector<Real>& xp,
+ Cuda::DeviceVector<Real>& yp,
+ Cuda::DeviceVector<Real>& zp,
+ Cuda::DeviceVector<Real>& giv,
Real dt)
{
@@ -231,15 +233,16 @@ RigidInjectedParticleContainer::PushPX(WarpXParIter& pti,
auto& uypold = attribs[PIdx::uyold];
auto& uzpold = attribs[PIdx::uzold];
- warpx_copy_attribs(&np, xp.data(), yp.data(), zp.data(),
- uxp.data(), uyp.data(), uzp.data(),
- xpold.data(), ypold.data(), zpold.data(),
- uxpold.data(), uypold.data(), uzpold.data());
+ warpx_copy_attribs(&np, xp.dataPtr(), yp.dataPtr(), zp.dataPtr(),
+ uxp.dataPtr(), uyp.dataPtr(), uzp.dataPtr(),
+ xpold.dataPtr(), ypold.dataPtr(), zpold.dataPtr(),
+ uxpold.dataPtr(), uypold.dataPtr(), uzpold.dataPtr());
#endif
// Save the position and momenta, making copies
- Vector<Real> xp_save, yp_save, zp_save, uxp_save, uyp_save, uzp_save;
+ Cuda::DeviceVector<Real> xp_save, yp_save, zp_save;
+ RealVector uxp_save, uyp_save, uzp_save;
if (!done_injecting_lev) {
xp_save = xp;
@@ -265,8 +268,12 @@ RigidInjectedParticleContainer::PushPX(WarpXParIter& pti,
}
}
- warpx_particle_pusher(&np, xp.data(), yp.data(), zp.data(),
- uxp.data(), uyp.data(), uzp.data(), giv.data(),
+ warpx_particle_pusher(&np,
+ xp.dataPtr(),
+ yp.dataPtr(),
+ zp.dataPtr(),
+ uxp.dataPtr(), uyp.dataPtr(), uzp.dataPtr(),
+ giv.dataPtr(),
Exp.dataPtr(), Eyp.dataPtr(), Ezp.dataPtr(),
Bxp.dataPtr(), Byp.dataPtr(), Bzp.dataPtr(),
&this->charge, &this->mass, &dt,
@@ -355,7 +362,7 @@ RigidInjectedParticleContainer::PushP (int lev, Real dt,
#pragma omp parallel
#endif
{
- Vector<Real> xp, yp, zp, giv;
+ Cuda::DeviceVector<Real> xp, yp, zp, giv;
for (WarpXParIter pti(*this, lev); pti.isValid(); ++pti)
{
@@ -404,9 +411,12 @@ RigidInjectedParticleContainer::PushP (int lev, Real dt,
const int l_lower_order_in_v = true;
long lvect_fieldgathe = 64;
warpx_geteb_energy_conserving(
- &np, xp.data(), yp.data(), zp.data(),
- Exp.data(),Eyp.data(),Ezp.data(),
- Bxp.data(),Byp.data(),Bzp.data(),
+ &np,
+ xp.dataPtr(),
+ yp.dataPtr(),
+ zp.dataPtr(),
+ Exp.dataPtr(),Eyp.dataPtr(),Ezp.dataPtr(),
+ Bxp.dataPtr(),Byp.dataPtr(),Bzp.dataPtr(),
ixyzmin_grid,
&xyzmin_grid[0], &xyzmin_grid[1], &xyzmin_grid[2],
&dx[0], &dx[1], &dx[2],
@@ -425,8 +435,12 @@ RigidInjectedParticleContainer::PushP (int lev, Real dt,
auto uyp_save = uyp;
auto uzp_save = uzp;
- warpx_particle_pusher_momenta(&np, xp.data(), yp.data(), zp.data(),
- uxp.data(), uyp.data(), uzp.data(), giv.data(),
+ warpx_particle_pusher_momenta(&np,
+ xp.dataPtr(),
+ yp.dataPtr(),
+ zp.dataPtr(),
+ uxp.dataPtr(), uyp.dataPtr(), uzp.dataPtr(),
+ giv.dataPtr(),
Exp.dataPtr(), Eyp.dataPtr(), Ezp.dataPtr(),
Bxp.dataPtr(), Byp.dataPtr(), Bzp.dataPtr(),
&this->charge, &this->mass, &dt,
diff --git a/Source/WarpX.H b/Source/WarpX.H
index ec41115cd..08fe657b4 100644
--- a/Source/WarpX.H
+++ b/Source/WarpX.H
@@ -86,6 +86,9 @@ public:
static long noy;
static long noz;
+ static bool use_fdtd_nci_corr;
+ static int l_lower_order_in_v;
+
static bool use_laser;
static bool use_filter;
static bool serialize_ics;
@@ -105,6 +108,8 @@ public:
static bool do_dynamic_scheduling;
static bool refine_plasma;
+ static int sort_int;
+
// buffers
static int n_field_gather_buffer;
static int n_current_deposition_buffer;
@@ -139,9 +144,13 @@ public:
void EvolveB (int lev, amrex::Real dt);
void EvolveF ( amrex::Real dt, DtType dt_type);
void EvolveF (int lev, amrex::Real dt, DtType dt_type);
+ void EvolveB (int lev, PatchType patch_type, amrex::Real dt);
+ void EvolveE (int lev, PatchType patch_type, amrex::Real dt);
+ void EvolveF (int lev, PatchType patch_type, amrex::Real dt, DtType dt_type);
void DampPML ();
void DampPML (int lev);
+ void DampPML (int lev, PatchType patch_type);
void PushParticlesandDepose (int lev, amrex::Real cur_time);
void PushParticlesandDepose ( amrex::Real cur_time);
@@ -248,11 +257,6 @@ private:
void FillBoundaryE (int lev, PatchType patch_type);
void FillBoundaryF (int lev, PatchType patch_type);
- void EvolveB (int lev, PatchType patch_type, amrex::Real dt);
- void EvolveE (int lev, PatchType patch_type, amrex::Real dt);
- void EvolveF (int lev, PatchType patch_type, amrex::Real dt, DtType dt_type);
- void DampPML (int lev, PatchType patch_type);
-
void OneStep_nosub (amrex::Real t);
void OneStep_sub1 (amrex::Real t);
@@ -350,10 +354,18 @@ private:
const std::array<const amrex::MultiFab*, 3>& B,
const std::array<amrex::Real,3>& dx);
+ static void ComputeDivB (amrex::MultiFab& divB, int dcomp,
+ const std::array<const amrex::MultiFab*, 3>& B,
+ const std::array<amrex::Real,3>& dx, int ngrow);
+
static void ComputeDivE (amrex::MultiFab& divE, int dcomp,
const std::array<const amrex::MultiFab*, 3>& B,
const std::array<amrex::Real,3>& dx);
+ static void ComputeDivE (amrex::MultiFab& divE, int dcomp,
+ const std::array<const amrex::MultiFab*, 3>& B,
+ const std::array<amrex::Real,3>& dx, int ngrow);
+
void SyncCurrent (const std::array<const amrex::MultiFab*,3>& fine,
const std::array< amrex::MultiFab*,3>& crse,
int ref_ratio);
@@ -425,6 +437,13 @@ private:
amrex::Vector<std::array< std::unique_ptr<amrex::MultiFab>, 3 > > current_buf;
amrex::Vector<std::unique_ptr<amrex::MultiFab> > charge_buf;
+ amrex::Vector<std::array<std::unique_ptr<amrex::iMultiFab>, 3 > > current_fp_owner_masks;
+ amrex::Vector<std::array<std::unique_ptr<amrex::iMultiFab>, 3 > > current_cp_owner_masks;
+
+ amrex::Vector<std::unique_ptr<amrex::iMultiFab> > rho_fp_owner_masks;
+ amrex::Vector<std::unique_ptr<amrex::iMultiFab> > rho_cp_owner_masks;
+
+
// div E cleaning
int do_dive_cleaning = 0;
@@ -582,6 +601,7 @@ private:
int insitu_int;
int insitu_start;
std::string insitu_config;
+ int insitu_pin_mesh;
};
#endif
diff --git a/Source/WarpX.cpp b/Source/WarpX.cpp
index 44eff9cd4..1828ebcec 100644
--- a/Source/WarpX.cpp
+++ b/Source/WarpX.cpp
@@ -43,11 +43,16 @@ long WarpX::nox = 1;
long WarpX::noy = 1;
long WarpX::noz = 1;
+bool WarpX::use_fdtd_nci_corr = false;
+int WarpX::l_lower_order_in_v = true;
+
bool WarpX::use_laser = false;
bool WarpX::use_filter = false;
bool WarpX::serialize_ics = false;
bool WarpX::refine_plasma = false;
+int WarpX::sort_int = -1;
+
bool WarpX::do_boosted_frame_diagnostic = false;
int WarpX::num_snapshots_lab = std::numeric_limits<int>::lowest();
Real WarpX::dt_snapshots_lab = std::numeric_limits<Real>::lowest();
@@ -91,6 +96,8 @@ int WarpX::n_current_deposition_buffer = -1;
WarpX* WarpX::m_instance = nullptr;
+
+
WarpX&
WarpX::GetInstance ()
{
@@ -169,6 +176,11 @@ WarpX::WarpX ()
current_buf.resize(nlevs_max);
charge_buf.resize(nlevs_max);
+ current_fp_owner_masks.resize(nlevs_max);
+ current_cp_owner_masks.resize(nlevs_max);
+ rho_fp_owner_masks.resize(nlevs_max);
+ rho_cp_owner_masks.resize(nlevs_max);
+
pml.resize(nlevs_max);
#ifdef WARPX_DO_ELECTROSTATIC
@@ -335,6 +347,7 @@ WarpX::ReadParameters ()
pp.query("do_dive_cleaning", do_dive_cleaning);
pp.query("n_field_gather_buffer", n_field_gather_buffer);
pp.query("n_current_deposition_buffer", n_current_deposition_buffer);
+ pp.query("sort_int", sort_int);
pp.query("do_pml", do_pml);
pp.query("pml_ncell", pml_ncell);
@@ -450,11 +463,13 @@ WarpX::ReadParameters ()
insitu_start = 0;
insitu_int = 0;
insitu_config = "";
+ insitu_pin_mesh = 0;
ParmParse pp("insitu");
pp.query("int", insitu_int);
pp.query("start", insitu_start);
pp.query("config", insitu_config);
+ pp.query("pin_mesh", insitu_pin_mesh);
}
}
@@ -492,8 +507,14 @@ WarpX::ClearLevel (int lev)
Efield_cax[lev][i].reset();
Bfield_cax[lev][i].reset();
current_buf[lev][i].reset();
+
+ current_fp_owner_masks[lev][i].reset();
+ current_cp_owner_masks[lev][i].reset();
}
+ rho_fp_owner_masks[lev].reset();
+ rho_cp_owner_masks[lev].reset();
+
charge_buf[lev].reset();
current_buffer_masks[lev].reset();
@@ -550,7 +571,7 @@ WarpX::AllocLevelData (int lev, const BoxArray& ba, const DistributionMapping& d
int ngy = (ngy_tmp % 2) ? ngy_tmp+1 : ngy_tmp; // Always even number
int ngz_nonci = (ngz_tmp % 2) ? ngz_tmp+1 : ngz_tmp; // Always even number
int ngz;
- if (warpx_use_fdtd_nci_corr()) {
+ if (WarpX::use_fdtd_nci_corr) {
int ng = ngz_tmp + (mypc->nstencilz_fdtd_nci_corr-1);
ngz = (ng % 2) ? ng+1 : ng;
} else {
@@ -613,16 +634,24 @@ WarpX::AllocLevelData (int lev, const BoxArray& ba, const DistributionMapping& d
current_fp[lev][1].reset( new MultiFab(amrex::convert(ba,jy_nodal_flag),dm,1,ngJ));
current_fp[lev][2].reset( new MultiFab(amrex::convert(ba,jz_nodal_flag),dm,1,ngJ));
- if (do_subcycling == 1 && lev == 0) {
+ const auto& period = Geom(lev).periodicity();
+ current_fp_owner_masks[lev][0] = std::move(current_fp[lev][0]->OwnerMask(period));
+ current_fp_owner_masks[lev][1] = std::move(current_fp[lev][1]->OwnerMask(period));
+ current_fp_owner_masks[lev][2] = std::move(current_fp[lev][2]->OwnerMask(period));
+
+ if (do_dive_cleaning || plot_rho)
+ {
+ rho_fp[lev].reset(new MultiFab(amrex::convert(ba,IntVect::TheUnitVector()),dm,2,ngRho));
+ rho_fp_owner_masks[lev] = std::move(rho_fp[lev]->OwnerMask(period));
+ }
+
+ if (do_subcycling == 1 && lev == 0)
+ {
current_store[lev][0].reset( new MultiFab(amrex::convert(ba,jx_nodal_flag),dm,1,ngJ));
current_store[lev][1].reset( new MultiFab(amrex::convert(ba,jy_nodal_flag),dm,1,ngJ));
current_store[lev][2].reset( new MultiFab(amrex::convert(ba,jz_nodal_flag),dm,1,ngJ));
}
- if (do_dive_cleaning || plot_rho){
- rho_fp[lev].reset(new MultiFab(amrex::convert(ba,IntVect::TheUnitVector()),dm,2,ngRho));
- }
-
if (do_dive_cleaning)
{
F_fp[lev].reset (new MultiFab(amrex::convert(ba,IntVect::TheUnitVector()),dm,1, ngF));
@@ -631,6 +660,7 @@ WarpX::AllocLevelData (int lev, const BoxArray& ba, const DistributionMapping& d
else
{
rho_fp[lev].reset(new MultiFab(amrex::convert(ba,IntVect::TheUnitVector()),dm,2,ngRho));
+ rho_fp_owner_masks[lev] = std::move(rho_fp[lev]->OwnerMask(period));
}
#endif
@@ -678,8 +708,14 @@ WarpX::AllocLevelData (int lev, const BoxArray& ba, const DistributionMapping& d
current_cp[lev][1].reset( new MultiFab(amrex::convert(cba,jy_nodal_flag),dm,1,ngJ));
current_cp[lev][2].reset( new MultiFab(amrex::convert(cba,jz_nodal_flag),dm,1,ngJ));
+ const auto& cperiod = Geom(lev).periodicity();
+ current_cp_owner_masks[lev][0] = std::move(current_cp[lev][0]->OwnerMask(cperiod));
+ current_cp_owner_masks[lev][1] = std::move(current_cp[lev][1]->OwnerMask(cperiod));
+ current_cp_owner_masks[lev][2] = std::move(current_cp[lev][2]->OwnerMask(cperiod));
+
if (do_dive_cleaning || plot_rho){
rho_cp[lev].reset(new MultiFab(amrex::convert(cba,IntVect::TheUnitVector()),dm,2,ngRho));
+ rho_cp_owner_masks[lev] = std::move(rho_cp[lev]->OwnerMask(cperiod));
}
if (do_dive_cleaning)
{
@@ -689,6 +725,7 @@ WarpX::AllocLevelData (int lev, const BoxArray& ba, const DistributionMapping& d
else
{
rho_cp[lev].reset(new MultiFab(amrex::convert(cba,IntVect::TheUnitVector()),dm,2,ngRho));
+ rho_cp_owner_masks[lev] = std::move(rho_cp[lev]->OwnerMask(cperiod));
}
#endif
}
@@ -808,6 +845,26 @@ WarpX::ComputeDivB (MultiFab& divB, int dcomp,
}
void
+WarpX::ComputeDivB (MultiFab& divB, int dcomp,
+ const std::array<const MultiFab*, 3>& B,
+ const std::array<Real,3>& dx, int ngrow)
+{
+#ifdef _OPENMP
+#pragma omp parallel
+#endif
+ for (MFIter mfi(divB, true); mfi.isValid(); ++mfi)
+ {
+ Box bx = mfi.growntilebox(ngrow);
+ WRPX_COMPUTE_DIVB(bx.loVect(), bx.hiVect(),
+ BL_TO_FORTRAN_N_ANYD(divB[mfi],dcomp),
+ BL_TO_FORTRAN_ANYD((*B[0])[mfi]),
+ BL_TO_FORTRAN_ANYD((*B[1])[mfi]),
+ BL_TO_FORTRAN_ANYD((*B[2])[mfi]),
+ dx.data());
+ }
+}
+
+void
WarpX::ComputeDivE (MultiFab& divE, int dcomp,
const std::array<const MultiFab*, 3>& E,
const std::array<Real,3>& dx)
@@ -828,6 +885,26 @@ WarpX::ComputeDivE (MultiFab& divE, int dcomp,
}
void
+WarpX::ComputeDivE (MultiFab& divE, int dcomp,
+ const std::array<const MultiFab*, 3>& E,
+ const std::array<Real,3>& dx, int ngrow)
+{
+#ifdef _OPENMP
+#pragma omp parallel
+#endif
+ for (MFIter mfi(divE, true); mfi.isValid(); ++mfi)
+ {
+ Box bx = mfi.growntilebox(ngrow);
+ WRPX_COMPUTE_DIVE(bx.loVect(), bx.hiVect(),
+ BL_TO_FORTRAN_N_ANYD(divE[mfi],dcomp),
+ BL_TO_FORTRAN_ANYD((*E[0])[mfi]),
+ BL_TO_FORTRAN_ANYD((*E[1])[mfi]),
+ BL_TO_FORTRAN_ANYD((*E[2])[mfi]),
+ dx.data());
+ }
+}
+
+void
WarpX::applyFilter (MultiFab& dstmf, const MultiFab& srcmf, int scomp, int dcomp, int ncomp)
{
ncomp = std::min(ncomp, srcmf.nComp());
diff --git a/Source/WarpXComm.cpp b/Source/WarpXComm.cpp
index f225f1015..32b9e80f5 100644
--- a/Source/WarpXComm.cpp
+++ b/Source/WarpXComm.cpp
@@ -491,16 +491,16 @@ WarpX::SyncCurrent ()
for (int lev = 0; lev <= finest_level; ++lev)
{
const auto& period = Geom(lev).periodicity();
- current_fp[lev][0]->OverrideSync(period);
- current_fp[lev][1]->OverrideSync(period);
- current_fp[lev][2]->OverrideSync(period);
+ current_fp[lev][0]->OverrideSync(*current_fp_owner_masks[lev][0], period);
+ current_fp[lev][1]->OverrideSync(*current_fp_owner_masks[lev][1], period);
+ current_fp[lev][2]->OverrideSync(*current_fp_owner_masks[lev][2],period);
}
for (int lev = 1; lev <= finest_level; ++lev)
{
const auto& cperiod = Geom(lev-1).periodicity();
- current_cp[lev][0]->OverrideSync(cperiod);
- current_cp[lev][1]->OverrideSync(cperiod);
- current_cp[lev][2]->OverrideSync(cperiod);
+ current_cp[lev][0]->OverrideSync(*current_cp_owner_masks[lev][0], cperiod);
+ current_cp[lev][1]->OverrideSync(*current_cp_owner_masks[lev][1], cperiod);
+ current_cp[lev][2]->OverrideSync(*current_cp_owner_masks[lev][2], cperiod);
}
}
@@ -645,12 +645,12 @@ WarpX::SyncRho (amrex::Vector<std::unique_ptr<amrex::MultiFab> >& rhof,
for (int lev = 0; lev <= finest_level; ++lev)
{
const auto& period = Geom(lev).periodicity();
- rhof[lev]->OverrideSync(period);
+ rhof[lev]->OverrideSync(*rho_fp_owner_masks[lev], period);
}
for (int lev = 1; lev <= finest_level; ++lev)
{
const auto& cperiod = Geom(lev-1).periodicity();
- rhoc[lev]->OverrideSync(cperiod);
+ rhoc[lev]->OverrideSync(*rho_cp_owner_masks[lev], cperiod);
}
}
diff --git a/Source/WarpXEvolve.cpp b/Source/WarpXEvolve.cpp
index 7c1629e94..6d917e36d 100644
--- a/Source/WarpXEvolve.cpp
+++ b/Source/WarpXEvolve.cpp
@@ -155,6 +155,12 @@ WarpX::EvolveEM (int numsteps)
mypc->Redistribute();
}
+ bool to_sort = (sort_int > 0) && ((step+1) % sort_int == 0);
+ if (to_sort) {
+ amrex::Print() << "re-sorting particles \n";
+ mypc->SortParticlesByCell();
+ }
+
amrex::Print()<< "STEP " << step+1 << " ends." << " TIME = " << cur_time
<< " DT = " << dt[0] << "\n";
Real walltime_end_step = amrex::second();
@@ -492,9 +498,9 @@ WarpX::EvolveB (int lev, PatchType patch_type, amrex::Real dt)
// Loop through the grids, and over the tiles within each grid
#ifdef _OPENMP
-#pragma omp parallel
+#pragma omp parallel if (Gpu::notInLaunchRegion())
#endif
- for ( MFIter mfi(*Bx,true); mfi.isValid(); ++mfi )
+ for ( MFIter mfi(*Bx, TilingIfNotGPU()); mfi.isValid(); ++mfi )
{
Real wt = amrex::second();
@@ -519,8 +525,12 @@ WarpX::EvolveB (int lev, PatchType patch_type, amrex::Real dt)
if (cost) {
Box cbx = mfi.tilebox(IntVect{AMREX_D_DECL(0,0,0)});
if (patch_type == PatchType::coarse) cbx.refine(rr);
- wt = (amrex::second() - wt) / cbx.d_numPts();
- (*cost)[mfi].plus(wt, cbx);
+ wt = (amrex::second() - wt) / cbx.d_numPts();\
+ FArrayBox* costfab = cost->fabPtr(mfi);
+ AMREX_LAUNCH_HOST_DEVICE_LAMBDA ( cbx, work_box,
+ {
+ costfab->plus(wt, work_box);
+ });
}
}
@@ -530,9 +540,9 @@ WarpX::EvolveB (int lev, PatchType patch_type, amrex::Real dt)
const auto& pml_E = (patch_type == PatchType::fine) ? pml[lev]->GetE_fp() : pml[lev]->GetE_cp();
#ifdef _OPENMP
-#pragma omp parallel
+#pragma omp parallel if (Gpu::notInLaunchRegion())
#endif
- for ( MFIter mfi(*pml_B[0],true); mfi.isValid(); ++mfi )
+ for ( MFIter mfi(*pml_B[0], TilingIfNotGPU()); mfi.isValid(); ++mfi )
{
const Box& tbx = mfi.tilebox(Bx_nodal_flag);
const Box& tby = mfi.tilebox(By_nodal_flag);
@@ -617,9 +627,9 @@ WarpX::EvolveE (int lev, PatchType patch_type, amrex::Real dt)
// Loop through the grids, and over the tiles within each grid
#ifdef _OPENMP
-#pragma omp parallel
+#pragma omp parallel if (Gpu::notInLaunchRegion())
#endif
- for ( MFIter mfi(*Ex,true); mfi.isValid(); ++mfi )
+ for ( MFIter mfi(*Ex, TilingIfNotGPU()); mfi.isValid(); ++mfi )
{
Real wt = amrex::second();
@@ -662,7 +672,11 @@ WarpX::EvolveE (int lev, PatchType patch_type, amrex::Real dt)
Box cbx = mfi.tilebox(IntVect{AMREX_D_DECL(0,0,0)});
if (patch_type == PatchType::coarse) cbx.refine(rr);
wt = (amrex::second() - wt) / cbx.d_numPts();
- (*cost)[mfi].plus(wt, cbx);
+ FArrayBox* costfab = cost->fabPtr(mfi);
+ AMREX_LAUNCH_HOST_DEVICE_LAMBDA ( cbx, work_box,
+ {
+ costfab->plus(wt, work_box);
+ });
}
}
@@ -674,9 +688,9 @@ WarpX::EvolveE (int lev, PatchType patch_type, amrex::Real dt)
const auto& pml_E = (patch_type == PatchType::fine) ? pml[lev]->GetE_fp() : pml[lev]->GetE_cp();
const auto& pml_F = (patch_type == PatchType::fine) ? pml[lev]->GetF_fp() : pml[lev]->GetF_cp();
#ifdef _OPENMP
-#pragma omp parallel
+#pragma omp parallel if (Gpu::notInLaunchRegion())
#endif
- for ( MFIter mfi(*pml_E[0],true); mfi.isValid(); ++mfi )
+ for ( MFIter mfi(*pml_E[0], TilingIfNotGPU()); mfi.isValid(); ++mfi )
{
const Box& tex = mfi.tilebox(Ex_nodal_flag);
const Box& tey = mfi.tilebox(Ey_nodal_flag);
@@ -775,9 +789,9 @@ WarpX::EvolveF (int lev, PatchType patch_type, Real dt, DtType dt_type)
const auto& pml_E = (patch_type == PatchType::fine) ? pml[lev]->GetE_fp() : pml[lev]->GetE_cp();
#ifdef _OPENMP
-#pragma omp parallel
+#pragma omp parallel if (Gpu::notInLaunchRegion())
#endif
- for ( MFIter mfi(*pml_F,true); mfi.isValid(); ++mfi )
+ for ( MFIter mfi(*pml_F, TilingIfNotGPU()); mfi.isValid(); ++mfi )
{
const Box& bx = mfi.tilebox();
WRPX_PUSH_PML_F(bx.loVect(), bx.hiVect(),
@@ -821,9 +835,9 @@ WarpX::DampPML (int lev, PatchType patch_type)
: pml[lev]->GetMultiSigmaBox_cp();
#ifdef _OPENMP
-#pragma omp parallel
+#pragma omp parallel if (Gpu::notInLaunchRegion())
#endif
- for ( MFIter mfi(*pml_E[0],true); mfi.isValid(); ++mfi )
+ for ( MFIter mfi(*pml_E[0], TilingIfNotGPU()); mfi.isValid(); ++mfi )
{
const Box& tex = mfi.tilebox(Ex_nodal_flag);
const Box& tey = mfi.tilebox(Ey_nodal_flag);
diff --git a/Source/WarpXIO.cpp b/Source/WarpXIO.cpp
index 70985ac7e..93a359d7c 100644
--- a/Source/WarpXIO.cpp
+++ b/Source/WarpXIO.cpp
@@ -488,11 +488,11 @@ WarpX::UpdateInSitu () const
Vector<const MultiFab*> srcmf(AMREX_SPACEDIM);
PackPlotDataPtrs(srcmf, current_fp[lev]);
int dcomp = 0;
- amrex::average_edge_to_cellcenter(mf[lev], dcomp, srcmf);
+ amrex::average_edge_to_cellcenter(mf[lev], dcomp, srcmf, ngrow);
#if (AMREX_SPACEDIM == 2)
MultiFab::Copy(mf[lev], mf[lev], dcomp+1, dcomp+2, 1, ngrow);
- amrex::average_node_to_cellcenter(mf[lev], dcomp+1, *current_fp[lev][1], 0, 1);
+ amrex::average_node_to_cellcenter(mf[lev], dcomp+1, *current_fp[lev][1], 0, 1, ngrow);
#endif
if (lev == 0)
{
@@ -503,10 +503,10 @@ WarpX::UpdateInSitu () const
dcomp += 3;
PackPlotDataPtrs(srcmf, Efield_aux[lev]);
- amrex::average_edge_to_cellcenter(mf[lev], dcomp, srcmf);
+ amrex::average_edge_to_cellcenter(mf[lev], dcomp, srcmf, ngrow);
#if (AMREX_SPACEDIM == 2)
MultiFab::Copy(mf[lev], mf[lev], dcomp+1, dcomp+2, 1, ngrow);
- amrex::average_node_to_cellcenter(mf[lev], dcomp+1, *Efield_aux[lev][1], 0, 1);
+ amrex::average_node_to_cellcenter(mf[lev], dcomp+1, *Efield_aux[lev][1], 0, 1, ngrow);
#endif
if (lev == 0)
{
@@ -517,10 +517,10 @@ WarpX::UpdateInSitu () const
dcomp += 3;
PackPlotDataPtrs(srcmf, Bfield_aux[lev]);
- amrex::average_face_to_cellcenter(mf[lev], dcomp, srcmf);
+ amrex::average_face_to_cellcenter(mf[lev], dcomp, srcmf, ngrow);
#if (AMREX_SPACEDIM == 2)
MultiFab::Copy(mf[lev], mf[lev], dcomp+1, dcomp+2, 1, ngrow);
- MultiFab::Copy(mf[lev], *Bfield_aux[lev][1], 0, dcomp+1, 1, ngrow);
+ MultiFab::Copy(mf[lev], *Bfield_aux[lev][1], 0, dcomp+1, 1, ngrow);
#endif
if (lev == 0)
{
@@ -532,16 +532,14 @@ WarpX::UpdateInSitu () const
if (plot_part_per_cell)
{
- MultiFab temp_dat(grids[lev],mf[lev].DistributionMap(),1,0);
- temp_dat.setVal(0);
+ MultiFab temp_dat(grids[lev], mf[lev].DistributionMap(), 1, ngrow);
+ temp_dat.setVal(0, ngrow);
// MultiFab containing number of particles in each cell
mypc->Increment(temp_dat, lev);
- MultiFab::Copy(mf[lev], temp_dat, 0, dcomp, 1, 0);
+ MultiFab::Copy(mf[lev], temp_dat, 0, dcomp, 1, ngrow);
if (lev == 0)
- {
varnames.push_back("part_per_cell");
- }
dcomp += 1;
}
@@ -554,14 +552,13 @@ WarpX::UpdateInSitu () const
#ifdef _OPENMP
#pragma omp parallel
#endif
- for (MFIter mfi(mf[lev]); mfi.isValid(); ++mfi) {
- (mf[lev])[mfi].setVal(static_cast<Real>(npart_in_grid[mfi.index()]), dcomp);
- }
- if (lev == 0)
- {
- varnames.push_back("part_per_grid");
- }
- dcomp += 1;
+ for (MFIter mfi(mf[lev]); mfi.isValid(); ++mfi)
+ (mf[lev])[mfi].setVal(static_cast<Real>(npart_in_grid[mfi.index()]), dcomp);
+
+ if (lev == 0)
+ varnames.push_back("part_per_grid");
+
+ dcomp += 1;
}
if (plot_part_per_proc)
@@ -570,30 +567,30 @@ WarpX::UpdateInSitu () const
#ifdef _OPENMP
#pragma omp parallel reduction(+:n_per_proc)
#endif
- for (MFIter mfi(mf[lev]); mfi.isValid(); ++mfi) {
+ for (MFIter mfi(mf[lev]); mfi.isValid(); ++mfi)
n_per_proc += npart_in_grid[mfi.index()];
- }
- mf[lev].setVal(static_cast<Real>(n_per_proc), dcomp,1);
+
+ mf[lev].setVal(static_cast<Real>(n_per_proc), dcomp, ngrow);
+
if (lev == 0)
- {
varnames.push_back("part_per_proc");
- }
+
dcomp += 1;
}
}
if (plot_proc_number)
{
+ Real procid = static_cast<Real>(ParallelDescriptor::MyProc());
#ifdef _OPENMP
#pragma omp parallel
#endif
- for (MFIter mfi(mf[lev]); mfi.isValid(); ++mfi) {
- (mf[lev])[mfi].setVal(static_cast<Real>(ParallelDescriptor::MyProc()), dcomp);
- }
+ for (MFIter mfi(mf[lev]); mfi.isValid(); ++mfi)
+ (mf[lev])[mfi].setVal(procid, dcomp);
+
if (lev == 0)
- {
varnames.push_back("proc_number");
- }
+
dcomp += 1;
}
@@ -601,56 +598,56 @@ WarpX::UpdateInSitu () const
{
ComputeDivB(mf[lev], dcomp,
{Bfield_aux[lev][0].get(),Bfield_aux[lev][1].get(),Bfield_aux[lev][2].get()},
- WarpX::CellSize(lev));
+ WarpX::CellSize(lev), ngrow);
if (lev == 0)
- {
varnames.push_back("divB");
- }
+
dcomp += 1;
}
if (plot_dive)
{
const BoxArray& ba = amrex::convert(boxArray(lev),IntVect::TheUnitVector());
- MultiFab dive(ba,DistributionMap(lev),1,0);
+ MultiFab dive(ba, DistributionMap(lev), 1, ngrow);
+
ComputeDivE(dive, 0,
{Efield_aux[lev][0].get(), Efield_aux[lev][1].get(), Efield_aux[lev][2].get()},
- WarpX::CellSize(lev));
- amrex::average_node_to_cellcenter(mf[lev], dcomp, dive, 0, 1);
+ WarpX::CellSize(lev), ngrow);
+
+ amrex::average_node_to_cellcenter(mf[lev], dcomp, dive, 0, 1, ngrow);
+
if (lev == 0)
- {
varnames.push_back("divE");
- }
+
dcomp += 1;
}
if (plot_rho)
{
- amrex::average_node_to_cellcenter(mf[lev], dcomp, *rho_fp[lev], 0, 1);
+ amrex::average_node_to_cellcenter(mf[lev], dcomp, *rho_fp[lev], 0, 1, ngrow);
if (lev == 0)
- {
varnames.push_back("rho");
- }
+
dcomp += 1;
}
if (plot_F)
{
- amrex::average_node_to_cellcenter(mf[lev], dcomp, *F_fp[lev], 0, 1);
+ amrex::average_node_to_cellcenter(mf[lev], dcomp, *F_fp[lev], 0, 1, ngrow);
+
if (lev == 0)
- {
varnames.push_back("F");
- }
+
dcomp += 1;
}
if (plot_finepatch)
{
PackPlotDataPtrs(srcmf, Efield_fp[lev]);
- amrex::average_edge_to_cellcenter(mf[lev], dcomp, srcmf);
+ amrex::average_edge_to_cellcenter(mf[lev], dcomp, srcmf, ngrow);
#if (AMREX_SPACEDIM == 2)
MultiFab::Copy(mf[lev], mf[lev], dcomp+1, dcomp+2, 1, ngrow);
- amrex::average_node_to_cellcenter(mf[lev], dcomp+1, *Efield_fp[lev][1], 0, 1);
+ amrex::average_node_to_cellcenter(mf[lev], dcomp+1, *Efield_fp[lev][1], 0, 1, ngrow);
#endif
if (lev == 0)
{
@@ -661,7 +658,7 @@ WarpX::UpdateInSitu () const
dcomp += 3;
PackPlotDataPtrs(srcmf, Bfield_fp[lev]);
- amrex::average_face_to_cellcenter(mf[lev], dcomp, srcmf);
+ amrex::average_face_to_cellcenter(mf[lev], dcomp, srcmf, ngrow);
#if (AMREX_SPACEDIM == 2)
MultiFab::Copy(mf[lev], mf[lev], dcomp+1, dcomp+2, 1, ngrow);
MultiFab::Copy(mf[lev], *Bfield_fp[lev][1], 0, dcomp+1, 1, ngrow);
@@ -686,10 +683,10 @@ WarpX::UpdateInSitu () const
{
std::array<std::unique_ptr<MultiFab>, 3> E = getInterpolatedE(lev);
PackPlotDataPtrs(srcmf, E);
- amrex::average_edge_to_cellcenter(mf[lev], dcomp, srcmf);
+ amrex::average_edge_to_cellcenter(mf[lev], dcomp, srcmf, ngrow);
#if (AMREX_SPACEDIM == 2)
MultiFab::Copy(mf[lev], mf[lev], dcomp+1, dcomp+2, 1, ngrow);
- amrex::average_node_to_cellcenter(mf[lev], dcomp+1, *E[1], 0, 1);
+ amrex::average_node_to_cellcenter(mf[lev], dcomp+1, *E[1], 0, 1, ngrow);
#endif
}
if (lev == 0)
@@ -709,7 +706,7 @@ WarpX::UpdateInSitu () const
{
std::array<std::unique_ptr<MultiFab>, 3> B = getInterpolatedB(lev);
PackPlotDataPtrs(srcmf, B);
- amrex::average_face_to_cellcenter(mf[lev], dcomp, srcmf);
+ amrex::average_face_to_cellcenter(mf[lev], dcomp, srcmf, ngrow);
#if (AMREX_SPACEDIM == 2)
MultiFab::Copy(mf[lev], mf[lev], dcomp+1, dcomp+2, 1, ngrow);
MultiFab::Copy(mf[lev], *B[1], 0, dcomp+1, 1, ngrow);
diff --git a/Source/WarpXInitData.cpp b/Source/WarpXInitData.cpp
index a6b63a92d..496b14e7a 100644
--- a/Source/WarpXInitData.cpp
+++ b/Source/WarpXInitData.cpp
@@ -5,7 +5,6 @@
#include <WarpX.H>
#include <WarpX_f.H>
-#include <WarpXWrappers.h>
#ifdef BL_USE_SENSEI_INSITU
#include <AMReX_AmrMeshInSituBridge.H>
@@ -34,7 +33,7 @@ WarpX::InitData ()
ComputePMLFactors();
- if (warpx_use_fdtd_nci_corr()) {
+ if (WarpX::use_fdtd_nci_corr) {
WarpX::InitNCICorrector();
}
@@ -51,6 +50,7 @@ WarpX::InitData ()
insitu_bridge = new amrex::AmrMeshInSituBridge;
insitu_bridge->setEnabled(insitu_int > 0 ? 1 : 0);
insitu_bridge->setConfig(insitu_config);
+ insitu_bridge->setPinMesh(insitu_pin_mesh);
if (insitu_bridge->initialize())
{
amrex::ErrorStream()
@@ -153,7 +153,7 @@ WarpX::ComputePMLFactors ()
void
WarpX::InitNCICorrector ()
{
- if (warpx_use_fdtd_nci_corr())
+ if (WarpX::use_fdtd_nci_corr)
{
mypc->fdtd_nci_stencilz_ex.resize(max_level+1);
mypc->fdtd_nci_stencilz_by.resize(max_level+1);
@@ -161,7 +161,6 @@ WarpX::InitNCICorrector ()
{
const Geometry& gm = Geom(lev);
const Real* dx = gm.CellSize();
- const int l_lower_order_in_v = warpx_l_lower_order_in_v();
amrex::Real dz, cdtodz;
if (AMREX_SPACEDIM == 3){
dz = dx[2];
@@ -172,7 +171,7 @@ WarpX::InitNCICorrector ()
WRPX_PXR_NCI_CORR_INIT( (mypc->fdtd_nci_stencilz_ex)[lev].data(),
(mypc->fdtd_nci_stencilz_by)[lev].data(),
mypc->nstencilz_fdtd_nci_corr, cdtodz,
- l_lower_order_in_v);
+ WarpX::l_lower_order_in_v);
}
}
}
diff --git a/Source/WarpXMove.cpp b/Source/WarpXMove.cpp
index 401211554..908c70573 100644
--- a/Source/WarpXMove.cpp
+++ b/Source/WarpXMove.cpp
@@ -40,8 +40,8 @@ WarpX::MoveWindow (bool move_j)
Real new_hi[AMREX_SPACEDIM];
const Real* current_lo = geom[0].ProbLo();
const Real* current_hi = geom[0].ProbHi();
- const Real* dx = geom[0].CellSize();
- int num_shift_base = static_cast<int>((moving_window_x - current_lo[dir]) / dx[dir]);
+ const Real* cdx = geom[0].CellSize();
+ int num_shift_base = static_cast<int>((moving_window_x - current_lo[dir]) / cdx[dir]);
if (num_shift_base == 0) return 0;
@@ -51,8 +51,8 @@ WarpX::MoveWindow (bool move_j)
new_lo[i] = current_lo[i];
new_hi[i] = current_hi[i];
}
- new_lo[dir] = current_lo[dir] + num_shift_base * dx[dir];
- new_hi[dir] = current_hi[dir] + num_shift_base * dx[dir];
+ new_lo[dir] = current_lo[dir] + num_shift_base * cdx[dir];
+ new_hi[dir] = current_hi[dir] + num_shift_base * cdx[dir];
RealBox new_box(new_lo, new_hi);
Geometry::ProbDomain(new_box);
@@ -181,6 +181,7 @@ WarpX::MoveWindow (bool move_j)
void
WarpX::shiftMF (MultiFab& mf, const Geometry& geom, int num_shift, int dir)
{
+ BL_PROFILE("WarpX::shiftMF()");
const BoxArray& ba = mf.boxArray();
const DistributionMapping& dm = mf.DistributionMap();
const int nc = mf.nComp();
@@ -221,25 +222,24 @@ WarpX::shiftMF (MultiFab& mf, const Geometry& geom, int num_shift, int dir)
#endif
for (MFIter mfi(tmpmf); mfi.isValid(); ++mfi )
{
- FArrayBox& srcfab = tmpmf[mfi];
-
- Box outbox = mfi.fabbox();
- outbox &= adjBox;
- if (outbox.ok()) { // outbox is the region that the window moved into
- srcfab.setVal(0.0, outbox, 0, nc);
- }
-
- FArrayBox& dstfab = mf[mfi];
- dstfab.setVal(0.0);
-
- Box dstBox = dstfab.box();
-
- if (num_shift > 0) {
- dstBox.growHi(dir, -num_shift);
- } else {
- dstBox.growLo(dir, num_shift);
- }
-
- dstfab.copy(srcfab, amrex::shift(dstBox,dir,num_shift), 0, dstBox, 0, nc);
+ FArrayBox* dstfab = mf.fabPtr(mfi);
+ FArrayBox* srcfab = tmpmf.fabPtr(mfi);
+ Box outbox = mfi.fabbox();
+ outbox &= adjBox;
+ AMREX_LAUNCH_HOST_DEVICE_LAMBDA(outbox, toutbox,
+ {
+ srcfab->setVal(0.0, toutbox, 0, nc);
+ });
+ Box dstBox = mf[mfi].box();
+ if (num_shift > 0) {
+ dstBox.growHi(dir, -num_shift);
+ } else {
+ dstBox.growLo(dir, num_shift);
+ }
+ AMREX_LAUNCH_HOST_DEVICE_LAMBDA(dstBox, tdstBox,
+ {
+ dstfab->setVal(0.0, tdstBox, 0, nc);
+ dstfab->copy(*srcfab, amrex::shift(tdstBox,dir,num_shift), 0, tdstBox, 0, nc);
+ });
}
}
diff --git a/Source/WarpXPML.H b/Source/WarpXPML.H
index 0cf367284..7a86bf0da 100644
--- a/Source/WarpXPML.H
+++ b/Source/WarpXPML.H
@@ -62,6 +62,13 @@ namespace amrex {
virtual void destroy (SigmaBox* fab) const final {
delete fab;
}
+#ifdef AMREX_USE_GPU
+ virtual SigmaBox* createHostAlias (SigmaBox const& src) const final
+ {
+ return const_cast<SigmaBox*>(&src);
+ }
+ virtual void destroyHostAlias (SigmaBox* fab) const final {}
+#endif
virtual FabFactory<SigmaBox>* clone () const {
return new FabFactory<SigmaBox>(*this);
}
diff --git a/Source/WarpXParticleContainer.H b/Source/WarpXParticleContainer.H
index a2d5c0fac..b879f49d9 100644
--- a/Source/WarpXParticleContainer.H
+++ b/Source/WarpXParticleContainer.H
@@ -36,27 +36,27 @@ public:
WarpXParIter (ContainerType& pc, int level);
#if (AMREX_SPACEDIM == 2)
- void GetPosition (amrex::Vector<amrex::Real>& x,
- amrex::Vector<amrex::Real>& y,
- amrex::Vector<amrex::Real>& z) const;
- void SetPosition (const amrex::Vector<amrex::Real>& x,
- const amrex::Vector<amrex::Real>& y,
- const amrex::Vector<amrex::Real>& z);
+ void GetPosition (amrex::Cuda::DeviceVector<amrex::Real>& x,
+ amrex::Cuda::DeviceVector<amrex::Real>& y,
+ amrex::Cuda::DeviceVector<amrex::Real>& z) const;
+ void SetPosition (const amrex::Cuda::DeviceVector<amrex::Real>& x,
+ const amrex::Cuda::DeviceVector<amrex::Real>& y,
+ const amrex::Cuda::DeviceVector<amrex::Real>& z);
#endif
- const std::array<amrex::Vector<amrex::Real>, PIdx::nattribs>& GetAttribs () const {
+ const std::array<RealVector, PIdx::nattribs>& GetAttribs () const {
return GetStructOfArrays().GetRealData();
}
- std::array<amrex::Vector<amrex::Real>, PIdx::nattribs>& GetAttribs () {
+ std::array<RealVector, PIdx::nattribs>& GetAttribs () {
return GetStructOfArrays().GetRealData();
}
- const amrex::Vector<amrex::Real>& GetAttribs (int comp) const {
+ const RealVector& GetAttribs (int comp) const {
return GetStructOfArrays().GetRealData(comp);
}
- amrex::Vector<amrex::Real>& GetAttribs (int comp) {
+ RealVector& GetAttribs (int comp) {
return GetStructOfArrays().GetRealData(comp);
}
};
@@ -141,6 +141,33 @@ public:
bool local = false);
std::unique_ptr<amrex::MultiFab> GetChargeDensity(int lev, bool local = false);
+ virtual void DepositCharge(WarpXParIter& pti,
+ RealVector& wp,
+ amrex::MultiFab* rhomf,
+ amrex::MultiFab* crhomf,
+ int icomp,
+ const long np_current,
+ const long np,
+ int thread_num,
+ int lev );
+
+ virtual void DepositCurrent(WarpXParIter& pti,
+ RealVector& wp,
+ RealVector& uxp,
+ RealVector& uyp,
+ RealVector& uzp,
+ amrex::MultiFab& jx,
+ amrex::MultiFab& jy,
+ amrex::MultiFab& jz,
+ amrex::MultiFab* cjx,
+ amrex::MultiFab* cjy,
+ amrex::MultiFab* cjz,
+ const long np_current,
+ const long np,
+ int thread_num,
+ int lev,
+ amrex::Real dt );
+
///
/// This returns the total charge for all the particles in this ParticleContainer.
/// This is needed when solving Poisson's equation with periodic boundary conditions.
@@ -182,6 +209,14 @@ protected:
bool deposit_on_main_grid = false;
static int do_not_push;
+
+ amrex::Vector<std::unique_ptr<amrex::FArrayBox> > local_rho;
+ amrex::Vector<std::unique_ptr<amrex::FArrayBox> > local_jx;
+ amrex::Vector<std::unique_ptr<amrex::FArrayBox> > local_jy;
+ amrex::Vector<std::unique_ptr<amrex::FArrayBox> > local_jz;
+
+ amrex::Vector<amrex::Cuda::DeviceVector<amrex::Real> > m_xp, m_yp, m_zp, m_giv;
+
};
#endif
diff --git a/Source/WarpXParticleContainer.cpp b/Source/WarpXParticleContainer.cpp
index ad6979462..a9699071c 100644
--- a/Source/WarpXParticleContainer.cpp
+++ b/Source/WarpXParticleContainer.cpp
@@ -18,14 +18,14 @@ WarpXParIter::WarpXParIter (ContainerType& pc, int level)
#if (AMREX_SPACEDIM == 2)
void
-WarpXParIter::GetPosition (Vector<Real>& x, Vector<Real>& y, Vector<Real>& z) const
+WarpXParIter::GetPosition (Cuda::DeviceVector<Real>& x, Cuda::DeviceVector<Real>& y, Cuda::DeviceVector<Real>& z) const
{
amrex::ParIter<0,0,PIdx::nattribs>::GetPosition(x, z);
y.resize(x.size(), std::numeric_limits<Real>::quiet_NaN());
}
void
-WarpXParIter::SetPosition (const Vector<Real>& x, const Vector<Real>& y, const Vector<Real>& z)
+WarpXParIter::SetPosition (const Cuda::DeviceVector<Real>& x, const Cuda::DeviceVector<Real>& y, const Cuda::DeviceVector<Real>& z)
{
amrex::ParIter<0,0,PIdx::nattribs>::SetPosition(x, z);
}
@@ -40,6 +40,30 @@ WarpXParticleContainer::WarpXParticleContainer (AmrCore* amr_core, int ispecies)
}
SetParticleSize();
ReadParameters();
+
+ // Initialize temporary local arrays for charge/current deposition
+ int num_threads = 1;
+ #ifdef _OPENMP
+ #pragma omp parallel
+ #pragma omp single
+ num_threads = omp_get_num_threads();
+ #endif
+ local_rho.resize(num_threads);
+ local_jx.resize(num_threads);
+ local_jy.resize(num_threads);
+ local_jz.resize(num_threads);
+ m_xp.resize(num_threads);
+ m_yp.resize(num_threads);
+ m_zp.resize(num_threads);
+ m_giv.resize(num_threads);
+ for (int i = 0; i < num_threads; ++i)
+ {
+ local_rho[i].reset(nullptr);
+ local_jx[i].reset(nullptr);
+ local_jy[i].reset(nullptr);
+ local_jz[i].reset(nullptr);
+ }
+
}
void
@@ -50,10 +74,14 @@ WarpXParticleContainer::ReadParameters ()
{
ParmParse pp("particles");
- do_tiling = true; // because the default in amrex is false
+#ifdef AMREX_USE_GPU
+ do_tiling = false; // By default, tiling is off on GPU
+#else
+ do_tiling = true;
+#endif
pp.query("do_tiling", do_tiling);
pp.query("do_not_push", do_not_push);
-
+
initialized = true;
}
}
@@ -73,7 +101,7 @@ WarpXParticleContainer::AddOneParticle (int lev, int grid, int tile,
const std::array<Real,PIdx::nattribs>& attribs)
{
auto& particle_tile = GetParticles(lev)[std::make_pair(grid,tile)];
- AddOneParticle(particle_tile, x, y, z, attribs);
+ AddOneParticle(particle_tile, x, y, z, attribs);
}
void
@@ -92,7 +120,7 @@ WarpXParticleContainer::AddOneParticle (ParticleTileType& particle_tile,
p.pos(0) = x;
p.pos(1) = z;
#endif
-
+
particle_tile.push_back(p);
particle_tile.push_back_real(attribs);
}
@@ -153,16 +181,341 @@ WarpXParticleContainer::AddNParticles (int lev,
particle_tile.push_back_real(PIdx::ux, vx + ibegin, vx + iend);
particle_tile.push_back_real(PIdx::uy, vy + ibegin, vy + iend);
particle_tile.push_back_real(PIdx::uz, vz + ibegin, vz + iend);
-
+
for (int comp = PIdx::uz+1; comp < PIdx::nattribs; ++comp)
{
particle_tile.push_back_real(comp, np, 0.0);
}
- }
+ }
Redistribute();
}
+
+void
+WarpXParticleContainer::DepositCurrent(WarpXParIter& pti,
+ RealVector& wp, RealVector& uxp,
+ RealVector& uyp, RealVector& uzp,
+ MultiFab& jx, MultiFab& jy, MultiFab& jz,
+ MultiFab* cjx, MultiFab* cjy, MultiFab* cjz,
+ const long np_current, const long np,
+ int thread_num, int lev, Real dt )
+{
+ Real *jx_ptr, *jy_ptr, *jz_ptr;
+ const int *jxntot, *jyntot, *jzntot;
+ const std::array<Real,3>& xyzmin_tile = WarpX::LowerCorner(pti.tilebox(), lev);
+ const std::array<Real,3>& dx = WarpX::CellSize(lev);
+ const std::array<Real,3>& cdx = WarpX::CellSize(std::max(lev-1,0));
+ const std::array<Real, 3>& xyzmin = xyzmin_tile;
+ const long lvect = 8;
+
+ BL_PROFILE_VAR_NS("PICSAR::CurrentDeposition", blp_pxr_cd);
+ BL_PROFILE_VAR_NS("PPC::Evolve::Accumulate", blp_accumulate);
+
+ Box tbx = convert(pti.tilebox(), WarpX::jx_nodal_flag);
+ Box tby = convert(pti.tilebox(), WarpX::jy_nodal_flag);
+ Box tbz = convert(pti.tilebox(), WarpX::jz_nodal_flag);
+
+ // WarpX assumes the same number of guard cells for Jx, Jy, Jz
+ long ngJ = jx.nGrow();
+
+ // Deposit charge for particles that are not in the current buffers
+ if (np_current > 0)
+ {
+ tbx.grow(ngJ);
+ tby.grow(ngJ);
+ tbz.grow(ngJ);
+
+ local_jx[thread_num]->resize(tbx);
+ local_jy[thread_num]->resize(tby);
+ local_jz[thread_num]->resize(tbz);
+
+ jx_ptr = local_jx[thread_num]->dataPtr();
+ jy_ptr = local_jy[thread_num]->dataPtr();
+ jz_ptr = local_jz[thread_num]->dataPtr();
+
+ FArrayBox* local_jx_ptr = local_jx[thread_num].get();
+ AMREX_LAUNCH_HOST_DEVICE_LAMBDA(tbx, b,
+ {
+ local_jx_ptr->setVal(0.0, b, 0, 1);
+ });
+
+ FArrayBox* local_jy_ptr = local_jy[thread_num].get();
+ AMREX_LAUNCH_HOST_DEVICE_LAMBDA(tby, b,
+ {
+ local_jy_ptr->setVal(0.0, b, 0, 1);
+ });
+
+ FArrayBox* local_jz_ptr = local_jz[thread_num].get();
+ AMREX_LAUNCH_HOST_DEVICE_LAMBDA(tbz, b,
+ {
+ local_jz_ptr->setVal(0.0, b, 0, 1);
+ });
+
+ jxntot = local_jx[thread_num]->length();
+ jyntot = local_jy[thread_num]->length();
+ jzntot = local_jz[thread_num]->length();
+
+ BL_PROFILE_VAR_START(blp_pxr_cd);
+ warpx_current_deposition(
+ jx_ptr, &ngJ, jxntot,
+ jy_ptr, &ngJ, jyntot,
+ jz_ptr, &ngJ, jzntot,
+ &np_current,
+ m_xp[thread_num].dataPtr(),
+ m_yp[thread_num].dataPtr(),
+ m_zp[thread_num].dataPtr(),
+ uxp.dataPtr(), uyp.dataPtr(), uzp.dataPtr(),
+ m_giv[thread_num].dataPtr(),
+ wp.dataPtr(), &this->charge,
+ &xyzmin[0], &xyzmin[1], &xyzmin[2],
+ &dt, &dx[0], &dx[1], &dx[2],
+ &WarpX::nox,&WarpX::noy,&WarpX::noz,
+ &lvect,&WarpX::current_deposition_algo);
+ BL_PROFILE_VAR_STOP(blp_pxr_cd);
+
+ BL_PROFILE_VAR_START(blp_accumulate);
+
+ FArrayBox const* local_jx_const_ptr = local_jx[thread_num].get();
+ FArrayBox* global_jx_ptr = jx.fabPtr(pti);
+ AMREX_LAUNCH_HOST_DEVICE_LAMBDA(tbx, thread_bx,
+ {
+ global_jx_ptr->atomicAdd(*local_jx_const_ptr, thread_bx, thread_bx, 0, 0, 1);
+ });
+
+ FArrayBox const* local_jy_const_ptr = local_jy[thread_num].get();
+ FArrayBox* global_jy_ptr = jy.fabPtr(pti);
+ AMREX_LAUNCH_HOST_DEVICE_LAMBDA(tby, thread_bx,
+ {
+ global_jy_ptr->atomicAdd(*local_jy_const_ptr, thread_bx, thread_bx, 0, 0, 1);
+ });
+
+ FArrayBox const* local_jz_const_ptr = local_jz[thread_num].get();
+ FArrayBox* global_jz_ptr = jz.fabPtr(pti);
+ AMREX_LAUNCH_HOST_DEVICE_LAMBDA(tbz, thread_bx,
+ {
+ global_jz_ptr->atomicAdd(*local_jz_const_ptr, thread_bx, thread_bx, 0, 0, 1);
+ });
+
+ BL_PROFILE_VAR_STOP(blp_accumulate);
+ }
+
+ // Deposit charge for particles that are in the current buffers
+ if (np_current < np)
+ {
+ const IntVect& ref_ratio = WarpX::RefRatio(lev-1);
+ const Box& ctilebox = amrex::coarsen(pti.tilebox(),ref_ratio);
+ const std::array<Real,3>& cxyzmin_tile = WarpX::LowerCorner(ctilebox, lev-1);
+
+ tbx = amrex::convert(ctilebox, WarpX::jx_nodal_flag);
+ tby = amrex::convert(ctilebox, WarpX::jy_nodal_flag);
+ tbz = amrex::convert(ctilebox, WarpX::jz_nodal_flag);
+ tbx.grow(ngJ);
+ tby.grow(ngJ);
+ tbz.grow(ngJ);
+
+ local_jx[thread_num]->resize(tbx);
+ local_jy[thread_num]->resize(tby);
+ local_jz[thread_num]->resize(tbz);
+
+ jx_ptr = local_jx[thread_num]->dataPtr();
+ jy_ptr = local_jy[thread_num]->dataPtr();
+ jz_ptr = local_jz[thread_num]->dataPtr();
+
+ FArrayBox* local_jx_ptr = local_jx[thread_num].get();
+ AMREX_LAUNCH_HOST_DEVICE_LAMBDA(tbx, b,
+ {
+ local_jx_ptr->setVal(0.0, b, 0, 1);
+ });
+
+ FArrayBox* local_jy_ptr = local_jy[thread_num].get();
+ AMREX_LAUNCH_HOST_DEVICE_LAMBDA(tby, b,
+ {
+ local_jy_ptr->setVal(0.0, b, 0, 1);
+ });
+
+ FArrayBox* local_jz_ptr = local_jz[thread_num].get();
+ AMREX_LAUNCH_HOST_DEVICE_LAMBDA(tbz, b,
+ {
+ local_jz_ptr->setVal(0.0, b, 0, 1);
+ });
+ jxntot = local_jx[thread_num]->length();
+ jyntot = local_jy[thread_num]->length();
+ jzntot = local_jz[thread_num]->length();
+
+ long ncrse = np - np_current;
+ BL_PROFILE_VAR_START(blp_pxr_cd);
+ warpx_current_deposition(
+ jx_ptr, &ngJ, jxntot,
+ jy_ptr, &ngJ, jyntot,
+ jz_ptr, &ngJ, jzntot,
+ &ncrse,
+ m_xp[thread_num].dataPtr() +np_current,
+ m_yp[thread_num].dataPtr() +np_current,
+ m_zp[thread_num].dataPtr() +np_current,
+ uxp.dataPtr()+np_current,
+ uyp.dataPtr()+np_current,
+ uzp.dataPtr()+np_current,
+ m_giv[thread_num].dataPtr()+np_current,
+ wp.dataPtr()+np_current, &this->charge,
+ &cxyzmin_tile[0], &cxyzmin_tile[1], &cxyzmin_tile[2],
+ &dt, &cdx[0], &cdx[1], &cdx[2],
+ &WarpX::nox,&WarpX::noy,&WarpX::noz,
+ &lvect,&WarpX::current_deposition_algo);
+ BL_PROFILE_VAR_STOP(blp_pxr_cd);
+
+ BL_PROFILE_VAR_START(blp_accumulate);
+
+ FArrayBox const* local_jx_const_ptr = local_jx[thread_num].get();
+ FArrayBox* global_jx_ptr = cjx->fabPtr(pti);
+ AMREX_LAUNCH_HOST_DEVICE_LAMBDA(tbx, thread_bx,
+ {
+ global_jx_ptr->atomicAdd(*local_jx_const_ptr, thread_bx, thread_bx, 0, 0, 1);
+ });
+
+ FArrayBox const* local_jy_const_ptr = local_jy[thread_num].get();
+ FArrayBox* global_jy_ptr = cjy->fabPtr(pti);
+ AMREX_LAUNCH_HOST_DEVICE_LAMBDA(tby, thread_bx,
+ {
+ global_jy_ptr->atomicAdd(*local_jy_const_ptr, thread_bx, thread_bx, 0, 0, 1);
+ });
+
+ FArrayBox const* local_jz_const_ptr = local_jz[thread_num].get();
+ FArrayBox* global_jz_ptr = cjz->fabPtr(pti);
+ AMREX_LAUNCH_HOST_DEVICE_LAMBDA(tbz, thread_bx,
+ {
+ global_jz_ptr->atomicAdd(*local_jz_const_ptr, thread_bx, thread_bx, 0, 0, 1);
+ });
+
+ BL_PROFILE_VAR_STOP(blp_accumulate);
+ }
+};
+
+
+void
+WarpXParticleContainer::DepositCharge ( WarpXParIter& pti, RealVector& wp,
+ MultiFab* rhomf, MultiFab* crhomf, int icomp,
+ const long np_current,
+ const long np, int thread_num, int lev )
+{
+
+ BL_PROFILE_VAR_NS("PICSAR::ChargeDeposition", blp_pxr_chd);
+ BL_PROFILE_VAR_NS("PPC::Evolve::Accumulate", blp_accumulate);
+
+ const std::array<Real,3>& xyzmin_tile = WarpX::LowerCorner(pti.tilebox(), lev);
+ const long lvect = 8;
+
+ long ngRho = rhomf->nGrow();
+ Real* data_ptr;
+ Box tile_box = convert(pti.tilebox(), IntVect::TheUnitVector());
+ const int *rholen;
+
+ const std::array<Real,3>& dx = WarpX::CellSize(lev);
+ const std::array<Real,3>& cdx = WarpX::CellSize(std::max(lev-1,0));
+
+ // Deposit charge for particles that are not in the current buffers
+ if (np_current > 0)
+ {
+ const std::array<Real, 3>& xyzmin = xyzmin_tile;
+ tile_box.grow(ngRho);
+ local_rho[thread_num]->resize(tile_box);
+ FArrayBox* local_rho_ptr = local_rho[thread_num].get();
+ AMREX_LAUNCH_HOST_DEVICE_LAMBDA(tile_box, b,
+ {
+ local_rho_ptr->setVal(0.0, b, 0, 1);
+ });
+
+ data_ptr = local_rho[thread_num]->dataPtr();
+ rholen = local_rho[thread_num]->length();
+#if (AMREX_SPACEDIM == 3)
+ const long nx = rholen[0]-1-2*ngRho;
+ const long ny = rholen[1]-1-2*ngRho;
+ const long nz = rholen[2]-1-2*ngRho;
+#else
+ const long nx = rholen[0]-1-2*ngRho;
+ const long ny = 0;
+ const long nz = rholen[1]-1-2*ngRho;
+#endif
+ BL_PROFILE_VAR_START(blp_pxr_chd);
+ warpx_charge_deposition(data_ptr, &np_current,
+ m_xp[thread_num].dataPtr(),
+ m_yp[thread_num].dataPtr(),
+ m_zp[thread_num].dataPtr(),
+ wp.dataPtr(),
+ &this->charge,
+ &xyzmin[0], &xyzmin[1], &xyzmin[2],
+ &dx[0], &dx[1], &dx[2], &nx, &ny, &nz,
+ &ngRho, &ngRho, &ngRho,
+ &WarpX::nox,&WarpX::noy,&WarpX::noz,
+ &lvect, &WarpX::charge_deposition_algo);
+ BL_PROFILE_VAR_STOP(blp_pxr_chd);
+
+ const int ncomp = 1;
+ FArrayBox const* local_fab = local_rho[thread_num].get();
+ FArrayBox* global_fab = rhomf->fabPtr(pti);
+ BL_PROFILE_VAR_START(blp_accumulate);
+ AMREX_LAUNCH_HOST_DEVICE_LAMBDA(tile_box, tbx,
+ {
+ global_fab->atomicAdd(*local_fab, tbx, tbx, 0, icomp, ncomp);
+ });
+ BL_PROFILE_VAR_STOP(blp_accumulate);
+ }
+
+ // Deposit charge for particles that are in the current buffers
+ if (np_current < np)
+ {
+ const IntVect& ref_ratio = WarpX::RefRatio(lev-1);
+ const Box& ctilebox = amrex::coarsen(pti.tilebox(), ref_ratio);
+ const std::array<Real,3>& cxyzmin_tile = WarpX::LowerCorner(ctilebox, lev-1);
+
+ tile_box = amrex::convert(ctilebox, IntVect::TheUnitVector());
+ tile_box.grow(ngRho);
+ local_rho[thread_num]->resize(tile_box);
+ FArrayBox* local_rho_ptr = local_rho[thread_num].get();
+ AMREX_LAUNCH_HOST_DEVICE_LAMBDA(tile_box, b,
+ {
+ local_rho_ptr->setVal(0.0, b, 0, 1);
+ });
+
+ data_ptr = local_rho[thread_num]->dataPtr();
+ rholen = local_rho[thread_num]->length();
+#if (AMREX_SPACEDIM == 3)
+ const long nx = rholen[0]-1-2*ngRho;
+ const long ny = rholen[1]-1-2*ngRho;
+ const long nz = rholen[2]-1-2*ngRho;
+#else
+ const long nx = rholen[0]-1-2*ngRho;
+ const long ny = 0;
+ const long nz = rholen[1]-1-2*ngRho;
+#endif
+
+ long ncrse = np - np_current;
+ BL_PROFILE_VAR_START(blp_pxr_chd);
+ warpx_charge_deposition(data_ptr, &ncrse,
+ m_xp[thread_num].dataPtr() + np_current,
+ m_yp[thread_num].dataPtr() + np_current,
+ m_zp[thread_num].dataPtr() + np_current,
+ wp.dataPtr() + np_current,
+ &this->charge,
+ &cxyzmin_tile[0], &cxyzmin_tile[1], &cxyzmin_tile[2],
+ &cdx[0], &cdx[1], &cdx[2], &nx, &ny, &nz,
+ &ngRho, &ngRho, &ngRho,
+ &WarpX::nox,&WarpX::noy,&WarpX::noz,
+ &lvect, &WarpX::charge_deposition_algo);
+ BL_PROFILE_VAR_STOP(blp_pxr_chd);
+
+ const int ncomp = 1;
+ FArrayBox const* local_fab = local_rho[thread_num].get();
+ FArrayBox* global_fab = crhomf->fabPtr(pti);
+ BL_PROFILE_VAR_START(blp_accumulate);
+ AMREX_LAUNCH_HOST_DEVICE_LAMBDA(tile_box, tbx,
+ {
+ global_fab->atomicAdd(*local_fab, tbx, tbx, 0, icomp, ncomp);
+ });
+ BL_PROFILE_VAR_STOP(blp_accumulate);
+ }
+};
+
void
WarpXParticleContainer::DepositCharge (Vector<std::unique_ptr<MultiFab> >& rho, bool local)
{
@@ -172,32 +525,32 @@ WarpXParticleContainer::DepositCharge (Vector<std::unique_ptr<MultiFab> >& rho,
// each level deposits it's own particles
const int ng = rho[0]->nGrow();
- for (int lev = 0; lev < num_levels; ++lev) {
+ for (int lev = 0; lev < num_levels; ++lev) {
rho[lev]->setVal(0.0, ng);
const auto& gm = m_gdb->Geom(lev);
const auto& ba = m_gdb->ParticleBoxArray(lev);
const auto& dm = m_gdb->DistributionMap(lev);
-
+
const Real* dx = gm.CellSize();
const Real* plo = gm.ProbLo();
BoxArray nba = ba;
nba.surroundingNodes();
-
+
for (WarpXParIter pti(*this, lev); pti.isValid(); ++pti) {
const Box& box = nba[pti];
-
+
auto& wp = pti.GetAttribs(PIdx::w);
const auto& particles = pti.GetArrayOfStructs();
int nstride = particles.dataShape().first;
const long np = pti.numParticles();
-
+
FArrayBox& rhofab = (*rho[lev])[pti];
-
- WRPX_DEPOSIT_CIC(particles.data(), nstride, np,
- wp.data(), &this->charge,
- rhofab.dataPtr(), box.loVect(), box.hiVect(),
+
+ WRPX_DEPOSIT_CIC(particles.dataPtr(), nstride, np,
+ wp.dataPtr(), &this->charge,
+ rhofab.dataPtr(), box.loVect(), box.hiVect(),
plo, dx, &ng);
}
@@ -211,12 +564,12 @@ WarpXParticleContainer::DepositCharge (Vector<std::unique_ptr<MultiFab> >& rho,
const DistributionMapping& fine_dm = rho[lev+1]->DistributionMap();
BoxArray coarsened_fine_BA = fine_BA;
coarsened_fine_BA.coarsen(m_gdb->refRatio(lev));
-
+
MultiFab coarsened_fine_data(coarsened_fine_BA, fine_dm, 1, 0);
coarsened_fine_data.setVal(0.0);
-
+
IntVect ratio(AMREX_D_DECL(2, 2, 2)); // FIXME
-
+
for (MFIter mfi(coarsened_fine_data); mfi.isValid(); ++mfi) {
const Box& bx = mfi.validbox();
const Box& crse_box = coarsened_fine_data[mfi].box();
@@ -225,7 +578,7 @@ WarpXParticleContainer::DepositCharge (Vector<std::unique_ptr<MultiFab> >& rho,
coarsened_fine_data[mfi].dataPtr(), crse_box.loVect(), crse_box.hiVect(),
(*rho[lev+1])[mfi].dataPtr(), fine_box.loVect(), fine_box.hiVect());
}
-
+
rho[lev]->copy(coarsened_fine_data, m_gdb->Geom(lev).periodicity(), FabArrayBase::ADD);
}
}
@@ -250,19 +603,19 @@ WarpXParticleContainer::GetChargeDensity (int lev, bool local)
#pragma omp parallel
#endif
{
- Vector<Real> xp, yp, zp;
+ Cuda::DeviceVector<Real> xp, yp, zp;
FArrayBox local_rho;
for (WarpXParIter pti(*this, lev); pti.isValid(); ++pti)
{
const Box& box = pti.validbox();
-
+
auto& wp = pti.GetAttribs(PIdx::w);
-
+
const long np = pti.numParticles();
pti.GetPosition(xp, yp, zp);
-
+
const std::array<Real,3>& xyzmin_tile = WarpX::LowerCorner(pti.tilebox(), lev);
const std::array<Real,3>& xyzmin_grid = WarpX::LowerCorner(box, lev);
@@ -298,26 +651,26 @@ WarpXParticleContainer::GetChargeDensity (int lev, bool local)
long nyg = ng;
long nzg = ng;
long lvect = 8;
-
+
warpx_charge_deposition(data_ptr,
- &np, xp.data(), yp.data(), zp.data(), wp.data(),
- &this->charge, &xyzmin[0], &xyzmin[1], &xyzmin[2],
+ &np,
+ xp.dataPtr(),
+ yp.dataPtr(),
+ zp.dataPtr(), wp.dataPtr(),
+ &this->charge, &xyzmin[0], &xyzmin[1], &xyzmin[2],
&dx[0], &dx[1], &dx[2], &nx, &ny, &nz,
&nxg, &nyg, &nzg, &WarpX::nox,&WarpX::noy,&WarpX::noz,
&lvect, &WarpX::charge_deposition_algo);
-
+
#ifdef _OPENMP
- const Box& fabbox = rhofab.box();
- const int ncomp = 1;
- amrex_atomic_accumulate_fab(BL_TO_FORTRAN_3D(local_rho),
- BL_TO_FORTRAN_3D(rhofab), ncomp);
+ rhofab.atomicAdd(local_rho);
#endif
}
-
+
}
if (!local) rho->SumBoundary(gm.periodicity());
-
+
return rho;
}
@@ -327,7 +680,7 @@ Real WarpXParticleContainer::sumParticleCharge(bool local) {
for (int lev = 0; lev < finestLevel(); ++lev)
{
-
+
#ifdef _OPENMP
#pragma omp parallel reduction(+:total_charge)
#endif
@@ -356,7 +709,7 @@ std::array<Real, 3> WarpXParticleContainer::meanParticleVelocity(bool local) {
amrex::Real inv_clight_sq = 1.0/PhysConst::c/PhysConst::c;
for (int lev = 0; lev <= finestLevel(); ++lev) {
-
+
#ifdef _OPENMP
#pragma omp parallel reduction(+:vx_total, vy_total, vz_total, np_total)
#endif
@@ -365,9 +718,9 @@ std::array<Real, 3> WarpXParticleContainer::meanParticleVelocity(bool local) {
auto& ux = pti.GetAttribs(PIdx::ux);
auto& uy = pti.GetAttribs(PIdx::uy);
auto& uz = pti.GetAttribs(PIdx::uz);
-
+
np_total += pti.numParticles();
-
+
for (unsigned long i = 0; i < ux.size(); i++) {
Real usq = (ux[i]*ux[i] + uy[i]*uy[i] + uz[i]*uz[i])*inv_clight_sq;
Real gaminv = 1.0/std::sqrt(1.0 + usq);
@@ -377,14 +730,14 @@ std::array<Real, 3> WarpXParticleContainer::meanParticleVelocity(bool local) {
}
}
}
-
+
if (!local) {
ParallelDescriptor::ReduceRealSum(vx_total);
ParallelDescriptor::ReduceRealSum(vy_total);
ParallelDescriptor::ReduceRealSum(vz_total);
ParallelDescriptor::ReduceLongSum(np_total);
}
-
+
std::array<Real, 3> mean_v;
if (np_total > 0) {
mean_v[0] = vx_total / np_total;
@@ -415,7 +768,7 @@ Real WarpXParticleContainer::maxParticleVelocity(bool local) {
}
}
}
-
+
if (!local) ParallelDescriptor::ReduceRealMax(max_v);
return max_v;
}
@@ -427,23 +780,23 @@ WarpXParticleContainer::PushXES (Real dt)
int num_levels = finestLevel() + 1;
- for (int lev = 0; lev < num_levels; ++lev) {
+ for (int lev = 0; lev < num_levels; ++lev) {
const auto& gm = m_gdb->Geom(lev);
const RealBox& prob_domain = gm.ProbDomain();
for (WarpXParIter pti(*this, lev); pti.isValid(); ++pti) {
auto& particles = pti.GetArrayOfStructs();
- int nstride = particles.dataShape().first;
+ int nstride = particles.dataShape().first;
const long np = pti.numParticles();
-
- auto& attribs = pti.GetAttribs();
+
+ auto& attribs = pti.GetAttribs();
auto& uxp = attribs[PIdx::ux];
auto& uyp = attribs[PIdx::uy];
auto& uzp = attribs[PIdx::uz];
-
- WRPX_PUSH_LEAPFROG_POSITIONS(particles.data(), nstride, np,
- uxp.data(), uyp.data(),
+
+ WRPX_PUSH_LEAPFROG_POSITIONS(particles.dataPtr(), nstride, np,
+ uxp.dataPtr(), uyp.dataPtr(),
#if AMREX_SPACEDIM == 3
- uzp.data(),
+ uzp.dataPtr(),
#endif
&dt,
prob_domain.lo(), prob_domain.hi());
@@ -474,7 +827,7 @@ WarpXParticleContainer::PushX (int lev, Real dt)
#pragma omp parallel
#endif
{
- Vector<Real> xp, yp, zp, giv;
+ Cuda::DeviceVector<Real> xp, yp, zp, giv;
for (WarpXParIter pti(*this, lev); pti.isValid(); ++pti)
{
@@ -485,26 +838,30 @@ WarpXParticleContainer::PushX (int lev, Real dt)
auto& uxp = attribs[PIdx::ux];
auto& uyp = attribs[PIdx::uy];
auto& uzp = attribs[PIdx::uz];
-
+
const long np = pti.numParticles();
-
+
giv.resize(np);
-
+
//
// copy data from particle container to temp arrays
//
BL_PROFILE_VAR_START(blp_copy);
pti.GetPosition(xp, yp, zp);
BL_PROFILE_VAR_STOP(blp_copy);
-
+
//
// Particle Push
//
BL_PROFILE_VAR_START(blp_pxr_pp);
- warpx_particle_pusher_positions(&np, xp.data(), yp.data(), zp.data(),
- uxp.data(), uyp.data(), uzp.data(), giv.data(), &dt);
+ warpx_particle_pusher_positions(&np,
+ xp.dataPtr(),
+ yp.dataPtr(),
+ zp.dataPtr(),
+ uxp.dataPtr(), uyp.dataPtr(), uzp.dataPtr(),
+ giv.dataPtr(), &dt);
BL_PROFILE_VAR_STOP(blp_pxr_pp);
-
+
//
// copy particle data back
//
@@ -515,9 +872,12 @@ WarpXParticleContainer::PushX (int lev, Real dt)
if (cost) {
const Box& tbx = pti.tilebox();
wt = (amrex::second() - wt) / tbx.d_numPts();
- (*cost)[pti].plus(wt, tbx);
+ FArrayBox* costfab = cost->fabPtr(pti);
+ AMREX_LAUNCH_HOST_DEVICE_LAMBDA ( tbx, work_box,
+ {
+ costfab->plus(wt, work_box);
+ });
}
}
}
}
-
diff --git a/Source/WarpXRegrid.cpp b/Source/WarpXRegrid.cpp
index d21e2ad9c..8d7873041 100644
--- a/Source/WarpXRegrid.cpp
+++ b/Source/WarpXRegrid.cpp
@@ -40,6 +40,7 @@ WarpX::RemakeLevel (int lev, Real time, const BoxArray& ba, const DistributionMa
// Fine patch
+ const auto& period = Geom(lev).periodicity();
for (int idim=0; idim < 3; ++idim)
{
{
@@ -60,8 +61,8 @@ WarpX::RemakeLevel (int lev, Real time, const BoxArray& ba, const DistributionMa
const IntVect& ng = current_fp[lev][idim]->nGrowVect();
auto pmf = std::unique_ptr<MultiFab>(new MultiFab(current_fp[lev][idim]->boxArray(),
dm, 1, ng));
- // pmf->Redistribute(*current_fp[lev][idim], 0, 0, 1, ng);
current_fp[lev][idim] = std::move(pmf);
+ current_fp_owner_masks[lev][idim] = std::move(current_fp[lev][idim]->OwnerMask(period));
}
if (current_store[lev][idim])
{
@@ -86,8 +87,8 @@ WarpX::RemakeLevel (int lev, Real time, const BoxArray& ba, const DistributionMa
const IntVect& ng = rho_fp[lev]->nGrowVect();
auto pmf = std::unique_ptr<MultiFab>(new MultiFab(rho_fp[lev]->boxArray(),
dm, nc, ng));
- // pmf->Redistribute(*rho_fp[lev], 0, 0, nc, ng);
rho_fp[lev] = std::move(pmf);
+ rho_fp_owner_masks[lev] = std::move(rho_fp[lev]->OwnerMask(period));
}
// Aux patch
@@ -120,6 +121,7 @@ WarpX::RemakeLevel (int lev, Real time, const BoxArray& ba, const DistributionMa
// Coarse patch
if (lev > 0) {
+ const auto& cperiod = Geom(lev-1).periodicity();
for (int idim=0; idim < 3; ++idim)
{
{
@@ -138,10 +140,11 @@ WarpX::RemakeLevel (int lev, Real time, const BoxArray& ba, const DistributionMa
}
{
const IntVect& ng = current_cp[lev][idim]->nGrowVect();
- auto pmf = std::unique_ptr<MultiFab>(new MultiFab(current_cp[lev][idim]->boxArray(),
- dm, 1, ng));
- // pmf->Redistribute(*current_cp[lev][idim], 0, 0, 1, ng);
+ auto pmf = std::unique_ptr<MultiFab>( new MultiFab(current_cp[lev][idim]->boxArray(),
+ dm, 1, ng));
current_cp[lev][idim] = std::move(pmf);
+ current_cp_owner_masks[lev][idim] = std::move(
+ current_cp[lev][idim]->OwnerMask(cperiod));
}
}
@@ -158,8 +161,8 @@ WarpX::RemakeLevel (int lev, Real time, const BoxArray& ba, const DistributionMa
const IntVect& ng = rho_cp[lev]->nGrowVect();
auto pmf = std::unique_ptr<MultiFab>(new MultiFab(rho_cp[lev]->boxArray(),
dm, nc, ng));
- // pmf->Redistribute(*rho_cp[lev], 0, 0, nc, ng);
rho_cp[lev] = std::move(pmf);
+ rho_cp_owner_masks[lev] = std::move(rho_cp[lev]->OwnerMask(cperiod));
}
}
diff --git a/Source/WarpXUtil.cpp b/Source/WarpXUtil.cpp
index eb0eec92b..4a884330a 100644
--- a/Source/WarpXUtil.cpp
+++ b/Source/WarpXUtil.cpp
@@ -38,9 +38,8 @@ void ReadBoostedFrameParameters(Real& gamma_boost, Real& beta_boost,
void ConvertLabParamsToBoost()
{
-
- Real gamma_boost, beta_boost;
- int max_level;
+ Real gamma_boost = 1., beta_boost = 0.;
+ int max_level = 0;
Vector<int> boost_direction {0,0,0};
ReadBoostedFrameParameters(gamma_boost, beta_boost, boost_direction);
diff --git a/Source/WarpXWrappers.cpp b/Source/WarpXWrappers.cpp
index 198cd818d..6ee06a7d5 100644
--- a/Source/WarpXWrappers.cpp
+++ b/Source/WarpXWrappers.cpp
@@ -56,14 +56,12 @@ extern "C"
bool warpx_use_fdtd_nci_corr()
{
- auto & mypc = WarpX::GetInstance().GetPartContainer();
- return mypc.Use_fdtd_nci_corr();
+ return WarpX::use_fdtd_nci_corr;
}
int warpx_l_lower_order_in_v()
{
- auto & mypc = WarpX::GetInstance().GetPartContainer();
- return mypc.L_lower_order_in_v();
+ return WarpX::l_lower_order_in_v;
}
int warpx_nComps()
diff --git a/Source/WarpX_f.H b/Source/WarpX_f.H
index e709c1332..578785a14 100644
--- a/Source/WarpX_f.H
+++ b/Source/WarpX_f.H
@@ -180,6 +180,22 @@ extern "C"
void parse_function_laser( const long* np, amrex::Real* Xp, amrex::Real* Yp, amrex::Real* t, amrex::Real* amplitude, const int parser_instance_number );
+ void calculate_laser_plane_coordinates( const long* np,
+ amrex::Real* xp, amrex::Real* yp, amrex::Real* zp,
+ amrex::Real* plane_Xp, amrex::Real* plane_Yp,
+ amrex::Real* u_Xx, amrex::Real* u_Xy, amrex::Real* u_Xz,
+ amrex::Real* u_Yx, amrex::Real* u_Yy, amrex::Real* u_Yz,
+ amrex::Real* positionx, amrex::Real* positiony, amrex::Real* positionz );
+
+ void update_laser_particle( const long* np,
+ amrex::Real* xp, amrex::Real* yp, amrex::Real* zp,
+ amrex::Real* uxp, amrex::Real* uyp, amrex::Real* uzp,
+ amrex::Real* giv, amrex::Real* wp, amrex::Real* amplitude_E,
+ amrex::Real* p_Xx, amrex::Real* p_Xy, amrex::Real* p_Xz,
+ amrex::Real* nvecx, amrex::Real* nvecy, amrex::Real* nvecz,
+ amrex::Real* mobility, amrex::Real* dt, const amrex::Real* c,
+ const amrex::Real* beta_boost, const amrex::Real* gamma_boost );
+
// Maxwell solver
void warpx_push_evec(
diff --git a/Source/WarpX_fft.F90 b/Source/WarpX_fft.F90
index 8d697d184..e605b2be0 100644
--- a/Source/WarpX_fft.F90
+++ b/Source/WarpX_fft.F90
@@ -198,7 +198,7 @@ contains
call c_f_pointer(fft_data(21), rhof, shp)
fft_data(22) = fftw_alloc_complex(sz)
call c_f_pointer(fft_data(22), rhooldf, shp)
-
+!$acc enter data create (exf,eyf,ezf,bxf,byf,bzf,jxf,jyf,jzf,rhof,rhooldf)
if (ndata < 22) then
call amrex_abort("size of fft_data is too small")
end if
diff --git a/Source/WarpX_laser.F90 b/Source/WarpX_laser.F90
index a99cec8c7..b08413019 100644
--- a/Source/WarpX_laser.F90
+++ b/Source/WarpX_laser.F90
@@ -1,3 +1,10 @@
+#if AMREX_USE_GPU && __PGIC__ <= 18 && __PGIC_MINOR__ <= 7
+#define USE_ACC_LASER_SUBROUTINE
+! Define a separate subroutine that contains the openacc pragmas
+! This is because fortran subroutines containing openacc pramas
+! ignore the `bind(C,` statement, and so cannot directly be called from C++
+! (at least until pgi version 18.7)
+#endif
module warpx_laser_module
@@ -27,6 +34,31 @@ contains
stc_exponent, stcfactor
complex*16, parameter :: j=cmplx(0., 1.)
+#ifdef USE_ACC_LASER_SUBROUTINE
+ call warpx_gaussian_laser_acc( np, Xp, Yp, t, &
+ wavelength, e_max, waist, duration, t_peak, f, amplitude, &
+ zeta, beta, phi2, theta_stc )
+
+ end subroutine warpx_gaussian_laser
+
+ subroutine warpx_gaussian_laser_acc( np, Xp, Yp, t, &
+ wavelength, e_max, waist, duration, t_peak, f, amplitude, &
+ zeta, beta, phi2, theta_stc )
+
+ integer(c_long), intent(in) :: np
+ real(amrex_real), intent(in) :: Xp(np),Yp(np)
+ real(amrex_real), intent(in) :: e_max, t, t_peak, wavelength, duration, f, waist
+ real(amrex_real), intent(in) :: zeta, beta, phi2, theta_stc
+ real(amrex_real), intent(inout) :: amplitude(np)
+
+ integer(c_long) :: i
+ real(amrex_real) :: k0, oscillation_phase, inv_tau2
+ complex*16 :: diffract_factor, exp_argument, prefactor, &
+ inv_complex_waist_2, stretch_factor, &
+ stc_exponent, stcfactor
+ complex*16, parameter :: j=cmplx(0., 1.)
+#endif
+
! This function uses the complex expression of a Gaussian laser
! (Including Gouy phase and laser oscillations)
@@ -38,13 +70,13 @@ contains
! laser diffraction, and phase front curvature
diffract_factor = 1 + j * f * 2./(k0*waist**2)
inv_complex_waist_2 = 1./( waist**2 * diffract_factor )
-
+
! Time stretching due to STCs and phi2 complex envelope
! (1 if zeta=0, beta=0, phi2=0)
- stretch_factor = 1. + \
- 4*(zeta + beta*f)**2 * (inv_tau2*inv_complex_waist_2) + \
+ stretch_factor = 1. + &
+ 4*(zeta + beta*f)**2 * (inv_tau2*inv_complex_waist_2) + &
2*j*(phi2 - beta**2*k0*f) * inv_tau2
-
+
! Amplitude and monochromatic oscillations
prefactor = e_max * exp( j * oscillation_phase )
@@ -58,21 +90,29 @@ contains
#endif
! Loop through the macroparticle to calculate the proper amplitude
+ !$acc parallel deviceptr(amplitude, Xp, Yp)
+ !$acc loop
do i = 1, np
! Exp argument for the temporal gaussian envelope + STCs
- stc_exponent = 1./stretch_factor * inv_tau2 * \
- (t - t_peak - beta*k0*(Xp(i)*cos(theta_stc) + Yp(i)*sin(theta_stc)) -\
- 2*j*(Xp(i)*cos(theta_stc) + Yp(i)*sin(theta_stc))*( zeta - beta*f ) *\
- inv_complex_waist_2)**2
- ! stcfactor = everything but complex transverse envelope
+ stc_exponent = 1./stretch_factor * inv_tau2 * &
+ (t - t_peak - beta*k0*(Xp(i)*cos(theta_stc) + Yp(i)*sin(theta_stc)) - &
+ 2*j*(Xp(i)*cos(theta_stc) + Yp(i)*sin(theta_stc))*( zeta - beta*f ) * &
+ inv_complex_waist_2)**2
+ ! stcfactor = everything but complex transverse envelope
stcfactor = prefactor * exp( - stc_exponent )
! Exp argument for transverse envelope
exp_argument = - ( Xp(i)*Xp(i) + Yp(i)*Yp(i) ) * inv_complex_waist_2
! stcfactor + transverse envelope
amplitude(i) = DREAL( stcfactor * exp( exp_argument ) )
- enddo
+ enddo
+ !$acc end loop
+ !$acc end parallel
+#ifdef USE_ACC_LASER_SUBROUTINE
+ end subroutine warpx_gaussian_laser_acc
+#else
end subroutine warpx_gaussian_laser
+#endif
! Harris function for the laser temporal profile
subroutine warpx_harris_laser( np, Xp, Yp, t, &
@@ -88,6 +128,25 @@ contains
real(amrex_real) :: space_envelope, time_envelope, arg_osc, arg_env
real(amrex_real) :: omega0, zR, wz, inv_Rz, oscillations, inv_wz_2
+#ifdef USE_ACC_LASER_SUBROUTINE
+ call warpx_harris_laser_acc( np, Xp, Yp, t, &
+ wavelength, e_max, waist, duration, f, amplitude )
+
+ end subroutine warpx_harris_laser
+
+ subroutine warpx_harris_laser_acc( np, Xp, Yp, t, &
+ wavelength, e_max, waist, duration, f, amplitude )
+
+ integer(c_long), intent(in) :: np
+ real(amrex_real), intent(in) :: Xp(np),Yp(np)
+ real(amrex_real), intent(in) :: e_max, t, wavelength, duration, f, waist
+ real(amrex_real), intent(inout) :: amplitude(np)
+
+ integer(c_long) :: i
+ real(amrex_real) :: space_envelope, time_envelope, arg_osc, arg_env
+ real(amrex_real) :: omega0, zR, wz, inv_Rz, oscillations, inv_wz_2
+#endif
+
! This function uses the Harris function as the temporal profile of the pulse
omega0 = 2*pi*clight/wavelength
zR = pi * waist**2 / wavelength
@@ -110,14 +169,22 @@ contains
end if
! Loop through the macroparticle to calculate the proper amplitude
+ !$acc parallel deviceptr(amplitude, Xp, Yp)
+ !$acc loop
do i = 1, np
space_envelope = exp(- ( Xp(i)*Xp(i) + Yp(i)*Yp(i) ) * inv_wz_2)
arg_osc = omega0*t - omega0/clight*(Xp(i)*Xp(i) + Yp(i)*Yp(i)) * inv_Rz / 2
oscillations = cos(arg_osc)
amplitude(i) = e_max * time_envelope * space_envelope * oscillations
enddo
+ !$acc end loop
+ !$acc end parallel
+#ifdef USE_ACC_LASER_SUBROUTINE
+ end subroutine warpx_harris_laser_acc
+#else
end subroutine warpx_harris_laser
+#endif
! Parse function from the input script for the laser temporal profile
subroutine parse_function_laser( np, Xp, Yp, t, amplitude, parser_instance_number ) bind(C, name="parse_function_laser")
@@ -136,4 +203,135 @@ contains
enddo
end subroutine parse_function_laser
+
+ subroutine calculate_laser_plane_coordinates(np, xp, yp, zp, &
+ plane_Xp, plane_Yp, u_Xx, u_Xy, u_Xz, u_Yx, u_Yy, u_Yz, &
+ positionx, positiony, positionz ) &
+ bind(C, name="calculate_laser_plane_coordinates")
+ integer(c_long), intent(in) :: np
+ real(amrex_real), intent(in) :: xp(np), yp(np), zp(np)
+ real(amrex_real), intent(in) :: u_Xx, u_Xy, u_Xz
+ real(amrex_real), intent(in) :: u_Yx, u_Yy, u_Yz
+ real(amrex_real), intent(in) :: positionx, positiony, positionz
+ real(amrex_real), intent(out) :: plane_Xp(np), plane_Yp(np)
+ integer :: ip
+
+#ifdef USE_ACC_LASER_SUBROUTINE
+ call calculate_laser_plane_coordinates_acc(np, xp, yp, zp, &
+ plane_Xp, plane_Yp, u_Xx, u_Xy, u_Xz, u_Yx, u_Yy, u_Yz, &
+ positionx, positiony, positionz )
+
+ end subroutine calculate_laser_plane_coordinates
+
+ subroutine calculate_laser_plane_coordinates_acc(np, xp, yp, zp, &
+ plane_Xp, plane_Yp, u_Xx, u_Xy, u_Xz, u_Yx, u_Yy, u_Yz, &
+ positionx, positiony, positionz )
+ integer(c_long), intent(in) :: np
+ real(amrex_real), intent(in) :: xp(np), yp(np), zp(np)
+ real(amrex_real), intent(in) :: u_Xx, u_Xy, u_Xz
+ real(amrex_real), intent(in) :: u_Yx, u_Yy, u_Yz
+ real(amrex_real), intent(in) :: positionx, positiony, positionz
+ real(amrex_real), intent(out) :: plane_Xp(np), plane_Yp(np)
+ integer :: ip
+#endif
+
+ !$acc parallel deviceptr(xp, yp, zp, plane_Xp, plane_Yp)
+ !$acc loop
+ do ip = 1, np
+#if (BL_SPACEDIM == 3)
+ plane_Xp(ip) = u_Xx*(xp(ip) - positionx) &
+ + u_Xy*(yp(ip) - positiony) &
+ + u_Xz*(zp(ip) - positionz)
+ plane_Yp(ip) = u_Yx*(xp(ip) - positionx) &
+ + u_Yy*(yp(ip) - positiony) &
+ + u_Yz*(zp(ip) - positionz)
+#elif (AMREX_SPACEDIM == 2)
+ plane_Xp(ip) = u_Xx*(xp(ip) - positionx) &
+ + u_Xz*(zp(ip) - positionz)
+ plane_Yp(ip) = 0
+#endif
+ enddo
+ !$acc end loop
+ !$acc end parallel
+
+#ifdef USE_ACC_LASER_SUBROUTINE
+ end subroutine calculate_laser_plane_coordinates_acc
+#else
+ end subroutine calculate_laser_plane_coordinates
+#endif
+
+ subroutine update_laser_particle( np, &
+ xp, yp, zp, uxp, uyp, uzp, giv, wp, amplitude_E, p_Xx, p_Xy, p_Xz, &
+ nvecx, nvecy, nvecz, mobility, dt, c, beta_boost, gamma_boost ) &
+ bind(C, name="update_laser_particle")
+
+ integer(c_long), intent(in) :: np
+ real(amrex_real), intent(in) :: amplitude_E(np), wp(np)
+ real(amrex_real), intent(inout) :: xp(np), yp(np), zp(np)
+ real(amrex_real), intent(out) :: uxp(np), uyp(np), uzp(np), giv(np)
+ real(amrex_real), intent(in) :: p_Xx, p_Xy, p_Xz
+ real(amrex_real), intent(in) :: nvecx, nvecy, nvecz
+ real(amrex_real), intent(in) :: c, beta_boost, gamma_boost, mobility, dt
+
+#ifdef USE_ACC_LASER_SUBROUTINE
+ call update_laser_particle_acc( np, &
+ xp, yp, zp, uxp, uyp, uzp, giv, wp, amplitude_E, p_Xx, p_Xy, p_Xz, &
+ nvecx, nvecy, nvecz, mobility, dt, c, beta_boost, gamma_boost )
+
+ end subroutine update_laser_particle
+
+ subroutine update_laser_particle_acc( np, &
+ xp, yp, zp, uxp, uyp, uzp, giv, wp, amplitude_E, p_Xx, p_Xy, p_Xz, &
+ nvecx, nvecy, nvecz, mobility, dt, c, beta_boost, gamma_boost )
+
+ integer(c_long), intent(in) :: np
+ real(amrex_real), intent(in) :: amplitude_E(np), wp(np)
+ real(amrex_real), intent(inout) :: xp(np), yp(np), zp(np)
+ real(amrex_real), intent(out) :: uxp(np), uyp(np), uzp(np), giv(np)
+ real(amrex_real), intent(in) :: p_Xx, p_Xy, p_Xz
+ real(amrex_real), intent(in) :: nvecx, nvecy, nvecz
+ real(amrex_real), intent(in) :: c, beta_boost, gamma_boost, mobility, dt
+#endif
+
+ real(amrex_real) :: vx, vy, vz, v_over_c, sign_charge, gamma
+ integer :: ip
+
+ !$acc parallel deviceptr(amplitude_E, wp, xp, yp, zp, uxp, uyp, uzp, giv)
+ !$acc loop
+ do ip = 1, np
+ ! Calculate the velocity according to the amplitude of E
+ sign_charge = SIGN( 1.0_amrex_real, wp(ip) )
+ v_over_c = sign_charge * mobility * amplitude_E(ip)
+ ! The velocity is along the laser polarization p_X
+ vx = c * v_over_c * p_Xx
+ vy = c * v_over_c * p_Xy
+ vz = c * v_over_c * p_Xz
+ ! When running in the boosted-frame, their is additional velocity along nvec
+ if (gamma_boost > 1.) then
+ vx = vx - c * beta_boost * nvecx
+ vy = vy - c * beta_boost * nvecy
+ vz = vz - c * beta_boost * nvecz
+ endif
+ ! Get the corresponding momenta
+ gamma = gamma_boost/sqrt(1 - v_over_c*v_over_c)
+ giv(ip) = 1./gamma
+ uxp(ip) = gamma * vx
+ uyp(ip) = gamma * vy
+ uzp(ip) = gamma * vz
+ ! Push the the particle positions
+ xp(ip) = xp(ip) + vx * dt
+#if (BL_SPACEDIM == 3)
+ yp(ip) = yp(ip) + vy * dt
+#endif
+ zp(ip) = zp(ip) + vz * dt
+ enddo
+ !$acc end loop
+ !$acc end parallel
+
+#ifdef USE_ACC_LASER_SUBROUTINE
+ end subroutine update_laser_particle_acc
+#else
+ end subroutine update_laser_particle
+#endif
+
end module warpx_laser_module
diff --git a/Source/WarpX_picsar.F90 b/Source/WarpX_picsar.F90
index 91a2f08e7..13168ceca 100644
--- a/Source/WarpX_picsar.F90
+++ b/Source/WarpX_picsar.F90
@@ -294,7 +294,8 @@ subroutine warpx_charge_deposition(rho,np,xp,yp,zp,w,q,xmin,ymin,zmin,dx,dy,dz,n
jy,jy_nguards,jy_nvalid, &
jz,jz_nguards,jz_nvalid, &
np,xp,yp,zp,uxp,uyp,uzp,gaminv,w,q, &
- xmin,zmin,dt,dx,dz,nox,noz,lvect)
+ xmin,zmin,dt,dx,dz,nox,noz,lvect, &
+ current_depo_algo)
#endif
end subroutine warpx_current_deposition
diff --git a/Tools/parallel_postproc.py b/Tools/parallel_postproc.py
new file mode 100644
index 000000000..0942118ac
--- /dev/null
+++ b/Tools/parallel_postproc.py
@@ -0,0 +1,83 @@
+import matplotlib
+matplotlib.use('Agg')
+from mpi4py import MPI
+import glob, read_raw_data
+import numpy as np
+import scipy.constants as scc
+import matplotlib.pyplot as plt
+
+species = 'electrons'
+fieldname = 'Ey'
+lambda0 = .81e-6
+
+# --- custom functions --- #
+# ------------------------ #
+omega0 = 2*np.pi*scc.c/lambda0
+# Read field fieldname and return normalized max
+def get_a0(res_dir, snapshot):
+ header = res_dir + '/Header'
+ print( snapshot )
+ allrd, info = read_raw_data.read_lab_snapshot(snapshot, header)
+ F = allrd[ fieldname ]
+ return info['z'][-1], np.max(np.abs(F)) * scc.e/(scc.m_e*omega0*scc.c)
+# Convert elements of a list to numpy arrays
+def convert_to_np_array(list_in):
+ list_out = []
+ for elem in list_in:
+ list_out.append( np.array( elem ) )
+ return list_out
+
+# --- MPI parallelization --- #
+# --------------------------- #
+# Get ordered list of snapshot files
+res_dir = './lab_frame_data/';
+file_list = glob.glob(res_dir + '/snapshot?????')
+file_list.sort()
+# Total number of files
+nfiles = len(file_list)
+# Each file has a unique number
+number_list = range(nfiles)
+comm_world = MPI.COMM_WORLD
+me = comm_world.Get_rank()
+nrank = comm_world.Get_size()
+# List of files to process for current proc
+my_list = file_list[ (me*nfiles)/nrank : ((me+1)*nfiles)/nrank ]
+# List if file numbers for current proc
+my_number_list = number_list[ (me*nfiles)/nrank : ((me+1)*nfiles)/nrank ]
+
+# --- Run parallel analysis --- #
+# ----------------------------- #
+# Each MPI rank reads roughly (nb snapshots)/(nb ranks) snapshots.
+# Works with any number of snapshots.
+for count, filename in enumerate(my_list):
+ zwin, a0 = get_a0( res_dir, filename )
+ uzlab = read_raw_data.get_particle_field(filename, species, 'uz')/scc.c
+ select_particles = (uzlab > 5.)
+ uzlab = uzlab[ select_particles ]
+ uzmean = np.mean(uzlab)
+
+# --- gather and rank 0 plots --- #
+# ------------------------------- #
+# Gather particle quantities to rank 0 to plot history of quantities.
+UZMEAN = comm_world.gather(uzmean, root=0)
+ZWIN = comm_world.gather(zwin, root=0)
+A0 = comm_world.gather(a0, root=0)
+# Rank 0 does the plot.
+if me == 0:
+ # Convert to numpy arrays
+ UZMEAN, ZWIN, A0 = convert_to_np_array([UZMEAN, ZWIN, A0])
+ # Plot and save
+ fig = plt.figure()
+ plt.subplot(2,1,1)
+ plt.plot(ZWIN*1.e3, UZMEAN)
+ plt.xlabel('z (mm)')
+ plt.ylabel('uz')
+ plt.title( 'beam energy' )
+ plt.grid()
+ plt.subplot(2,1,2)
+ plt.plot(ZWIN*1.e3, A0)
+ plt.ylabel('a0')
+ plt.title( 'beam propag angle' )
+ plt.grid()
+ fig.savefig('./image.png', bbox_inches='tight')
+ plt.close()