aboutsummaryrefslogtreecommitdiff
path: root/Source
diff options
context:
space:
mode:
Diffstat (limited to 'Source')
-rw-r--r--Source/.DS_Storebin6148 -> 0 bytes
-rw-r--r--Source/BoundaryConditions/PML.H30
-rw-r--r--Source/BoundaryConditions/PML.cpp149
-rw-r--r--Source/Diagnostics/ParticleIO.cpp2
-rw-r--r--Source/Evolve/WarpXEvolveEM.cpp28
-rw-r--r--Source/FieldSolver/SpectralSolver/SpectralAlgorithms/Make.package2
-rw-r--r--Source/FieldSolver/SpectralSolver/SpectralAlgorithms/PMLPsatdAlgorithm.H34
-rw-r--r--Source/FieldSolver/SpectralSolver/SpectralAlgorithms/PMLPsatdAlgorithm.cpp146
-rw-r--r--Source/FieldSolver/SpectralSolver/SpectralAlgorithms/PsatdAlgorithm.H14
-rw-r--r--Source/FieldSolver/SpectralSolver/SpectralAlgorithms/SpectralBaseAlgorithm.H4
-rw-r--r--Source/FieldSolver/SpectralSolver/SpectralFieldData.H19
-rw-r--r--Source/FieldSolver/SpectralSolver/SpectralFieldData.cpp6
-rw-r--r--Source/FieldSolver/SpectralSolver/SpectralSolver.H3
-rw-r--r--Source/FieldSolver/SpectralSolver/SpectralSolver.cpp24
-rw-r--r--Source/FieldSolver/WarpXPushFieldsEM.cpp215
-rw-r--r--Source/FortranInterface/WarpX_f.H48
-rw-r--r--Source/FortranInterface/WarpX_picsar.F90389
-rw-r--r--Source/Initialization/CustomDensityProb.H49
-rw-r--r--Source/Initialization/CustomDensityProb.cpp12
-rw-r--r--Source/Initialization/CustomMomentumProb.H30
-rw-r--r--Source/Initialization/CustomMomentumProb.cpp14
-rw-r--r--Source/Initialization/InjectorDensity.H202
-rw-r--r--Source/Initialization/InjectorDensity.cpp77
-rw-r--r--Source/Initialization/InjectorMomentum.H223
-rw-r--r--Source/Initialization/InjectorMomentum.cpp40
-rw-r--r--Source/Initialization/InjectorPosition.H146
-rw-r--r--Source/Initialization/Make.package15
-rw-r--r--Source/Initialization/PlasmaInjector.H293
-rw-r--r--Source/Initialization/PlasmaInjector.cpp369
-rw-r--r--Source/Initialization/PlasmaProfiles.cpp41
-rw-r--r--Source/Initialization/WarpXInitData.cpp43
-rw-r--r--Source/Laser/LaserParticleContainer.cpp26
-rw-r--r--Source/Make.WarpX33
-rw-r--r--Source/Parser/GpuParser.H72
-rw-r--r--Source/Parser/GpuParser.cpp73
-rw-r--r--Source/Parser/Make.package2
-rw-r--r--Source/Parser/WarpXParser.H4
-rw-r--r--Source/Parser/wp_parser_c.h122
-rw-r--r--Source/Parser/wp_parser_y.c129
-rw-r--r--Source/Parser/wp_parser_y.h22
-rwxr-xr-xSource/Particles/Deposition/ChargeDeposition.H97
-rw-r--r--Source/Particles/Deposition/CurrentDeposition.H624
-rw-r--r--Source/Particles/Deposition/Make.package1
-rw-r--r--Source/Particles/Gather/FieldGather.H216
-rw-r--r--Source/Particles/Gather/Make.package3
-rw-r--r--Source/Particles/Make.package2
-rw-r--r--Source/Particles/MultiParticleContainer.H5
-rw-r--r--Source/Particles/MultiParticleContainer.cpp31
-rw-r--r--Source/Particles/PhysicalParticleContainer.H51
-rw-r--r--Source/Particles/PhysicalParticleContainer.cpp1323
-rw-r--r--Source/Particles/Pusher/GetAndSetPosition.H6
-rw-r--r--Source/Particles/Pusher/Make.package2
-rw-r--r--Source/Particles/Pusher/UpdateMomentumBoris.H47
-rw-r--r--Source/Particles/Pusher/UpdateMomentumVay.H54
-rw-r--r--Source/Particles/Pusher/UpdatePosition.H2
-rw-r--r--Source/Particles/RigidInjectedParticleContainer.H3
-rw-r--r--Source/Particles/RigidInjectedParticleContainer.cpp255
-rw-r--r--Source/Particles/ShapeFactors.H117
-rw-r--r--Source/Particles/WarpXParticleContainer.H17
-rw-r--r--Source/Particles/WarpXParticleContainer.cpp308
-rw-r--r--Source/Utils/WarpXAlgorithmSelection.H6
-rw-r--r--Source/Utils/WarpXAlgorithmSelection.cpp7
-rw-r--r--Source/WarpX.H20
-rw-r--r--Source/WarpX.cpp76
64 files changed, 3765 insertions, 2658 deletions
diff --git a/Source/.DS_Store b/Source/.DS_Store
deleted file mode 100644
index 01640e062..000000000
--- a/Source/.DS_Store
+++ /dev/null
Binary files differ
diff --git a/Source/BoundaryConditions/PML.H b/Source/BoundaryConditions/PML.H
index 0cf367284..b34cbe88b 100644
--- a/Source/BoundaryConditions/PML.H
+++ b/Source/BoundaryConditions/PML.H
@@ -6,6 +6,10 @@
#include <AMReX_MultiFab.H>
#include <AMReX_Geometry.H>
+#ifdef WARPX_USE_PSATD
+#include <SpectralSolver.H>
+#endif
+
#if (AMREX_SPACEDIM == 3)
#define WRPX_PML_TO_FORTRAN(x) \
@@ -93,7 +97,12 @@ class PML
public:
PML (const amrex::BoxArray& ba, const amrex::DistributionMapping& dm,
const amrex::Geometry* geom, const amrex::Geometry* cgeom,
- int ncell, int delta, int ref_ratio, int do_dive_cleaning, int do_moving_window);
+ int ncell, int delta, int ref_ratio,
+#ifdef WARPX_USE_PSATD
+ amrex::Real dt, int nox_fft, int noy_fft, int noz_fft, bool do_nodal,
+#endif
+ int do_dive_cleaning, int do_moving_window,
+ const amrex::IntVect do_pml_Lo, const amrex::IntVect do_pml_Hi);
void ComputePMLFactors (amrex::Real dt);
@@ -111,6 +120,10 @@ public:
const MultiSigmaBox& GetMultiSigmaBox_cp () const
{ return *sigba_cp; }
+#ifdef WARPX_USE_PSATD
+ void PushPSATD ();
+#endif
+
void ExchangeB (const std::array<amrex::MultiFab*,3>& B_fp,
const std::array<amrex::MultiFab*,3>& B_cp);
void ExchangeE (const std::array<amrex::MultiFab*,3>& E_fp,
@@ -154,10 +167,23 @@ private:
std::unique_ptr<MultiSigmaBox> sigba_fp;
std::unique_ptr<MultiSigmaBox> sigba_cp;
+#ifdef WARPX_USE_PSATD
+ std::unique_ptr<SpectralSolver> spectral_solver_fp;
+ std::unique_ptr<SpectralSolver> spectral_solver_cp;
+#endif
+
static amrex::BoxArray MakeBoxArray (const amrex::Geometry& geom,
- const amrex::BoxArray& grid_ba, int ncell);
+ const amrex::BoxArray& grid_ba, int ncell,
+ const amrex::IntVect do_pml_Lo,
+ const amrex::IntVect do_pml_Hi);
static void Exchange (amrex::MultiFab& pml, amrex::MultiFab& reg, const amrex::Geometry& geom);
};
+#ifdef WARPX_USE_PSATD
+void PushPMLPSATDSinglePatch( SpectralSolver& solver,
+ std::array<std::unique_ptr<amrex::MultiFab>,3>& pml_E,
+ std::array<std::unique_ptr<amrex::MultiFab>,3>& pml_B );
+#endif
+
#endif
diff --git a/Source/BoundaryConditions/PML.cpp b/Source/BoundaryConditions/PML.cpp
index f780f335c..21d348482 100644
--- a/Source/BoundaryConditions/PML.cpp
+++ b/Source/BoundaryConditions/PML.cpp
@@ -258,14 +258,7 @@ SigmaBox::ComputePMLFactorsB (const Real* dx, Real dt)
{
for (int i = 0, N = sigma_star[idim].size(); i < N; ++i)
{
- if (sigma_star[idim][i] == 0.0)
- {
- sigma_star_fac[idim][i] = 1.0;
- }
- else
- {
- sigma_star_fac[idim][i] = std::exp(-sigma_star[idim][i]*dt);
- }
+ sigma_star_fac[idim][i] = std::exp(-sigma_star[idim][i]*dt);
}
}
}
@@ -277,14 +270,7 @@ SigmaBox::ComputePMLFactorsE (const Real* dx, Real dt)
{
for (int i = 0, N = sigma[idim].size(); i < N; ++i)
{
- if (sigma[idim][i] == 0.0)
- {
- sigma_fac[idim][i] = 1.0;
- }
- else
- {
- sigma_fac[idim][i] = std::exp(-sigma[idim][i]*dt);
- }
+ sigma_fac[idim][i] = std::exp(-sigma[idim][i]*dt);
}
}
}
@@ -329,11 +315,16 @@ MultiSigmaBox::ComputePMLFactorsE (const Real* dx, Real dt)
PML::PML (const BoxArray& grid_ba, const DistributionMapping& grid_dm,
const Geometry* geom, const Geometry* cgeom,
- int ncell, int delta, int ref_ratio, int do_dive_cleaning, int do_moving_window)
+ int ncell, int delta, int ref_ratio,
+#ifdef WARPX_USE_PSATD
+ Real dt, int nox_fft, int noy_fft, int noz_fft, bool do_nodal,
+#endif
+ int do_dive_cleaning, int do_moving_window,
+ const amrex::IntVect do_pml_Lo, const amrex::IntVect do_pml_Hi)
: m_geom(geom),
m_cgeom(cgeom)
{
- const BoxArray& ba = MakeBoxArray(*geom, grid_ba, ncell);
+ const BoxArray& ba = MakeBoxArray(*geom, grid_ba, ncell, do_pml_Lo, do_pml_Hi);
if (ba.size() == 0) {
m_ok = false;
return;
@@ -343,10 +334,30 @@ PML::PML (const BoxArray& grid_ba, const DistributionMapping& grid_dm,
DistributionMapping dm{ba};
- int nge = 2;
- int ngb = 2;
- int ngf = (do_moving_window) ? 2 : 0;
- if (WarpX::maxwell_fdtd_solver_id == 1) ngf = std::max( ngf, 1 );
+ // Define the number of guard cells in each direction, for E, B, and F
+ IntVect nge = IntVect(AMREX_D_DECL(2, 2, 2));
+ IntVect ngb = IntVect(AMREX_D_DECL(2, 2, 2));
+ int ngf_int = (do_moving_window) ? 2 : 0;
+ if (WarpX::maxwell_fdtd_solver_id == 1) ngf_int = std::max( ngf_int, 1 );
+ IntVect ngf = IntVect(AMREX_D_DECL(ngf_int, ngf_int, ngf_int));
+#ifdef WARPX_USE_PSATD
+ // Increase the number of guard cells, in order to fit the extent
+ // of the stencil for the spectral solver
+ IntVect ngFFT;
+ if (do_nodal) {
+ ngFFT = IntVect(AMREX_D_DECL(nox_fft, noy_fft, noz_fft));
+ } else {
+ ngFFT = IntVect(AMREX_D_DECL(nox_fft/2, noy_fft/2, noz_fft/2));
+ }
+ // Set the number of guard cells to the maximum of each field
+ // (all fields should have the same number of guard cells)
+ ngFFT = ngFFT.max(nge);
+ ngFFT = ngFFT.max(ngb);
+ ngFFT = ngFFT.max(ngf);
+ nge = ngFFT;
+ ngb = ngFFT;
+ ngf = ngFFT;
+ #endif
pml_E_fp[0].reset(new MultiFab(amrex::convert(ba,WarpX::Ex_nodal_flag), dm, 3, nge));
pml_E_fp[1].reset(new MultiFab(amrex::convert(ba,WarpX::Ey_nodal_flag), dm, 3, nge));
@@ -370,15 +381,26 @@ PML::PML (const BoxArray& grid_ba, const DistributionMapping& grid_dm,
sigba_fp.reset(new MultiSigmaBox(ba, dm, grid_ba, geom->CellSize(), ncell, delta));
+#ifdef WARPX_USE_PSATD
+ const bool in_pml = true; // Tells spectral solver to use split-PML equations
+ const RealVect dx{AMREX_D_DECL(geom->CellSize(0), geom->CellSize(1), geom->CellSize(2))};
+ // Get the cell-centered box, with guard cells
+ BoxArray realspace_ba = ba; // Copy box
+ realspace_ba.enclosedCells().grow(nge); // cell-centered + guard cells
+ spectral_solver_fp.reset( new SpectralSolver( realspace_ba, dm,
+ nox_fft, noy_fft, noz_fft, do_nodal, dx, dt, in_pml ) );
+#endif
+
if (cgeom)
{
-
- nge = 1;
- ngb = 1;
+#ifndef WARPX_USE_PSATD
+ nge = IntVect(AMREX_D_DECL(1, 1, 1));
+ ngb = IntVect(AMREX_D_DECL(1, 1, 1));
+#endif
BoxArray grid_cba = grid_ba;
grid_cba.coarsen(ref_ratio);
- const BoxArray& cba = MakeBoxArray(*cgeom, grid_cba, ncell);
+ const BoxArray& cba = MakeBoxArray(*cgeom, grid_cba, ncell, do_pml_Lo, do_pml_Hi);
DistributionMapping cdm{cba};
@@ -403,17 +425,32 @@ PML::PML (const BoxArray& grid_ba, const DistributionMapping& grid_dm,
}
sigba_cp.reset(new MultiSigmaBox(cba, cdm, grid_cba, cgeom->CellSize(), ncell, delta));
- }
+#ifdef WARPX_USE_PSATD
+ const bool in_pml = true; // Tells spectral solver to use split-PML equations
+ const RealVect cdx{AMREX_D_DECL(cgeom->CellSize(0), cgeom->CellSize(1), cgeom->CellSize(2))};
+ // Get the cell-centered box, with guard cells
+ BoxArray realspace_cba = cba; // Copy box
+ realspace_cba.enclosedCells().grow(nge); // cell-centered + guard cells
+ spectral_solver_cp.reset( new SpectralSolver( realspace_cba, cdm,
+ nox_fft, noy_fft, noz_fft, do_nodal, cdx, dt, in_pml ) );
+#endif
+ }
}
BoxArray
-PML::MakeBoxArray (const amrex::Geometry& geom, const amrex::BoxArray& grid_ba, int ncell)
+PML::MakeBoxArray (const amrex::Geometry& geom, const amrex::BoxArray& grid_ba, int ncell,
+ const amrex::IntVect do_pml_Lo, const amrex::IntVect do_pml_Hi)
{
Box domain = geom.Domain();
for (int idim = 0; idim < AMREX_SPACEDIM; ++idim) {
if ( ! geom.isPeriodic(idim) ) {
- domain.grow(idim, ncell);
+ if (do_pml_Lo[idim]){
+ domain.growLo(idim, ncell);
+ }
+ if (do_pml_Hi[idim]){
+ domain.growHi(idim, ncell);
+ }
}
}
@@ -753,3 +790,57 @@ PML::Restart (const std::string& dir)
VisMF::Read(*pml_B_cp[2], dir+"_Bz_cp");
}
}
+
+#ifdef WARPX_USE_PSATD
+void
+PML::PushPSATD () {
+
+ // Update the fields on the fine and coarse patch
+ PushPMLPSATDSinglePatch( *spectral_solver_fp, pml_E_fp, pml_B_fp );
+ if (spectral_solver_cp) {
+ PushPMLPSATDSinglePatch( *spectral_solver_cp, pml_E_cp, pml_B_cp );
+ }
+}
+
+void
+PushPMLPSATDSinglePatch (
+ SpectralSolver& solver,
+ std::array<std::unique_ptr<amrex::MultiFab>,3>& pml_E,
+ std::array<std::unique_ptr<amrex::MultiFab>,3>& pml_B ) {
+
+ using Idx = SpectralPMLIndex;
+
+ // Perform forward Fourier transform
+ // Note: the correspondence between the spectral PML index
+ // (Exy, Ezx, etc.) and the component (0 or 1) of the
+ // MultiFabs (e.g. pml_E) is dictated by the
+ // function that damps the PML
+ solver.ForwardTransform(*pml_E[0], Idx::Exy, 0);
+ solver.ForwardTransform(*pml_E[0], Idx::Exz, 1);
+ solver.ForwardTransform(*pml_E[1], Idx::Eyz, 0);
+ solver.ForwardTransform(*pml_E[1], Idx::Eyx, 1);
+ solver.ForwardTransform(*pml_E[2], Idx::Ezx, 0);
+ solver.ForwardTransform(*pml_E[2], Idx::Ezy, 1);
+ solver.ForwardTransform(*pml_B[0], Idx::Bxy, 0);
+ solver.ForwardTransform(*pml_B[0], Idx::Bxz, 1);
+ solver.ForwardTransform(*pml_B[1], Idx::Byz, 0);
+ solver.ForwardTransform(*pml_B[1], Idx::Byx, 1);
+ solver.ForwardTransform(*pml_B[2], Idx::Bzx, 0);
+ solver.ForwardTransform(*pml_B[2], Idx::Bzy, 1);
+ // Advance fields in spectral space
+ solver.pushSpectralFields();
+ // Perform backward Fourier Transform
+ solver.BackwardTransform(*pml_E[0], Idx::Exy, 0);
+ solver.BackwardTransform(*pml_E[0], Idx::Exz, 1);
+ solver.BackwardTransform(*pml_E[1], Idx::Eyz, 0);
+ solver.BackwardTransform(*pml_E[1], Idx::Eyx, 1);
+ solver.BackwardTransform(*pml_E[2], Idx::Ezx, 0);
+ solver.BackwardTransform(*pml_E[2], Idx::Ezy, 1);
+ solver.BackwardTransform(*pml_B[0], Idx::Bxy, 0);
+ solver.BackwardTransform(*pml_B[0], Idx::Bxz, 1);
+ solver.BackwardTransform(*pml_B[1], Idx::Byz, 0);
+ solver.BackwardTransform(*pml_B[1], Idx::Byx, 1);
+ solver.BackwardTransform(*pml_B[2], Idx::Bzx, 0);
+ solver.BackwardTransform(*pml_B[2], Idx::Bzy, 1);
+}
+#endif
diff --git a/Source/Diagnostics/ParticleIO.cpp b/Source/Diagnostics/ParticleIO.cpp
index f2a543ed5..f159e5302 100644
--- a/Source/Diagnostics/ParticleIO.cpp
+++ b/Source/Diagnostics/ParticleIO.cpp
@@ -98,7 +98,7 @@ MultiParticleContainer::WritePlotFile (const std::string& dir) const
real_names.push_back("By");
real_names.push_back("Bz");
-#ifdef WARPX_RZ
+#ifdef WARPX_DIM_RZ
real_names.push_back("theta");
#endif
diff --git a/Source/Evolve/WarpXEvolveEM.cpp b/Source/Evolve/WarpXEvolveEM.cpp
index 32a4747db..16b5905d1 100644
--- a/Source/Evolve/WarpXEvolveEM.cpp
+++ b/Source/Evolve/WarpXEvolveEM.cpp
@@ -299,6 +299,7 @@ WarpX::OneStep_nosub (Real cur_time)
// (And update guard cells immediately afterwards)
#ifdef WARPX_USE_PSATD
PushPSATD(dt[0]);
+ if (do_pml) DampPML();
FillBoundaryE();
FillBoundaryB();
#else
@@ -481,6 +482,19 @@ WarpX::PushParticlesandDepose (int lev, Real cur_time)
Efield_cax[lev][0].get(), Efield_cax[lev][1].get(), Efield_cax[lev][2].get(),
Bfield_cax[lev][0].get(), Bfield_cax[lev][1].get(), Bfield_cax[lev][2].get(),
cur_time, dt[lev]);
+#ifdef WARPX_DIM_RZ
+ // This is called after all particles have deposited their current and charge.
+ ApplyInverseVolumeScalingToCurrentDensity(current_fp[lev][0].get(), current_fp[lev][1].get(), current_fp[lev][2].get(), lev);
+ if (current_buf[lev][0].get()) {
+ ApplyInverseVolumeScalingToCurrentDensity(current_buf[lev][0].get(), current_buf[lev][1].get(), current_buf[lev][2].get(), lev-1);
+ }
+ if (rho_fp[lev].get()) {
+ ApplyInverseVolumeScalingToChargeDensity(rho_fp[lev].get(), lev);
+ if (charge_buf[lev].get()) {
+ ApplyInverseVolumeScalingToChargeDensity(charge_buf[lev].get(), lev-1);
+ }
+ }
+#endif
}
void
@@ -491,7 +505,7 @@ WarpX::ComputeDt ()
if (maxwell_fdtd_solver_id == 0) {
// CFL time step Yee solver
-#ifdef WARPX_RZ
+#ifdef WARPX_DIM_RZ
// Derived semi-analytically by R. Lehe
deltat = cfl * 1./( std::sqrt((1+0.2105)/(dx[0]*dx[0]) + 1./(dx[1]*dx[1])) * PhysConst::c );
#else
@@ -536,10 +550,7 @@ WarpX::computeMaxStepBoostAccelerator(amrex::Geometry a_geom){
WarpX::moving_window_dir == AMREX_SPACEDIM-1,
"Can use zmax_plasma_to_compute_max_step only if " +
"moving window along z. TODO: all directions.");
- AMREX_ALWAYS_ASSERT_WITH_MESSAGE(
- maxLevel() == 0,
- "Can use zmax_plasma_to_compute_max_step only if " +
- "max level = 0.");
+
AMREX_ALWAYS_ASSERT_WITH_MESSAGE(
(WarpX::boost_direction[0]-0)*(WarpX::boost_direction[0]-0) +
(WarpX::boost_direction[1]-0)*(WarpX::boost_direction[1]-0) +
@@ -560,7 +571,12 @@ WarpX::computeMaxStepBoostAccelerator(amrex::Geometry a_geom){
const Real interaction_time_boost = (len_plasma_boost-zmin_domain_boost)/
(moving_window_v-v_plasma_boost);
// Divide by dt, and update value of max_step.
- const int computed_max_step = interaction_time_boost/dt[0];
+ int computed_max_step;
+ if (do_subcycling){
+ computed_max_step = interaction_time_boost/dt[0];
+ } else {
+ computed_max_step = interaction_time_boost/dt[maxLevel()];
+ }
max_step = computed_max_step;
Print()<<"max_step computed in computeMaxStepBoostAccelerator: "
<<computed_max_step<<std::endl;
diff --git a/Source/FieldSolver/SpectralSolver/SpectralAlgorithms/Make.package b/Source/FieldSolver/SpectralSolver/SpectralAlgorithms/Make.package
index c62c21f44..ee8376865 100644
--- a/Source/FieldSolver/SpectralSolver/SpectralAlgorithms/Make.package
+++ b/Source/FieldSolver/SpectralSolver/SpectralAlgorithms/Make.package
@@ -1,6 +1,8 @@
CEXE_headers += SpectralBaseAlgorithm.H
CEXE_headers += PsatdAlgorithm.H
CEXE_sources += PsatdAlgorithm.cpp
+CEXE_headers += PMLPsatdAlgorithm.H
+CEXE_sources += PMLPsatdAlgorithm.cpp
INCLUDE_LOCATIONS += $(WARPX_HOME)/Source/FieldSolver/SpectralSolver/SpectralAlgorithms
VPATH_LOCATIONS += $(WARPX_HOME)/Source/FieldSolver/SpectralSolver/SpectralAlgorithms
diff --git a/Source/FieldSolver/SpectralSolver/SpectralAlgorithms/PMLPsatdAlgorithm.H b/Source/FieldSolver/SpectralSolver/SpectralAlgorithms/PMLPsatdAlgorithm.H
new file mode 100644
index 000000000..a2511b6b7
--- /dev/null
+++ b/Source/FieldSolver/SpectralSolver/SpectralAlgorithms/PMLPsatdAlgorithm.H
@@ -0,0 +1,34 @@
+#ifndef WARPX_PML_PSATD_ALGORITHM_H_
+#define WARPX_PML_PSATD_ALGORITHM_H_
+
+#include <SpectralBaseAlgorithm.H>
+
+/* \brief Class that updates the PML fields in spectral space
+ * and stores the coefficients of the corresponding update equation.
+ */
+class PMLPsatdAlgorithm : public SpectralBaseAlgorithm
+{
+ public:
+ PMLPsatdAlgorithm(const SpectralKSpace& spectral_kspace,
+ const amrex::DistributionMapping& dm,
+ const int norder_x, const int norder_y,
+ const int norder_z, const bool nodal,
+ const amrex::Real dt);
+
+ void InitializeSpectralCoefficients(
+ const SpectralKSpace& spectral_kspace,
+ const amrex::DistributionMapping& dm,
+ const amrex::Real dt);
+
+ // Redefine functions from base class
+ virtual void pushSpectralFields(SpectralFieldData& f) const override final;
+ virtual int getRequiredNumberOfFields() const override final {
+ return SpectralPMLIndex::n_fields;
+ }
+
+ private:
+ SpectralCoefficients C_coef, S_ck_coef;
+
+};
+
+#endif // WARPX_PML_PSATD_ALGORITHM_H_
diff --git a/Source/FieldSolver/SpectralSolver/SpectralAlgorithms/PMLPsatdAlgorithm.cpp b/Source/FieldSolver/SpectralSolver/SpectralAlgorithms/PMLPsatdAlgorithm.cpp
new file mode 100644
index 000000000..d76259d4c
--- /dev/null
+++ b/Source/FieldSolver/SpectralSolver/SpectralAlgorithms/PMLPsatdAlgorithm.cpp
@@ -0,0 +1,146 @@
+#include <PMLPsatdAlgorithm.H>
+#include <WarpXConst.H>
+#include <cmath>
+
+using namespace amrex;
+
+/* \brief Initialize coefficients for the update equation */
+PMLPsatdAlgorithm::PMLPsatdAlgorithm(
+ const SpectralKSpace& spectral_kspace,
+ const DistributionMapping& dm,
+ const int norder_x, const int norder_y,
+ const int norder_z, const bool nodal, const Real dt)
+ // Initialize members of base class
+ : SpectralBaseAlgorithm( spectral_kspace, dm,
+ norder_x, norder_y, norder_z, nodal )
+{
+ const BoxArray& ba = spectral_kspace.spectralspace_ba;
+
+ // Allocate the arrays of coefficients
+ C_coef = SpectralCoefficients(ba, dm, 1, 0);
+ S_ck_coef = SpectralCoefficients(ba, dm, 1, 0);
+
+ InitializeSpectralCoefficients(spectral_kspace, dm, dt);
+}
+
+/* Advance the E and B fields of the PML in spectral space (stored in `f`)
+ * over one time step */
+void
+PMLPsatdAlgorithm::pushSpectralFields(SpectralFieldData& f) const{
+
+ // Loop over boxes
+ for (MFIter mfi(f.fields); mfi.isValid(); ++mfi){
+
+ const Box& bx = f.fields[mfi].box();
+
+ // Extract arrays for the fields to be updated
+ Array4<Complex> fields = f.fields[mfi].array();
+ // Extract arrays for the coefficients
+ Array4<const Real> C_arr = C_coef[mfi].array();
+ Array4<const Real> S_ck_arr = S_ck_coef[mfi].array();
+ // Extract pointers for the k vectors
+ const Real* modified_kx_arr = modified_kx_vec[mfi].dataPtr();
+#if (AMREX_SPACEDIM==3)
+ const Real* modified_ky_arr = modified_ky_vec[mfi].dataPtr();
+#endif
+ const Real* modified_kz_arr = modified_kz_vec[mfi].dataPtr();
+
+ // Loop over indices within one box
+ ParallelFor(bx,
+ [=] AMREX_GPU_DEVICE(int i, int j, int k) noexcept
+ {
+ // Record old values of the fields to be updated
+ using Idx = SpectralPMLIndex;
+ const Complex Ex_old = fields(i,j,k,Idx::Exy) \
+ + fields(i,j,k,Idx::Exz);
+ const Complex Ey_old = fields(i,j,k,Idx::Eyx) \
+ + fields(i,j,k,Idx::Eyz);
+ const Complex Ez_old = fields(i,j,k,Idx::Ezx) \
+ + fields(i,j,k,Idx::Ezy);
+ const Complex Bx_old = fields(i,j,k,Idx::Bxy) \
+ + fields(i,j,k,Idx::Bxz);
+ const Complex By_old = fields(i,j,k,Idx::Byx) \
+ + fields(i,j,k,Idx::Byz);
+ const Complex Bz_old = fields(i,j,k,Idx::Bzx) \
+ + fields(i,j,k,Idx::Bzy);
+ // k vector values, and coefficients
+ const Real kx = modified_kx_arr[i];
+#if (AMREX_SPACEDIM==3)
+ const Real ky = modified_ky_arr[j];
+ const Real kz = modified_kz_arr[k];
+#else
+ constexpr Real ky = 0;
+ const Real kz = modified_kz_arr[j];
+#endif
+ constexpr Real c2 = PhysConst::c*PhysConst::c;
+ const Complex I = Complex{0,1};
+ const Real C = C_arr(i,j,k);
+ const Real S_ck = S_ck_arr(i,j,k);
+
+ // Update E
+ fields(i,j,k,Idx::Exy) = C*fields(i,j,k,Idx::Exy) + S_ck*c2*I*ky*Bz_old;
+ fields(i,j,k,Idx::Exz) = C*fields(i,j,k,Idx::Exz) - S_ck*c2*I*kz*By_old;
+ fields(i,j,k,Idx::Eyz) = C*fields(i,j,k,Idx::Eyz) + S_ck*c2*I*kz*Bx_old;
+ fields(i,j,k,Idx::Eyx) = C*fields(i,j,k,Idx::Eyx) - S_ck*c2*I*kx*Bz_old;
+ fields(i,j,k,Idx::Ezx) = C*fields(i,j,k,Idx::Ezx) + S_ck*c2*I*kx*By_old;
+ fields(i,j,k,Idx::Ezy) = C*fields(i,j,k,Idx::Ezy) - S_ck*c2*I*ky*Bx_old;
+ // Update B
+ fields(i,j,k,Idx::Bxy) = C*fields(i,j,k,Idx::Bxy) - S_ck*I*ky*Ez_old;
+ fields(i,j,k,Idx::Bxz) = C*fields(i,j,k,Idx::Bxz) + S_ck*I*kz*Ey_old;
+ fields(i,j,k,Idx::Byz) = C*fields(i,j,k,Idx::Byz) - S_ck*I*kz*Ex_old;
+ fields(i,j,k,Idx::Byx) = C*fields(i,j,k,Idx::Byx) + S_ck*I*kx*Ez_old;
+ fields(i,j,k,Idx::Bzx) = C*fields(i,j,k,Idx::Bzx) - S_ck*I*kx*Ey_old;
+ fields(i,j,k,Idx::Bzy) = C*fields(i,j,k,Idx::Bzy) + S_ck*I*ky*Ex_old;
+ });
+ }
+};
+
+void PMLPsatdAlgorithm::InitializeSpectralCoefficients (
+ const SpectralKSpace& spectral_kspace,
+ const amrex::DistributionMapping& dm,
+ const amrex::Real dt)
+{
+ const BoxArray& ba = spectral_kspace.spectralspace_ba;
+ // Fill the coefficient arrays (C_coef, S_ck_coef) with the right values:
+ // Loop over boxes and compute the corresponding coefficients
+ // for each box owned by the local MPI proc
+ for (MFIter mfi(ba, dm); mfi.isValid(); ++mfi){
+
+ const Box& bx = ba[mfi];
+
+ // Extract pointers for the k vectors
+ const Real* modified_kx = modified_kx_vec[mfi].dataPtr();
+#if (AMREX_SPACEDIM==3)
+ const Real* modified_ky = modified_ky_vec[mfi].dataPtr();
+#endif
+ const Real* modified_kz = modified_kz_vec[mfi].dataPtr();
+ // Extract arrays for the coefficients
+ Array4<Real> C = C_coef[mfi].array();
+ Array4<Real> S_ck = S_ck_coef[mfi].array();
+
+ // Loop over indices within one box
+ ParallelFor(bx,
+ [=] AMREX_GPU_DEVICE(int i, int j, int k) noexcept
+ {
+ // Calculate norm of vector
+ const Real k_norm = std::sqrt(
+ std::pow(modified_kx[i], 2) +
+#if (AMREX_SPACEDIM==3)
+ std::pow(modified_ky[j], 2) +
+ std::pow(modified_kz[k], 2));
+#else
+ std::pow(modified_kz[j], 2));
+#endif
+
+ // Calculate coefficients
+ constexpr Real c = PhysConst::c;
+ if (k_norm != 0){
+ C(i,j,k) = std::cos(c*k_norm*dt);
+ S_ck(i,j,k) = std::sin(c*k_norm*dt)/(c*k_norm);
+ } else { // Handle k_norm = 0, by using the analytical limit
+ C(i,j,k) = 1.;
+ S_ck(i,j,k) = dt;
+ }
+ });
+ }
+};
diff --git a/Source/FieldSolver/SpectralSolver/SpectralAlgorithms/PsatdAlgorithm.H b/Source/FieldSolver/SpectralSolver/SpectralAlgorithms/PsatdAlgorithm.H
index 12718e38b..825d04dc2 100644
--- a/Source/FieldSolver/SpectralSolver/SpectralAlgorithms/PsatdAlgorithm.H
+++ b/Source/FieldSolver/SpectralSolver/SpectralAlgorithms/PsatdAlgorithm.H
@@ -13,14 +13,18 @@ class PsatdAlgorithm : public SpectralBaseAlgorithm
PsatdAlgorithm(const SpectralKSpace& spectral_kspace,
const amrex::DistributionMapping& dm,
const int norder_x, const int norder_y,
- const int norder_z, const bool nodal, const amrex::Real dt);
-
+ const int norder_z, const bool nodal,
+ const amrex::Real dt);
+ // Redefine functions from base class
+ virtual void pushSpectralFields(SpectralFieldData& f) const override final;
+ virtual int getRequiredNumberOfFields() const override final {
+ return SpectralFieldIndex::n_fields;
+ }
+
void InitializeSpectralCoefficients(const SpectralKSpace& spectral_kspace,
- const amrex::DistributionMapping& dm,
+ const amrex::DistributionMapping& dm,
const amrex::Real dt);
- void pushSpectralFields(SpectralFieldData& f) const override final;
-
private:
SpectralCoefficients C_coef, S_ck_coef, X1_coef, X2_coef, X3_coef;
};
diff --git a/Source/FieldSolver/SpectralSolver/SpectralAlgorithms/SpectralBaseAlgorithm.H b/Source/FieldSolver/SpectralSolver/SpectralAlgorithms/SpectralBaseAlgorithm.H
index 602eb2473..5d5e376c1 100644
--- a/Source/FieldSolver/SpectralSolver/SpectralAlgorithms/SpectralBaseAlgorithm.H
+++ b/Source/FieldSolver/SpectralSolver/SpectralAlgorithms/SpectralBaseAlgorithm.H
@@ -14,9 +14,9 @@
class SpectralBaseAlgorithm
{
public:
- // Member function that updates the fields in spectral space ;
- // meant to be overridden in subclasses
+ // Pure virtual member functions; meant to be overridden in subclasses
virtual void pushSpectralFields(SpectralFieldData& f) const = 0;
+ virtual int getRequiredNumberOfFields() const = 0;
// The destructor should also be a virtual function, so that
// a pointer to subclass of `SpectraBaseAlgorithm` actually
// calls the subclass's destructor.
diff --git a/Source/FieldSolver/SpectralSolver/SpectralFieldData.H b/Source/FieldSolver/SpectralSolver/SpectralFieldData.H
index 7954414b8..6a2446981 100644
--- a/Source/FieldSolver/SpectralSolver/SpectralFieldData.H
+++ b/Source/FieldSolver/SpectralSolver/SpectralFieldData.H
@@ -8,18 +8,24 @@
// Declare type for spectral fields
using SpectralField = amrex::FabArray< amrex::BaseFab <Complex> >;
-/* Index for the fields that will be stored in spectral space */
+/* Index for the regular fields, when stored in spectral space */
struct SpectralFieldIndex {
enum { Ex=0, Ey, Ez, Bx, By, Bz, Jx, Jy, Jz, rho_old, rho_new, n_fields };
// n_fields is automatically the total number of fields
};
+/* Index for the PML fields, when stored in spectral space */
+struct SpectralPMLIndex {
+ enum { Exy=0, Exz, Eyx, Eyz, Ezx, Ezy,
+ Bxy, Bxz, Byx, Byz, Bzx, Bzy, n_fields };
+ // n_fields is automatically the total number of fields
+};
+
/* \brief Class that stores the fields in spectral space, and performs the
* Fourier transforms between real space and spectral space
*/
class SpectralFieldData
{
- friend class PsatdAlgorithm;
// Define the FFTplans type, which holds one fft plan per box
// (plans are only initialized for the boxes that are owned by
@@ -32,8 +38,9 @@ class SpectralFieldData
public:
SpectralFieldData( const amrex::BoxArray& realspace_ba,
- const SpectralKSpace& k_space,
- const amrex::DistributionMapping& dm );
+ const SpectralKSpace& k_space,
+ const amrex::DistributionMapping& dm,
+ const int n_field_required );
SpectralFieldData() = default; // Default constructor
SpectralFieldData& operator=(SpectralFieldData&& field_data) = default;
~SpectralFieldData();
@@ -41,10 +48,10 @@ class SpectralFieldData
const int field_index, const int i_comp);
void BackwardTransform( amrex::MultiFab& mf,
const int field_index, const int i_comp);
-
- private:
// `fields` stores fields in spectral space, as multicomponent FabArray
SpectralField fields;
+
+ private:
// tmpRealField and tmpSpectralField store fields
// right before/after the Fourier transform
SpectralField tmpSpectralField; // contains Complexs
diff --git a/Source/FieldSolver/SpectralSolver/SpectralFieldData.cpp b/Source/FieldSolver/SpectralSolver/SpectralFieldData.cpp
index 948baf0a6..8f0853484 100644
--- a/Source/FieldSolver/SpectralSolver/SpectralFieldData.cpp
+++ b/Source/FieldSolver/SpectralSolver/SpectralFieldData.cpp
@@ -5,14 +5,14 @@ using namespace amrex;
/* \brief Initialize fields in spectral space, and FFT plans */
SpectralFieldData::SpectralFieldData( const BoxArray& realspace_ba,
const SpectralKSpace& k_space,
- const DistributionMapping& dm )
+ const DistributionMapping& dm,
+ const int n_field_required )
{
const BoxArray& spectralspace_ba = k_space.spectralspace_ba;
// Allocate the arrays that contain the fields in spectral space
// (one component per field)
- fields = SpectralField(spectralspace_ba, dm,
- SpectralFieldIndex::n_fields, 0);
+ fields = SpectralField(spectralspace_ba, dm, n_field_required, 0);
// Allocate temporary arrays - in real space and spectral space
// These arrays will store the data just before/after the FFT
diff --git a/Source/FieldSolver/SpectralSolver/SpectralSolver.H b/Source/FieldSolver/SpectralSolver/SpectralSolver.H
index d4019a9a3..c570b017b 100644
--- a/Source/FieldSolver/SpectralSolver/SpectralSolver.H
+++ b/Source/FieldSolver/SpectralSolver/SpectralSolver.H
@@ -23,7 +23,8 @@ class SpectralSolver
const amrex::DistributionMapping& dm,
const int norder_x, const int norder_y,
const int norder_z, const bool nodal,
- const amrex::RealVect dx, const amrex::Real dt );
+ const amrex::RealVect dx, const amrex::Real dt,
+ const bool pml=false );
/* \brief Transform the component `i_comp` of MultiFab `mf`
* to spectral space, and store the corresponding result internally
diff --git a/Source/FieldSolver/SpectralSolver/SpectralSolver.cpp b/Source/FieldSolver/SpectralSolver/SpectralSolver.cpp
index c21c3cfb1..4b9def013 100644
--- a/Source/FieldSolver/SpectralSolver/SpectralSolver.cpp
+++ b/Source/FieldSolver/SpectralSolver/SpectralSolver.cpp
@@ -1,19 +1,29 @@
#include <SpectralKSpace.H>
#include <SpectralSolver.H>
#include <PsatdAlgorithm.H>
+#include <PMLPsatdAlgorithm.H>
/* \brief Initialize the spectral Maxwell solver
*
* This function selects the spectral algorithm to be used, allocates the
* corresponding coefficients for the discretized field update equation,
* and prepares the structures that store the fields in spectral space.
+ *
+ * \param norder_x Order of accuracy of the spatial derivatives along x
+ * \param norder_y Order of accuracy of the spatial derivatives along y
+ * \param norder_z Order of accuracy of the spatial derivatives along z
+ * \param nodal Whether the solver is applied to a nodal or staggered grid
+ * \param dx Cell size along each dimension
+ * \param dt Time step
+ * \param pml Whether the boxes in which the solver is applied are PML boxes
*/
SpectralSolver::SpectralSolver(
const amrex::BoxArray& realspace_ba,
const amrex::DistributionMapping& dm,
const int norder_x, const int norder_y,
const int norder_z, const bool nodal,
- const amrex::RealVect dx, const amrex::Real dt ) {
+ const amrex::RealVect dx, const amrex::Real dt,
+ const bool pml ) {
// Initialize all structures using the same distribution mapping dm
@@ -24,12 +34,16 @@ SpectralSolver::SpectralSolver(
// - Select the algorithm depending on the input parameters
// Initialize the corresponding coefficients over k space
- // TODO: Add more algorithms + selection depending on input parameters
- // For the moment, this only uses the standard PsatdAlgorithm
- algorithm = std::unique_ptr<PsatdAlgorithm>( new PsatdAlgorithm(
+ if (pml) {
+ algorithm = std::unique_ptr<PMLPsatdAlgorithm>( new PMLPsatdAlgorithm(
+ k_space, dm, norder_x, norder_y, norder_z, nodal, dt ) );
+ } else {
+ algorithm = std::unique_ptr<PsatdAlgorithm>( new PsatdAlgorithm(
k_space, dm, norder_x, norder_y, norder_z, nodal, dt ) );
+ }
// - Initialize arrays for fields in spectral space + FFT plans
- field_data = SpectralFieldData( realspace_ba, k_space, dm );
+ field_data = SpectralFieldData( realspace_ba, k_space, dm,
+ algorithm->getRequiredNumberOfFields() );
};
diff --git a/Source/FieldSolver/WarpXPushFieldsEM.cpp b/Source/FieldSolver/WarpXPushFieldsEM.cpp
index 4fce4717b..1df05bc0f 100644
--- a/Source/FieldSolver/WarpXPushFieldsEM.cpp
+++ b/Source/FieldSolver/WarpXPushFieldsEM.cpp
@@ -18,6 +18,40 @@
using namespace amrex;
#ifdef WARPX_USE_PSATD
+namespace { // file-local helper for the PSATD field push
+ void
+ PushPSATDSinglePatch ( // advance E and B on one patch: forward FFT, spectral push, backward FFT
+ SpectralSolver& solver, // solver used for the transforms and for pushSpectralFields()
+ std::array<std::unique_ptr<amrex::MultiFab>,3>& Efield, // E components; passed to both the forward and the backward transform
+ std::array<std::unique_ptr<amrex::MultiFab>,3>& Bfield, // B components; passed to both the forward and the backward transform
+ std::array<std::unique_ptr<amrex::MultiFab>,3>& current, // J components; only forward-transformed here
+ std::unique_ptr<amrex::MultiFab>& rho ) { // charge density; forward-transformed twice (rho_old and rho_new)
+
+ using Idx = SpectralFieldIndex;
+
+ // Perform forward Fourier transform of E, B, J and rho into the solver's spectral fields
+ solver.ForwardTransform(*Efield[0], Idx::Ex);
+ solver.ForwardTransform(*Efield[1], Idx::Ey);
+ solver.ForwardTransform(*Efield[2], Idx::Ez);
+ solver.ForwardTransform(*Bfield[0], Idx::Bx);
+ solver.ForwardTransform(*Bfield[1], Idx::By);
+ solver.ForwardTransform(*Bfield[2], Idx::Bz);
+ solver.ForwardTransform(*current[0], Idx::Jx);
+ solver.ForwardTransform(*current[1], Idx::Jy);
+ solver.ForwardTransform(*current[2], Idx::Jz);
+ solver.ForwardTransform(*rho, Idx::rho_old, 0); // third argument 0 -> rho_old; presumably the MultiFab component index (confirm in SpectralSolver.H)
+ solver.ForwardTransform(*rho, Idx::rho_new, 1); // third argument 1 -> rho_new; same assumption as above
+ // Advance fields in spectral space
+ solver.pushSpectralFields();
+ // Perform backward Fourier Transform of E and B only (J and rho are not transformed back)
+ solver.BackwardTransform(*Efield[0], Idx::Ex);
+ solver.BackwardTransform(*Efield[1], Idx::Ey);
+ solver.BackwardTransform(*Efield[2], Idx::Ez);
+ solver.BackwardTransform(*Bfield[0], Idx::Bx);
+ solver.BackwardTransform(*Bfield[1], Idx::By);
+ solver.BackwardTransform(*Bfield[2], Idx::Bz);
+ }
+}
void
WarpX::PushPSATD (amrex::Real a_dt)
@@ -31,38 +65,25 @@ WarpX::PushPSATD (amrex::Real a_dt)
} else {
PushPSATD_localFFT(lev, a_dt);
}
+
+ // Evolve the fields in the PML boxes
+ if (do_pml && pml[lev]->ok()) {
+ pml[lev]->PushPSATD();
+ }
}
}
-void WarpX::PushPSATD_localFFT (int lev, amrex::Real /* dt */)
+void
+WarpX::PushPSATD_localFFT (int lev, amrex::Real /* dt */) // the time-step argument is unnamed and unused in this function
{
- auto& solver = *spectral_solver_fp[lev];
-
- // Perform forward Fourier transform
- solver.ForwardTransform(*Efield_fp[lev][0], SpectralFieldIndex::Ex);
- solver.ForwardTransform(*Efield_fp[lev][1], SpectralFieldIndex::Ey);
- solver.ForwardTransform(*Efield_fp[lev][2], SpectralFieldIndex::Ez);
- solver.ForwardTransform(*Bfield_fp[lev][0], SpectralFieldIndex::Bx);
- solver.ForwardTransform(*Bfield_fp[lev][1], SpectralFieldIndex::By);
- solver.ForwardTransform(*Bfield_fp[lev][2], SpectralFieldIndex::Bz);
- solver.ForwardTransform(*current_fp[lev][0], SpectralFieldIndex::Jx);
- solver.ForwardTransform(*current_fp[lev][1], SpectralFieldIndex::Jy);
- solver.ForwardTransform(*current_fp[lev][2], SpectralFieldIndex::Jz);
- solver.ForwardTransform(*rho_fp[lev], SpectralFieldIndex::rho_old, 0);
- solver.ForwardTransform(*rho_fp[lev], SpectralFieldIndex::rho_new, 1);
-
- // Advance fields in spectral space
- solver.pushSpectralFields();
-
- // Perform backward Fourier Transform
- solver.BackwardTransform(*Efield_fp[lev][0], SpectralFieldIndex::Ex);
- solver.BackwardTransform(*Efield_fp[lev][1], SpectralFieldIndex::Ey);
- solver.BackwardTransform(*Efield_fp[lev][2], SpectralFieldIndex::Ez);
- solver.BackwardTransform(*Bfield_fp[lev][0], SpectralFieldIndex::Bx);
- solver.BackwardTransform(*Bfield_fp[lev][1], SpectralFieldIndex::By);
- solver.BackwardTransform(*Bfield_fp[lev][2], SpectralFieldIndex::Bz);
+ // Update the fields on the fine patch, and on the coarse patch only when a coarse-patch solver exists at this level
+ PushPSATDSinglePatch( *spectral_solver_fp[lev],
+ Efield_fp[lev], Bfield_fp[lev], current_fp[lev], rho_fp[lev] );
+ if (spectral_solver_cp[lev]) {
+ PushPSATDSinglePatch( *spectral_solver_cp[lev],
+ Efield_cp[lev], Bfield_cp[lev], current_cp[lev], rho_cp[lev] );
+ }
}
-
#endif
void
@@ -560,3 +581,143 @@ WarpX::EvolveF (int lev, PatchType patch_type, Real a_dt, DtType a_dt_type)
}
}
+#ifdef WARPX_DIM_RZ
+// This scales the current by the inverse volume and wraps around the deposition at negative radius.
+// It is faster to apply this on the grid than to do it particle by particle.
+// It is put here since there isn't another nice place for it.
+void
+WarpX::ApplyInverseVolumeScalingToCurrentDensity (MultiFab* Jx, MultiFab* Jy, MultiFab* Jz, int lev)
+{
+ const long ngJ = Jx->nGrow(); // guard-cell count of Jx; also used as the wrap-around width for Jy and Jz
+ const std::array<Real,3>& dx = WarpX::CellSize(lev);
+ const Real dr = dx[0];
+
+ Box tilebox;
+
+ for ( MFIter mfi(*Jx, TilingIfNotGPU()); mfi.isValid(); ++mfi ) // tiles come from Jx and are converted per component below
+ {
+
+ Array4<Real> const& Jr_arr = Jx->array(mfi);
+ Array4<Real> const& Jt_arr = Jy->array(mfi);
+ Array4<Real> const& Jz_arr = Jz->array(mfi);
+
+ tilebox = mfi.tilebox();
+ Box tbr = convert(tilebox, WarpX::jx_nodal_flag);
+ Box tbt = convert(tilebox, WarpX::jy_nodal_flag);
+ Box tbz = convert(tilebox, WarpX::jz_nodal_flag);
+
+ // Lower corner of tile box physical domain
+ // Note that tilebox is never grown in this function, so
+ // these do not include the guard cells.
+ const std::array<Real, 3>& xyzmin = WarpX::LowerCorner(tilebox, lev);
+ const Dim3 lo = lbound(tilebox);
+ const Real rmin = xyzmin[0];
+ const int irmin = lo.x;
+
+ // Rescale current in r-z mode since the inverse volume factor was not
+ // included in the current deposition.
+ amrex::ParallelFor(tbr,
+ [=] AMREX_GPU_DEVICE (int i, int j, int k) // k is unused: the arrays are indexed with 0 in the third dimension
+ {
+ // Wrap the current density deposited in the guard cells around
+ // to the cells above the axis.
+ // Note that Jr(i==0) is at 1/2 dr.
+ if (rmin == 0. && 0 <= i && i < ngJ) {
+ Jr_arr(i,j,0) -= Jr_arr(-1-i,j,0); // mirror cell -1-i is subtracted (sign flip for Jr)
+ }
+ // Apply the inverse volume scaling
+ // Since Jr is not cell centered in r, no need for distinction
+ // between on axis and off-axis factors
+ const amrex::Real r = std::abs(rmin + (i - irmin + 0.5)*dr);
+ Jr_arr(i,j,0) /= (2.*MathConst::pi*r);
+ });
+ amrex::ParallelFor(tbt,
+ [=] AMREX_GPU_DEVICE (int i, int j, int k) // k is unused: the arrays are indexed with 0 in the third dimension
+ {
+ // Wrap the current density deposited in the guard cells around
+ // to the cells above the axis.
+ // Jt is located on the boundary
+ if (rmin == 0. && 0 < i && i <= ngJ) {
+ Jt_arr(i,j,0) += Jt_arr(-i,j,0); // mirror node -i is added (no sign flip)
+ }
+
+ // Apply the inverse volume scaling
+ // Jt is forced to zero on axis.
+ const amrex::Real r = std::abs(rmin + (i - irmin)*dr);
+ if (r == 0.) {
+ Jt_arr(i,j,0) = 0.;
+ } else {
+ Jt_arr(i,j,0) /= (2.*MathConst::pi*r);
+ }
+ });
+ amrex::ParallelFor(tbz,
+ [=] AMREX_GPU_DEVICE (int i, int j, int k) // k is unused: the arrays are indexed with 0 in the third dimension
+ {
+ // Wrap the current density deposited in the guard cells around
+ // to the cells above the axis.
+ // Jz is located on the boundary
+ if (rmin == 0. && 0 < i && i <= ngJ) {
+ Jz_arr(i,j,0) += Jz_arr(-i,j,0); // mirror node -i is added (no sign flip)
+ }
+
+ // Apply the inverse volume scaling
+ const amrex::Real r = std::abs(rmin + (i - irmin)*dr);
+ if (r == 0.) {
+ // Verboncoeur JCP 164, 421-427 (2001) : corrected volume on axis
+ Jz_arr(i,j,0) /= (MathConst::pi*dr/3.);
+ } else {
+ Jz_arr(i,j,0) /= (2.*MathConst::pi*r);
+ }
+ });
+ }
+}
+
+void
+WarpX::ApplyInverseVolumeScalingToChargeDensity (MultiFab* Rho, int lev) // wraps charge deposited at negative radius and divides Rho by 2*pi*r (pi*dr/3 on axis)
+{
+ const long ngRho = Rho->nGrow(); // guard-cell count of Rho; used as the wrap-around width
+ const std::array<Real,3>& dx = WarpX::CellSize(lev);
+ const Real dr = dx[0];
+
+ Box tilebox;
+
+ for ( MFIter mfi(*Rho, TilingIfNotGPU()); mfi.isValid(); ++mfi )
+ {
+
+ Array4<Real> const& Rho_arr = Rho->array(mfi);
+
+ tilebox = mfi.tilebox();
+ Box tb = convert(tilebox, IntVect::TheUnitVector()); // all-ones index type, i.e. nodal in every direction
+
+ // Lower corner of tile box physical domain
+ // Note that tilebox is never grown in this function, so
+ // these do not include the guard cells.
+ const std::array<Real, 3>& xyzmin = WarpX::LowerCorner(tilebox, lev);
+ const Dim3 lo = lbound(tilebox);
+ const Real rmin = xyzmin[0];
+ const int irmin = lo.x;
+
+ // Rescale charge in r-z mode since the inverse volume factor was not
+ // included in the charge deposition.
+ amrex::ParallelFor(tb, Rho->nComp(), // every component of Rho is processed
+ [=] AMREX_GPU_DEVICE (int i, int j, int k, int icomp) // k is unused: the array is indexed with 0 in the third dimension
+ {
+ // Wrap the charge density deposited in the guard cells around
+ // to the cells above the axis.
+ // Rho is located on the boundary
+ if (rmin == 0. && 0 < i && i <= ngRho) {
+ Rho_arr(i,j,0,icomp) += Rho_arr(-i,j,0,icomp); // mirror node -i is added (no sign flip)
+ }
+
+ // Apply the inverse volume scaling
+ const amrex::Real r = std::abs(rmin + (i - irmin)*dr);
+ if (r == 0.) {
+ // Verboncoeur JCP 164, 421-427 (2001) : corrected volume on axis
+ Rho_arr(i,j,0,icomp) /= (MathConst::pi*dr/3.);
+ } else {
+ Rho_arr(i,j,0,icomp) /= (2.*MathConst::pi*r);
+ }
+ });
+ }
+}
+#endif
diff --git a/Source/FortranInterface/WarpX_f.H b/Source/FortranInterface/WarpX_f.H
index 0440148eb..aac23f781 100644
--- a/Source/FortranInterface/WarpX_f.H
+++ b/Source/FortranInterface/WarpX_f.H
@@ -62,7 +62,7 @@
#define WRPX_PUSH_LEAPFROG warpx_push_leapfrog_2d
#define WRPX_PUSH_LEAPFROG_POSITIONS warpx_push_leapfrog_positions_2d
-#ifdef WARPX_RZ
+#ifdef WARPX_DIM_RZ
#define WRPX_COMPUTE_DIVE warpx_compute_dive_rz
#else
#define WRPX_COMPUTE_DIVE warpx_compute_dive_2d
@@ -75,22 +75,6 @@ extern "C"
{
#endif
- // Charge deposition
- void warpx_charge_deposition(amrex::Real* rho,
- const long* np, const amrex::Real* xp, const amrex::Real* yp, const amrex::Real* zp, const amrex::Real* w,
- const amrex::Real* q, const amrex::Real* xmin, const amrex::Real* ymin, const amrex::Real* zmin,
- const amrex::Real* dx, const amrex::Real* dy, const amrex::Real* dz,
- const long* nx, const long* ny, const long* nz,
- const long* nxguard, const long* nyguard, const long* nzguard,
- const long* nox, const long* noy,const long* noz,
- const long* lvect, const long* charge_depo_algo);
-
- // Charge deposition finalize for RZ
- void warpx_charge_deposition_rz_volume_scaling(
- amrex::Real* rho, const long* rho_ng, const int* rho_ntot,
- const amrex::Real* rmin,
- const amrex::Real* dr);
-
// Current deposition
void warpx_current_deposition(
amrex::Real* jx, const long* jx_ng, const int* jx_ntot,
@@ -106,34 +90,6 @@ extern "C"
const long* nox, const long* noy,const long* noz,
const int* l_nodal, const long* lvect, const long* current_depo_algo);
- // Current deposition finalize for RZ
- void warpx_current_deposition_rz_volume_scaling(
- amrex::Real* jx, const long* jx_ng, const int* jx_ntot,
- amrex::Real* jy, const long* jy_ng, const int* jy_ntot,
- amrex::Real* jz, const long* jz_ng, const int* jz_ntot,
- const amrex::Real* rmin,
- const amrex::Real* dr);
-
- // Field gathering
-
- void warpx_geteb_energy_conserving(const long* np,
- const amrex::Real* xp, const amrex::Real* yp, const amrex::Real* zp,
- amrex::Real* exp, amrex::Real* eyp, amrex::Real* ezp,
- amrex::Real* bxp, amrex::Real* byp, amrex::Real* bzp,
- const int* ixyzmin,
- const amrex::Real* xmin, const amrex::Real* ymin, const amrex::Real* zmin,
- const amrex::Real* dx, const amrex::Real* dy, const amrex::Real* dz,
- const long* nox, const long* noy, const long* noz,
- const amrex::Real* exg, const int* exg_lo, const int* exg_hi,
- const amrex::Real* eyg, const int* eyg_lo, const int* eyg_hi,
- const amrex::Real* ezg, const int* ezg_lo, const int* ezg_hi,
- const amrex::Real* bxg, const int* bxg_lo, const int* bxg_hi,
- const amrex::Real* byg, const int* byg_lo, const int* byg_hi,
- const amrex::Real* bzg, const int* bzg_lo, const int* bzg_hi,
- const int* ll4symtry, const int* l_lower_order_in_v,
- const int* l_nodal, const long* lvect,
- const long* field_gathe_algo);
-
// Particle pusher (velocity and position)
void warpx_particle_pusher(const long* np,
@@ -342,7 +298,7 @@ extern "C"
const BL_FORT_FAB_ARG_ANYD(ey),
const BL_FORT_FAB_ARG_ANYD(ez),
const amrex::Real* dx
-#ifdef WARPX_RZ
+#ifdef WARPX_DIM_RZ
,const amrex::Real* rmin
#endif
);
diff --git a/Source/FortranInterface/WarpX_picsar.F90 b/Source/FortranInterface/WarpX_picsar.F90
index dc47245dd..34084d753 100644
--- a/Source/FortranInterface/WarpX_picsar.F90
+++ b/Source/FortranInterface/WarpX_picsar.F90
@@ -1,20 +1,15 @@
#if (AMREX_SPACEDIM == 3)
-#define WRPX_PXR_GETEB_ENERGY_CONSERVING geteb3d_energy_conserving_generic
#define WRPX_PXR_CURRENT_DEPOSITION depose_jxjyjz_generic
#elif (AMREX_SPACEDIM == 2)
-#ifdef WARPX_RZ
+#ifdef WARPX_DIM_RZ
-#define WRPX_PXR_GETEB_ENERGY_CONSERVING geteb2drz_energy_conserving_generic
#define WRPX_PXR_CURRENT_DEPOSITION depose_jrjtjz_generic_rz
-#define WRPX_PXR_RZ_VOLUME_SCALING_RHO apply_rz_volume_scaling_rho
-#define WRPX_PXR_RZ_VOLUME_SCALING_J apply_rz_volume_scaling_j
#else
-#define WRPX_PXR_GETEB_ENERGY_CONSERVING geteb2dxz_energy_conserving_generic
#define WRPX_PXR_CURRENT_DEPOSITION depose_jxjyjz_generic_2d
#endif
@@ -56,227 +51,6 @@ contains
! _________________________________________________________________
!>
!> @brief
- !> Main subroutine for the field gathering process
- !>
- !> @param[in] np number of particles
- !> @param[in] xp,yp,zp particle position arrays
- !> @param[in] ex,ey,ez particle electric fields in each direction
- !> @param[in] bx,by,bz particle magnetic fields in each direction
- !> @param[in] ixyzmin tile grid minimum index
- !> @param[in] xmin,ymin,zmin tile grid minimum position
- !> @param[in] dx,dy,dz space discretization steps
- !> @param[in] xyzmin grid minimum position
- !> @param[in] dxyz space discretization steps
- !> @param[in] nox,noy,noz interpolation order
- !> @param[in] exg,eyg,ezg electric field grid arrays
- !> @param[in] bxg,byg,bzg electric field grid arrays
- !> @param[in] lvect vector length
- !>
- subroutine warpx_geteb_energy_conserving(np,xp,yp,zp, &
- ex,ey,ez,bx,by,bz,ixyzmin,xmin,ymin,zmin,dx,dy,dz,nox,noy,noz, &
- exg,exg_lo,exg_hi,eyg,eyg_lo,eyg_hi,ezg,ezg_lo,ezg_hi, &
- bxg,bxg_lo,bxg_hi,byg,byg_lo,byg_hi,bzg,bzg_lo,bzg_hi, &
- ll4symtry,l_lower_order_in_v, l_nodal,&
- lvect,field_gathe_algo) &
- bind(C, name="warpx_geteb_energy_conserving")
-
- integer, intent(in) :: exg_lo(AMREX_SPACEDIM), eyg_lo(AMREX_SPACEDIM), ezg_lo(AMREX_SPACEDIM), &
- bxg_lo(AMREX_SPACEDIM), byg_lo(AMREX_SPACEDIM), bzg_lo(AMREX_SPACEDIM)
- integer, intent(in) :: exg_hi(AMREX_SPACEDIM), eyg_hi(AMREX_SPACEDIM), ezg_hi(AMREX_SPACEDIM), &
- bxg_hi(AMREX_SPACEDIM), byg_hi(AMREX_SPACEDIM), bzg_hi(AMREX_SPACEDIM)
- integer, intent(in) :: ixyzmin(AMREX_SPACEDIM)
- real(amrex_real), intent(in) :: xmin,ymin,zmin,dx,dy,dz
- integer(c_long), intent(in) :: field_gathe_algo
- integer(c_long), intent(in) :: np,nox,noy,noz
- integer(c_int), intent(in) :: ll4symtry,l_lower_order_in_v, l_nodal
- integer(c_long),intent(in) :: lvect
- real(amrex_real), intent(in), dimension(np) :: xp,yp,zp
- real(amrex_real), intent(out), dimension(np) :: ex,ey,ez,bx,by,bz
- real(amrex_real),intent(in):: exg(*), eyg(*), ezg(*), bxg(*), byg(*), bzg(*)
- logical(pxr_logical) :: pxr_ll4symtry, pxr_l_lower_order_in_v, pxr_l_nodal
-
- ! Compute the number of valid cells and guard cells
- integer(c_long) :: exg_nvalid(AMREX_SPACEDIM), eyg_nvalid(AMREX_SPACEDIM), ezg_nvalid(AMREX_SPACEDIM), &
- bxg_nvalid(AMREX_SPACEDIM), byg_nvalid(AMREX_SPACEDIM), bzg_nvalid(AMREX_SPACEDIM), &
- exg_nguards(AMREX_SPACEDIM), eyg_nguards(AMREX_SPACEDIM), ezg_nguards(AMREX_SPACEDIM), &
- bxg_nguards(AMREX_SPACEDIM), byg_nguards(AMREX_SPACEDIM), bzg_nguards(AMREX_SPACEDIM)
-
- pxr_ll4symtry = ll4symtry .eq. 1
- pxr_l_lower_order_in_v = l_lower_order_in_v .eq. 1
- pxr_l_nodal = l_nodal .eq. 1
-
- exg_nguards = ixyzmin - exg_lo
- eyg_nguards = ixyzmin - eyg_lo
- ezg_nguards = ixyzmin - ezg_lo
- bxg_nguards = ixyzmin - bxg_lo
- byg_nguards = ixyzmin - byg_lo
- bzg_nguards = ixyzmin - bzg_lo
- exg_nvalid = exg_lo + exg_hi - 2_c_long*ixyzmin + 1_c_long
- eyg_nvalid = eyg_lo + eyg_hi - 2_c_long*ixyzmin + 1_c_long
- ezg_nvalid = ezg_lo + ezg_hi - 2_c_long*ixyzmin + 1_c_long
- bxg_nvalid = bxg_lo + bxg_hi - 2_c_long*ixyzmin + 1_c_long
- byg_nvalid = byg_lo + byg_hi - 2_c_long*ixyzmin + 1_c_long
- bzg_nvalid = bzg_lo + bzg_hi - 2_c_long*ixyzmin + 1_c_long
-
- CALL WRPX_PXR_GETEB_ENERGY_CONSERVING(np,xp,yp,zp, &
- ex,ey,ez,bx,by,bz,xmin,ymin,zmin,dx,dy,dz,nox,noy,noz, &
- exg,exg_nguards,exg_nvalid,&
- eyg,eyg_nguards,eyg_nvalid,&
- ezg,ezg_nguards,ezg_nvalid,&
- bxg,bxg_nguards,bxg_nvalid,&
- byg,byg_nguards,byg_nvalid,&
- bzg,bzg_nguards,bzg_nvalid,&
- pxr_ll4symtry, pxr_l_lower_order_in_v, pxr_l_nodal, &
- lvect, field_gathe_algo )
-
- end subroutine warpx_geteb_energy_conserving
-
-! _________________________________________________________________
-!>
-!> @brief
-!> Main subroutine for the charge deposition
-!>
-!> @details
-!> This subroutines enable to controle the interpolation order
-!> via the parameters nox,noy,noz and the type of algorithm via
-!> the parameter charge_depo_algo
-!
-!> @param[inout] rho charge array
-!> @param[in] np number of particles
-!> @param[in] xp,yp,zp particle position arrays
-!> @param[in] w particle weight arrays
-!> @param[in] q particle species charge
-!> @param[in] xmin,ymin,zmin tile grid minimum position
-!> @param[in] dx,dy,dz space discretization steps
-!> @param[in] nx,ny,nz number of cells
-!> @param[in] nxguard,nyguard,nzguard number of guard cells
-!> @param[in] nox,noy,noz interpolation order
-!> @param[in] lvect vector length
-!> @param[in] charge_depo_algo algorithm choice for the charge deposition
-!>
-subroutine warpx_charge_deposition(rho,np,xp,yp,zp,w,q,xmin,ymin,zmin,dx,dy,dz,nx,ny,nz,&
- nxguard,nyguard,nzguard,nox,noy,noz,lvect,charge_depo_algo) &
- bind(C, name="warpx_charge_deposition")
-
- integer(c_long), intent(IN) :: np
- integer(c_long), intent(IN) :: nx,ny,nz
- integer(c_long), intent(IN) :: nxguard,nyguard,nzguard
- integer(c_long), intent(IN) :: nox,noy,noz
- real(amrex_real), intent(IN OUT) :: rho(*)
- real(amrex_real), intent(IN) :: q
- real(amrex_real), intent(IN) :: dx,dy,dz
- real(amrex_real), intent(IN) :: xmin,ymin,zmin
- real(amrex_real), intent(IN), dimension(np) :: xp,yp,zp,w
- integer(c_long), intent(IN) :: lvect
- integer(c_long), intent(IN) :: charge_depo_algo
-
-
- ! Dimension 3
-#if (AMREX_SPACEDIM==3)
-
- SELECT CASE(charge_depo_algo)
-
- ! Scalar classical charge deposition subroutines
- CASE(1)
- IF ((nox.eq.1).and.(noy.eq.1).and.(noz.eq.1)) THEN
-
- CALL depose_rho_scalar_1_1_1(rho,np,xp,yp,zp,w,q,xmin,ymin,zmin,dx,dy,dz,nx,ny,nz,&
- nxguard,nyguard,nzguard,lvect)
-
- ELSE IF ((nox.eq.2).and.(noy.eq.2).and.(noz.eq.2)) THEN
-
- CALL depose_rho_scalar_2_2_2(rho,np,xp,yp,zp,w,q,xmin,ymin,zmin,dx,dy,dz,nx,ny,nz,&
- nxguard,nyguard,nzguard,lvect)
-
- ELSE IF ((nox.eq.3).and.(noy.eq.3).and.(noz.eq.3)) THEN
-
- CALL depose_rho_scalar_3_3_3(rho,np,xp,yp,zp,w,q,xmin,ymin,zmin,dx,dy,dz,nx,ny,nz,&
- nxguard,nyguard,nzguard,lvect)
-
- ELSE
- CALL pxr_depose_rho_n(rho,np,xp,yp,zp,w,q,xmin,ymin,zmin,dx,dy,dz,nx,ny,nz,&
- nxguard,nyguard,nzguard,nox,noy,noz, &
- .TRUE._c_long,.FALSE._c_long)
- ENDIF
-
- ! Optimized subroutines
- CASE DEFAULT
-
- IF ((nox.eq.1).and.(noy.eq.1).and.(noz.eq.1)) THEN
- CALL depose_rho_vecHVv2_1_1_1(rho,np,xp,yp,zp,w,q,xmin,ymin,zmin,dx,dy,dz,nx,ny,nz,&
- nxguard,nyguard,nzguard,lvect)
-
- ELSE IF ((nox.eq.2).and.(noy.eq.2).and.(noz.eq.2)) THEN
- CALL depose_rho_vecHVv2_2_2_2(rho,np,xp,yp,zp,w,q,xmin,ymin,zmin,dx,dy,dz,nx,ny,nz,&
- nxguard,nyguard,nzguard,lvect)
-
- ELSE
- CALL pxr_depose_rho_n(rho,np,xp,yp,zp,w,q,xmin,ymin,zmin,dx,dy,dz,nx,ny,nz,&
- nxguard,nyguard,nzguard,nox,noy,noz, &
- .TRUE._c_long,.FALSE._c_long)
- ENDIF
- END SELECT
-
- ! Dimension 2
-#elif (AMREX_SPACEDIM==2)
-
-#ifdef WARPX_RZ
- logical(pxr_logical) :: l_2drz = .TRUE._c_long
-#else
- logical(pxr_logical) :: l_2drz = .FALSE._c_long
-#endif
-
- CALL pxr_depose_rho_n_2dxz(rho,np,xp,yp,zp,w,q,xmin,zmin,dx,dz,nx,nz,&
- nxguard,nzguard,nox,noz, &
- .TRUE._c_long, .FALSE._c_long, l_2drz, 0_c_long)
-
-#endif
-
- end subroutine warpx_charge_deposition
-
- ! _________________________________________________________________
- !>
- !> @brief
- !> Applies the inverse volume scaling for RZ charge deposition
- !>
- !> @details
- !> The scaling is done for both single mode (FDTD) and
- !> multi mode (spectral) (todo)
- !
- !> @param[inout] rho charge array
- !> @param[in] rmin tile grid minimum radius
- !> @param[in] dr radial space discretization steps
- !> @param[in] nx,ny,nz number of cells
- !> @param[in] nxguard,nyguard,nzguard number of guard cells
- !>
- subroutine warpx_charge_deposition_rz_volume_scaling(rho,rho_ng,rho_ntot,rmin,dr) &
- bind(C, name="warpx_charge_deposition_rz_volume_scaling")
-
- integer, intent(in) :: rho_ntot(AMREX_SPACEDIM)
- integer(c_long), intent(in) :: rho_ng
- real(amrex_real), intent(IN OUT):: rho(*)
- real(amrex_real), intent(IN) :: rmin, dr
-
-#ifdef WARPX_RZ
- integer(c_long) :: type_rz_depose = 1
-#endif
-
- ! Compute the number of valid cells and guard cells
- integer(c_long) :: rho_nvalid(AMREX_SPACEDIM), rho_nguards(AMREX_SPACEDIM)
- rho_nvalid = rho_ntot - 2*rho_ng
- rho_nguards = rho_ng
-
-#ifdef WARPX_RZ
- CALL WRPX_PXR_RZ_VOLUME_SCALING_RHO( &
- rho,rho_nguards,rho_nvalid, &
- rmin,dr,type_rz_depose)
-#endif
-
- end subroutine warpx_charge_deposition_rz_volume_scaling
-
- ! _________________________________________________________________
- !>
- !> @brief
!> Main subroutine for the current deposition
!>
!> @details
@@ -355,165 +129,4 @@ subroutine warpx_charge_deposition(rho,np,xp,yp,zp,w,q,xmin,ymin,zmin,dx,dy,dz,n
end subroutine warpx_current_deposition
- ! _________________________________________________________________
- !>
- !> @brief
- !> Applies the inverse volume scaling for RZ current deposition
- !>
- !> @details
- !> The scaling is done for single mode only
- !
- !> @param[inout] jx,jy,jz current arrays
- !> @param[in] jx_ntot,jy_ntot,jz_ntot vectors with total number of
- !> cells (including guard cells) along each axis for each current
- !> @param[in] jx_ng,jy_ng,jz_ng vectors with number of guard cells along each
- !> axis for each current
- !> @param[in] rmin tile grid minimum radius
- !> @param[in] dr radial space discretization steps
- !>
- subroutine warpx_current_deposition_rz_volume_scaling( &
- jx,jx_ng,jx_ntot,jy,jy_ng,jy_ntot,jz,jz_ng,jz_ntot, &
- rmin,dr) &
- bind(C, name="warpx_current_deposition_rz_volume_scaling")
-
- integer, intent(in) :: jx_ntot(AMREX_SPACEDIM), jy_ntot(AMREX_SPACEDIM), jz_ntot(AMREX_SPACEDIM)
- integer(c_long), intent(in) :: jx_ng, jy_ng, jz_ng
- real(amrex_real), intent(IN OUT):: jx(*), jy(*), jz(*)
- real(amrex_real), intent(IN) :: rmin, dr
-
-#ifdef WARPX_RZ
- integer(c_long) :: type_rz_depose = 1
-#endif
- ! Compute the number of valid cells and guard cells
- integer(c_long) :: jx_nvalid(AMREX_SPACEDIM), jy_nvalid(AMREX_SPACEDIM), jz_nvalid(AMREX_SPACEDIM), &
- jx_nguards(AMREX_SPACEDIM), jy_nguards(AMREX_SPACEDIM), jz_nguards(AMREX_SPACEDIM)
- jx_nvalid = jx_ntot - 2*jx_ng
- jy_nvalid = jy_ntot - 2*jy_ng
- jz_nvalid = jz_ntot - 2*jz_ng
- jx_nguards = jx_ng
- jy_nguards = jy_ng
- jz_nguards = jz_ng
-
-#ifdef WARPX_RZ
- CALL WRPX_PXR_RZ_VOLUME_SCALING_J( &
- jx,jx_nguards,jx_nvalid, &
- jy,jy_nguards,jy_nvalid, &
- jz,jz_nguards,jz_nvalid, &
- rmin,dr,type_rz_depose)
-#endif
-
- end subroutine warpx_current_deposition_rz_volume_scaling
-
- ! _________________________________________________________________
- !>
- !> @brief
- !> Main subroutine for the particle pusher (velocity and position)
- !>
- !> @param[in] np number of super-particles
- !> @param[in] xp,yp,zp particle position arrays
- !> @param[in] uxp,uyp,uzp normalized momentum in each direction
- !> @param[in] gaminv particle Lorentz factors
- !> @param[in] ex,ey,ez particle electric fields in each direction
- !> @param[in] bx,by,bz particle magnetic fields in each direction
- !> @param[in] q charge
- !> @param[in] m masse
- !> @param[in] dt time step
- !> @param[in] particle_pusher_algo Particle pusher algorithm
- subroutine warpx_particle_pusher(np,xp,yp,zp,uxp,uyp,uzp, &
- gaminv,&
- ex,ey,ez,bx,by,bz,q,m,dt, &
- particle_pusher_algo) &
- bind(C, name="warpx_particle_pusher")
-
- INTEGER(c_long), INTENT(IN) :: np
- REAL(amrex_real),INTENT(INOUT) :: gaminv(np)
- REAL(amrex_real),INTENT(INOUT) :: xp(np),yp(np),zp(np)
- REAL(amrex_real),INTENT(INOUT) :: uxp(np),uyp(np),uzp(np)
- REAL(amrex_real),INTENT(IN) :: ex(np),ey(np),ez(np)
- REAL(amrex_real),INTENT(IN) :: bx(np),by(np),bz(np)
- REAL(amrex_real),INTENT(IN) :: q,m,dt
- INTEGER(c_long), INTENT(IN) :: particle_pusher_algo
-
- SELECT CASE (particle_pusher_algo)
-
- !! Vay pusher -- Full push
- CASE (1_c_long)
- CALL pxr_set_gamma(np,uxp,uyp,uzp,gaminv)
-
- CALL pxr_ebcancelpush3d(np,uxp,uyp,uzp,gaminv, &
- ex,ey,ez, &
- bx,by,bz,q,m,dt,0_c_long)
- CASE DEFAULT
-
- ! Momentum pusher in a single loop
- CALL pxr_boris_push_u_3d(np,uxp,uyp,uzp,&
- gaminv, &
- ex,ey,ez, &
- bx,by,bz, &
- q,m,dt)
-
- END SELECT
-
- !!!! --- push particle species positions a time step
-#if (AMREX_SPACEDIM == 3) || (defined WARPX_RZ)
- CALL pxr_pushxyz(np,xp,yp,zp,uxp,uyp,uzp,gaminv,dt)
-#elif (AMREX_SPACEDIM == 2)
- CALL pxr_pushxz(np,xp,zp,uxp,uzp,gaminv,dt)
-#endif
-
- end subroutine warpx_particle_pusher
-
-
- ! _________________________________________________________________
- !>
- !> @brief
- !> Main subroutine for the particle pusher (velocity)
- !>
- !> @param[in] np number of super-particles
- !> @param[in] xp,yp,zp particle position arrays
- !> @param[in] uxp,uyp,uzp normalized momentum in each direction
- !> @param[in] gaminv particle Lorentz factors
- !> @param[in] ex,ey,ez particle electric fields in each direction
- !> @param[in] bx,by,bz particle magnetic fields in each direction
- !> @param[in] q charge
- !> @param[in] m masse
- !> @param[in] dt time step
- !> @param[in] particle_pusher_algo Particle pusher algorithm
- subroutine warpx_particle_pusher_momenta(np,xp,yp,zp,uxp,uyp,uzp, &
- gaminv,&
- ex,ey,ez,bx,by,bz,q,m,dt, &
- particle_pusher_algo) &
- bind(C, name="warpx_particle_pusher_momenta")
-
- INTEGER(c_long), INTENT(IN) :: np
- REAL(amrex_real),INTENT(INOUT) :: gaminv(np)
- REAL(amrex_real),INTENT(IN) :: xp(np),yp(np),zp(np)
- REAL(amrex_real),INTENT(INOUT) :: uxp(np),uyp(np),uzp(np)
- REAL(amrex_real),INTENT(IN) :: ex(np),ey(np),ez(np)
- REAL(amrex_real),INTENT(IN) :: bx(np),by(np),bz(np)
- REAL(amrex_real),INTENT(IN) :: q,m,dt
- INTEGER(c_long), INTENT(IN) :: particle_pusher_algo
-
- SELECT CASE (particle_pusher_algo)
-
- !! Vay pusher -- Full push
- CASE (1_c_long)
- CALL pxr_set_gamma(np,uxp,uyp,uzp,gaminv)
-
- CALL pxr_ebcancelpush3d(np,uxp,uyp,uzp,gaminv, &
- ex,ey,ez, &
- bx,by,bz,q,m,dt,0_c_long)
- CASE DEFAULT
-
- ! Momentum pusher in a single loop
- CALL pxr_boris_push_u_3d(np,uxp,uyp,uzp,&
- gaminv, &
- ex,ey,ez, &
- bx,by,bz, &
- q,m,dt)
-
- END SELECT
-
- end subroutine warpx_particle_pusher_momenta
-
end module warpx_to_pxr_module
diff --git a/Source/Initialization/CustomDensityProb.H b/Source/Initialization/CustomDensityProb.H
new file mode 100644
index 000000000..b00830e6c
--- /dev/null
+++ b/Source/Initialization/CustomDensityProb.H
@@ -0,0 +1,49 @@
+#ifndef CUSTOM_DENSITY_PROB_H_
+#define CUSTOM_DENSITY_PROB_H_
+
+#include <AMReX_ParmParse.H>
+#include <AMReX_Arena.H>
+#include <AMReX_Gpu.H>
+#include <AMReX_Dim3.H>
+
+// An example of Custom Density Profile
+
+// struct whose getDensity returns density at a given position computed from
+// a custom function, with runtime input parameters.
+struct InjectorDensityCustom
+{
+ InjectorDensityCustom (std::string const& species_name)
+ : p(nullptr)
+ {
+ // Read <species_name>.custom_profile_params through ParmParse, and
+ // copy the values into memory allocated from the managed arena.
+ amrex::ParmParse pp(species_name);
+ std::vector<amrex::Real> v;
+ pp.getarr("custom_profile_params", v);
+ p = static_cast<amrex::Real*>
+ (amrex::The_Managed_Arena()->alloc(sizeof(amrex::Real)*v.size()));
+ for (int i = 0; i < static_cast<int>(v.size()); ++i) {
+ p[i] = v[i];
+ }
+ }
+
+ // Return density at given position, using user-defined parameters
+ // stored in p. This example ignores the position and returns p[0]; NOTE(review): assumes at least one parameter was read, no size check is done here.
+ AMREX_GPU_HOST_DEVICE
+ amrex::Real
+ getDensity (amrex::Real, amrex::Real, amrex::Real) const noexcept
+ {
+ return p[0];
+ }
+
+ // Note that we are not allowed to have non-trivial destructor.
+ // So we rely on clear() to free memory. Copies of this struct share the same p, so clear() should run once per allocation - TODO confirm in the owner.
+ void clear () {
+ amrex::The_Managed_Arena()->free(p);
+ }
+
+private:
+ amrex::Real* p; // parameter array allocated in the constructor and released by clear()
+};
+
+#endif
diff --git a/Source/Initialization/CustomDensityProb.cpp b/Source/Initialization/CustomDensityProb.cpp
deleted file mode 100644
index 3efcb13c5..000000000
--- a/Source/Initialization/CustomDensityProb.cpp
+++ /dev/null
@@ -1,12 +0,0 @@
-#include <PlasmaInjector.H>
-
-#include <iostream>
-
-using namespace amrex;
-
-///
-/// This "custom" density profile just does constant
-///
-Real CustomDensityProfile::getDensity(Real x, Real y, Real z) const {
- return params[0];
-}
diff --git a/Source/Initialization/CustomMomentumProb.H b/Source/Initialization/CustomMomentumProb.H
new file mode 100644
index 000000000..f8bc29a05
--- /dev/null
+++ b/Source/Initialization/CustomMomentumProb.H
@@ -0,0 +1,30 @@
+#ifndef CUSTOM_MOMENTUM_PROB_H
+#define CUSTOM_MOMENTUM_PROB_H
+
+#include <AMReX_ParmParse.H>
+#include <AMReX_Gpu.H>
+#include <AMReX_Arena.H>
+#include <AMReX_Dim3.H>
+
+// An example of Custom Momentum Profile
+
+// struct whose getMomentum returns momentum at a given position computed from
+// a custom function.
+struct InjectorMomentumCustom
+{
+ InjectorMomentumCustom (std::string const& /*a_species_name*/) {} // the species name is not used by this example
+
+ // Return momentum at given position (illustration: momentum=0).
+ AMREX_GPU_HOST_DEVICE
+ amrex::XDim3
+ getMomentum (amrex::Real, amrex::Real, amrex::Real) const noexcept
+ {
+ return {0., 0., 0.};
+ }
+
+ // Note that we are not allowed to have non-trivial destructor.
+ // So we rely on clear() to free memory if needed. Nothing is allocated here, so clear() is empty.
+ void clear () { }
+};
+
+#endif
diff --git a/Source/Initialization/CustomMomentumProb.cpp b/Source/Initialization/CustomMomentumProb.cpp
deleted file mode 100644
index fa21252d0..000000000
--- a/Source/Initialization/CustomMomentumProb.cpp
+++ /dev/null
@@ -1,14 +0,0 @@
-#include <PlasmaInjector.H>
-
-#include <iostream>
-
-using namespace amrex;
-
-///
-/// This "custom" momentum distribution just does 0 momentum
-///
-void CustomMomentumDistribution::getMomentum(vec3& u, Real x, Real y, Real z) {
- u[0] = 0;
- u[1] = 0;
- u[2] = 0;
-}
diff --git a/Source/Initialization/InjectorDensity.H b/Source/Initialization/InjectorDensity.H
new file mode 100644
index 000000000..b7f5c26eb
--- /dev/null
+++ b/Source/Initialization/InjectorDensity.H
@@ -0,0 +1,202 @@
+#ifndef INJECTOR_DENSITY_H_
+#define INJECTOR_DENSITY_H_
+
+#include <AMReX_Gpu.H>
+#include <AMReX_Dim3.H>
+#include <GpuParser.H>
+#include <CustomDensityProb.H>
+#include <WarpXConst.H>
+
+// struct whose getDensity returns the same density at every position.
+struct InjectorDensityConstant
+{
+ InjectorDensityConstant (amrex::Real a_rho) noexcept : m_rho(a_rho) {} // a_rho: value returned by getDensity
+
+ AMREX_GPU_HOST_DEVICE
+ amrex::Real
+ getDensity (amrex::Real, amrex::Real, amrex::Real) const noexcept // the three position arguments are ignored
+ {
+ return m_rho;
+ }
+
+private:
+ amrex::Real m_rho; // the constant density
+};
+
+// struct whose getDensity returns the local density obtained by evaluating a parser at (x,y,z).
+struct InjectorDensityParser
+{
+ InjectorDensityParser (WarpXParser const& a_parser) noexcept
+ : m_parser(a_parser) {} // builds the GpuParser member from the WarpXParser
+
+ AMREX_GPU_HOST_DEVICE
+ amrex::Real
+ getDensity (amrex::Real x, amrex::Real y, amrex::Real z) const noexcept
+ {
+ return m_parser(x,y,z); // evaluate the parsed expression at this position
+ }
+
+ // InjectorDensityParser constructs this GpuParser from WarpXParser.
+ GpuParser m_parser; // public: InjectorDensity's destructor calls m_parser.clear()
+};
+
+// struct whose getDensity returns local density computed from a predefined profile.
+struct InjectorDensityPredefined
+{
+ InjectorDensityPredefined (std::string const& a_species_name) noexcept; // reads profile name and parameters, allocates p
+
+ void clear (); // frees p; must be called explicitly since there is no destructor
+
+ AMREX_GPU_HOST_DEVICE
+ amrex::Real
+ getDensity (amrex::Real x, amrex::Real y, amrex::Real z) const noexcept
+ {
+ // Choices for profile are:
+ // - parabolic_channel
+ switch (profile)
+ {
+ case Profile::parabolic_channel:
+ {
+ amrex::Real z_start = p[0]; // z at which the ramp-up begins
+ amrex::Real ramp_up = p[1]; // length of the linear ramp from 0 to 1
+ amrex::Real plateau = p[2]; // length over which the factor stays at 1
+ amrex::Real ramp_down = p[3]; // length of the linear ramp from 1 back to 0
+ amrex::Real rc = p[4]; // enters the transverse term below as rc^4
+ amrex::Real n0 = p[5]; // density scale multiplying the whole profile
+ amrex::Real n;
+ amrex::Real kp = PhysConst::q_e/PhysConst::c // presumably the plasma wavenumber for density n0
+ *std::sqrt( n0/(PhysConst::m_e*PhysConst::ep0) );
+
+ if ((z-z_start)>=0 and
+ (z-z_start)<ramp_up ) {
+ n = (z-z_start)/ramp_up; // linear rise
+ } else if ((z-z_start)>=ramp_up and
+ (z-z_start)< ramp_up+plateau ) {
+ n = 1.; // plateau
+ } else if ((z-z_start)>=ramp_up+plateau and
+ (z-z_start)< ramp_up+plateau+ramp_down) {
+ n = 1.-((z-z_start)-ramp_up-plateau)/ramp_down; // linear fall
+ } else {
+ n = 0.; // outside [z_start, z_start+ramp_up+plateau+ramp_down)
+ }
+ n *= n0*(1.+4.*(x*x+y*y)/(kp*kp*rc*rc*rc*rc)); // scale by n0 with a term growing as x^2+y^2
+ return n;
+ }
+ default: // reached for Profile::null, i.e. when no known profile was selected
+ amrex::Abort("InjectorDensityPredefined: how did we get here?");
+ return 0.0;
+ }
+ }
+
+private:
+ enum struct Profile { null, parabolic_channel };
+ Profile profile; // which predefined profile getDensity evaluates
+ amrex::Real* p; // profile parameters; allocated by the constructor, released by clear()
+};
+
+// Base struct for density injector.
+// InjectorDensity contains a union (called Object) that holds any one
+// instance of:
+// - InjectorDensityConstant : to generate constant density;
+// - InjectorDensityParser : to generate density from parser;
+// - InjectorDensityCustom : to generate density from custom profile;
+// - InjectorDensityPredefined: to generate density from predefined profile;
+// The choice is made at runtime, depending on the constructor called.
+// This mimics virtual functions, except the struct is stored in managed memory
+// and member functions are made __host__ __device__ to run on CPU and GPU.
+// This struct inherits from amrex::Gpu::Managed to provide new and delete
+// operators in managed memory when running on GPU. Nothing special on CPU.
+struct InjectorDensity
+ : public amrex::Gpu::Managed
+{
+ // This constructor stores a InjectorDensityConstant in union object.
+ InjectorDensity (InjectorDensityConstant* t, amrex::Real a_rho) // t only selects the overload
+ : type(Type::constant),
+ object(t,a_rho)
+ { }
+
+ // This constructor stores a InjectorDensityParser in union object.
+ InjectorDensity (InjectorDensityParser* t, WarpXParser const& a_parser) // t only selects the overload
+ : type(Type::parser),
+ object(t,a_parser)
+ { }
+
+ // This constructor stores a InjectorDensityCustom in union object.
+ InjectorDensity (InjectorDensityCustom* t, std::string const& a_species_name) // t only selects the overload
+ : type(Type::custom),
+ object(t,a_species_name)
+ { }
+
+ // This constructor stores a InjectorDensityPredefined in union object.
+ InjectorDensity (InjectorDensityPredefined* t, std::string const& a_species_name) // t only selects the overload
+ : type(Type::predefined),
+ object(t,a_species_name)
+ { }
+
+ // Explicitly prevent the compiler from generating copy and move
+ // constructors and copy and move assignment operators.
+ InjectorDensity (InjectorDensity const&) = delete;
+ InjectorDensity (InjectorDensity&&) = delete;
+ void operator= (InjectorDensity const&) = delete;
+ void operator= (InjectorDensity &&) = delete;
+
+ ~InjectorDensity (); // calls clear() on the stored variant when it owns memory
+
+ std::size_t sharedMemoryNeeded () const noexcept; // non-zero only for the parser variant
+
+ // call getDensity from the object stored in the union
+ // (the union is called Object, and the instance is called object).
+ AMREX_GPU_HOST_DEVICE
+ amrex::Real
+ getDensity (amrex::Real x, amrex::Real y, amrex::Real z) const noexcept
+ {
+ switch (type) // type records which union member was constructed
+ {
+ case Type::parser:
+ {
+ return object.parser.getDensity(x,y,z);
+ }
+ case Type::constant:
+ {
+ return object.constant.getDensity(x,y,z);
+ }
+ case Type::custom:
+ {
+ return object.custom.getDensity(x,y,z);
+ }
+ case Type::predefined:
+ {
+ return object.predefined.getDensity(x,y,z);
+ }
+ default: // not reachable for the four enumerators handled above
+ {
+ amrex::Abort("InjectorDensity: unknown type");
+ return 0.0;
+ }
+ }
+ }
+
+private:
+ enum struct Type { constant, custom, predefined, parser };
+ Type type; // tag naming the active member of object
+
+ // An instance of union Object constructs and stores any one of
+ // the objects declared (constant or parser or custom or predefined).
+ union Object {
+ Object (InjectorDensityConstant*, amrex::Real a_rho) noexcept // unnamed pointer: overload tag only
+ : constant(a_rho) {}
+ Object (InjectorDensityParser*, WarpXParser const& a_parser) noexcept // unnamed pointer: overload tag only
+ : parser(a_parser) {}
+ Object (InjectorDensityCustom*, std::string const& a_species_name) noexcept // unnamed pointer: overload tag only
+ : custom(a_species_name) {}
+ Object (InjectorDensityPredefined*, std::string const& a_species_name) noexcept // unnamed pointer: overload tag only
+ : predefined(a_species_name) {}
+ InjectorDensityConstant constant;
+ InjectorDensityParser parser;
+ InjectorDensityCustom custom;
+ InjectorDensityPredefined predefined;
+ };
+ Object object; // storage for whichever variant the constructor selected
+};
+
+#endif
diff --git a/Source/Initialization/InjectorDensity.cpp b/Source/Initialization/InjectorDensity.cpp
new file mode 100644
index 000000000..54df4b14d
--- /dev/null
+++ b/Source/Initialization/InjectorDensity.cpp
@@ -0,0 +1,77 @@
+#include <PlasmaInjector.H>
+
+using namespace amrex;
+
+InjectorDensity::~InjectorDensity () // releases what the active union member owns, via its clear()
+{
+ switch (type) // no case for Type::constant: that variant only stores a Real
+ {
+ case Type::parser:
+ {
+ object.parser.m_parser.clear(); // clear the GpuParser held by the parser variant
+ break;
+ }
+ case Type::custom:
+ {
+ object.custom.clear();
+ break;
+ }
+ case Type::predefined:
+ {
+ object.predefined.clear(); // frees the parameter array p
+ break;
+ }
+ }
+}
+
+// Compute the amount of memory (in bytes) needed in GPU Shared Memory.
+std::size_t
+InjectorDensity::sharedMemoryNeeded () const noexcept
+{
+ switch (type)
+ {
+ case Type::parser:
+ {
+ // For parser injector, the 3D position of each particle
+ // is stored in shared memory.
+ return amrex::Gpu::numThreadsPerBlockParallelFor() * sizeof(double) * 3; // three doubles per thread of a block
+ }
+ default: // every other variant needs no shared memory
+ return 0;
+ }
+}
+
+InjectorDensityPredefined::InjectorDensityPredefined (
+ std::string const& a_species_name) noexcept // a_species_name: ParmParse prefix the inputs are read under
+ : profile(Profile::null)
+{
+ ParmParse pp(a_species_name);
+
+ std::vector<amrex::Real> v;
+ // Read parameters for the predefined plasma profile,
+ // and store them in managed memory
+ pp.getarr("predefined_profile_params", v);
+ p = static_cast<amrex::Real*>
+ (amrex::The_Managed_Arena()->alloc(sizeof(amrex::Real)*v.size())); // released later by clear()
+ for (int i = 0; i < static_cast<int>(v.size()); ++i) {
+ p[i] = v[i];
+ }
+
+ // Parse predefined profile name (case-insensitively), and update member variable profile.
+ std::string which_profile_s;
+ pp.query("predefined_profile_name", which_profile_s);
+ std::transform(which_profile_s.begin(), which_profile_s.end(),
+ which_profile_s.begin(), ::tolower);
+ if (which_profile_s == "parabolic_channel"){ // NOTE(review): any other name leaves profile == Profile::null, so getDensity aborts
+ profile = Profile::parabolic_channel; // getDensity reads p[0]..p[5] for this profile, hence the check below
+ AMREX_ALWAYS_ASSERT_WITH_MESSAGE(v.size() > 5,
+ "InjectorDensityPredefined::parabolic_channel: not enough parameters");
+ }
+}
+
+// Note that we are not allowed to have a non-trivial destructor.
+// So we rely on clear() to free the parameter array allocated by the constructor.
+void InjectorDensityPredefined::clear ()
+{
+ amrex::The_Managed_Arena()->free(p); // p is not reset afterwards
+}
diff --git a/Source/Initialization/InjectorMomentum.H b/Source/Initialization/InjectorMomentum.H
new file mode 100644
index 000000000..399ee7759
--- /dev/null
+++ b/Source/Initialization/InjectorMomentum.H
@@ -0,0 +1,223 @@
+#ifndef INJECTOR_MOMENTUM_H_
+#define INJECTOR_MOMENTUM_H_
+
+#include <AMReX_Gpu.H>
+#include <AMReX_Dim3.H>
+#include <GpuParser.H>
+#include <CustomMomentumProb.H>
+
+// struct whose getMomentum returns the same momentum at every position.
+struct InjectorMomentumConstant
+{
+ InjectorMomentumConstant (amrex::Real a_ux, amrex::Real a_uy, amrex::Real a_uz) noexcept
+ : m_ux(a_ux), m_uy(a_uy), m_uz(a_uz) {} // store the three components returned by getMomentum
+
+ AMREX_GPU_HOST_DEVICE
+ amrex::XDim3
+ getMomentum (amrex::Real, amrex::Real, amrex::Real) const noexcept // the three position arguments are ignored
+ {
+ return amrex::XDim3{m_ux,m_uy,m_uz};
+ }
+private:
+ amrex::Real m_ux, m_uy, m_uz; // constant momentum components
+};
+
+// struct whose getMomentum returns momentum for 1 particle, from random
+// gaussian distribution (each component drawn independently with amrex::RandomNormal).
+struct InjectorMomentumGaussian
+{
+ InjectorMomentumGaussian (amrex::Real a_ux_m, amrex::Real a_uy_m,
+ amrex::Real a_uz_m, amrex::Real a_ux_th,
+ amrex::Real a_uy_th, amrex::Real a_uz_th) noexcept
+ : m_ux_m(a_ux_m), m_uy_m(a_uy_m), m_uz_m(a_uz_m),
+ m_ux_th(a_ux_th), m_uy_th(a_uy_th), m_uz_th(a_uz_th)
+ {}
+
+ AMREX_GPU_HOST_DEVICE
+ amrex::XDim3
+ getMomentum (amrex::Real x, amrex::Real y, amrex::Real z) const noexcept // x, y, z are not used
+ {
+ return amrex::XDim3{amrex::RandomNormal(m_ux_m, m_ux_th),
+ amrex::RandomNormal(m_uy_m, m_uy_th),
+ amrex::RandomNormal(m_uz_m, m_uz_th)};
+ }
+private:
+ amrex::Real m_ux_m, m_uy_m, m_uz_m; // first argument of RandomNormal per component (presumably the mean)
+ amrex::Real m_ux_th, m_uy_th, m_uz_th; // second argument of RandomNormal (presumably the standard deviation)
+};
+
+// struct whose getMomentum returns momentum for 1 particle, for
+// radial expansion: the momentum is the position scaled by u_over_r.
+struct InjectorMomentumRadialExpansion
+{
+ InjectorMomentumRadialExpansion (amrex::Real a_u_over_r) noexcept
+ : u_over_r(a_u_over_r)
+ {}
+
+ AMREX_GPU_HOST_DEVICE
+ amrex::XDim3
+ getMomentum (amrex::Real x, amrex::Real y, amrex::Real z) const noexcept
+ {
+ return {x*u_over_r, y*u_over_r, z*u_over_r}; // each component is proportional to the matching coordinate
+ }
+
+private:
+ amrex::Real u_over_r; // proportionality constant between position and momentum
+};
+
+// struct whose getMomentum returns local momentum computed from three parsers (one per component).
+struct InjectorMomentumParser
+{
+ InjectorMomentumParser (WarpXParser const& a_ux_parser,
+ WarpXParser const& a_uy_parser,
+ WarpXParser const& a_uz_parser) noexcept
+ : m_ux_parser(a_ux_parser), m_uy_parser(a_uy_parser),
+ m_uz_parser(a_uz_parser) {} // builds one GpuParser per component from the WarpXParsers
+
+ AMREX_GPU_HOST_DEVICE
+ amrex::XDim3
+ getMomentum (amrex::Real x, amrex::Real y, amrex::Real z) const noexcept
+ {
+ return amrex::XDim3{m_ux_parser(x,y,z),m_uy_parser(x,y,z),m_uz_parser(x,y,z)}; // all three evaluated at the same position
+ }
+
+ GpuParser m_ux_parser, m_uy_parser, m_uz_parser; // public: InjectorMomentum's destructor calls clear() on each
+};
+
+// Base struct for momentum injector.
+// InjectorMomentum contains a union (called Object) that holds any one
+// instance of (InjectorMomentumCustom, for a custom profile, is also held):
+// - InjectorMomentumConstant : to generate constant momentum;
+// - InjectorMomentumGaussian : to generate gaussian distribution;
+// - InjectorMomentumRadialExpansion: to generate radial expansion;
+// - InjectorMomentumParser : to generate momentum from parser;
+// The choice is made at runtime, depending on the constructor called.
+// This mimics virtual functions, except the struct is stored in managed memory
+// and member functions are made __host__ __device__ to run on CPU and GPU.
+// This struct inherits from amrex::Gpu::Managed to provide new and delete
+// operators in managed memory when running on GPU. Nothing special on CPU.
+struct InjectorMomentum
+ : public amrex::Gpu::Managed
+{
+ // This constructor stores a InjectorMomentumConstant in union object.
+ InjectorMomentum (InjectorMomentumConstant* t, // t only selects the overload
+ amrex::Real a_ux, amrex::Real a_uy, amrex::Real a_uz)
+ : type(Type::constant),
+ object(t, a_ux, a_uy, a_uz)
+ { }
+
+ // This constructor stores a InjectorMomentumParser in union object.
+ InjectorMomentum (InjectorMomentumParser* t, // t only selects the overload
+ WarpXParser const& a_ux_parser,
+ WarpXParser const& a_uy_parser,
+ WarpXParser const& a_uz_parser)
+ : type(Type::parser),
+ object(t, a_ux_parser, a_uy_parser, a_uz_parser)
+ { }
+
+ // This constructor stores a InjectorMomentumGaussian in union object.
+ InjectorMomentum (InjectorMomentumGaussian* t, // t only selects the overload
+ amrex::Real a_ux_m, amrex::Real a_uy_m, amrex::Real a_uz_m,
+ amrex::Real a_ux_th, amrex::Real a_uy_th, amrex::Real a_uz_th)
+ : type(Type::gaussian),
+ object(t,a_ux_m,a_uy_m,a_uz_m,a_ux_th,a_uy_th,a_uz_th)
+ { }
+
+ // This constructor stores a InjectorMomentumCustom in union object.
+ InjectorMomentum (InjectorMomentumCustom* t, // t only selects the overload
+ std::string const& a_species_name)
+ : type(Type::custom),
+ object(t, a_species_name)
+ { }
+
+ // This constructor stores a InjectorMomentumRadialExpansion in union object.
+ InjectorMomentum (InjectorMomentumRadialExpansion* t, // t only selects the overload
+ amrex::Real u_over_r)
+ : type(Type::radial_expansion),
+ object(t, u_over_r)
+ { }
+
+ // Explicitly prevent the compiler from generating copy and move
+ // constructors and copy and move assignment operators.
+ InjectorMomentum (InjectorMomentum const&) = delete;
+ InjectorMomentum (InjectorMomentum&&) = delete;
+ void operator= (InjectorMomentum const&) = delete;
+ void operator= (InjectorMomentum &&) = delete;
+
+ ~InjectorMomentum (); // calls clear() on the stored variant when it owns memory
+
+ std::size_t sharedMemoryNeeded () const noexcept; // non-zero only for the parser variant
+
+ // call getMomentum from the object stored in the union
+ // (the union is called Object, and the instance is called object).
+ AMREX_GPU_HOST_DEVICE
+ amrex::XDim3
+ getMomentum (amrex::Real x, amrex::Real y, amrex::Real z) const noexcept
+ {
+ switch (type) // type records which union member was constructed
+ {
+ case Type::parser:
+ {
+ return object.parser.getMomentum(x,y,z);
+ }
+ case Type::gaussian:
+ {
+ return object.gaussian.getMomentum(x,y,z);
+ }
+ case Type::constant:
+ {
+ return object.constant.getMomentum(x,y,z);
+ }
+ case Type::radial_expansion:
+ {
+ return object.radial_expansion.getMomentum(x,y,z);
+ }
+ case Type::custom:
+ {
+ return object.custom.getMomentum(x,y,z);
+ }
+ default: // not reachable for the five enumerators handled above
+ {
+ amrex::Abort("InjectorMomentum: unknown type");
+ return {0.0,0.0,0.0};
+ }
+ }
+ }
+
+private:
+ enum struct Type { constant, custom, gaussian, radial_expansion, parser };
+ Type type; // tag naming the active member of object
+
+ // An instance of union Object constructs and stores any one of
+ // the objects declared (constant or custom or gaussian or
+ // radial_expansion or parser).
+ union Object {
+ Object (InjectorMomentumConstant*, // unnamed pointer: overload tag only
+ amrex::Real a_ux, amrex::Real a_uy, amrex::Real a_uz) noexcept
+ : constant(a_ux,a_uy,a_uz) {}
+ Object (InjectorMomentumCustom*, // unnamed pointer: overload tag only
+ std::string const& a_species_name) noexcept
+ : custom(a_species_name) {}
+ Object (InjectorMomentumGaussian*, // unnamed pointer: overload tag only
+ amrex::Real a_ux_m, amrex::Real a_uy_m,
+ amrex::Real a_uz_m, amrex::Real a_ux_th,
+ amrex::Real a_uy_th, amrex::Real a_uz_th) noexcept
+ : gaussian(a_ux_m,a_uy_m,a_uz_m,a_ux_th,a_uy_th,a_uz_th) {}
+ Object (InjectorMomentumRadialExpansion*, // unnamed pointer: overload tag only
+ amrex::Real u_over_r) noexcept
+ : radial_expansion(u_over_r) {}
+ Object (InjectorMomentumParser*, // unnamed pointer: overload tag only
+ WarpXParser const& a_ux_parser,
+ WarpXParser const& a_uy_parser,
+ WarpXParser const& a_uz_parser) noexcept
+ : parser(a_ux_parser, a_uy_parser, a_uz_parser) {}
+ InjectorMomentumConstant constant;
+ InjectorMomentumCustom custom;
+ InjectorMomentumGaussian gaussian;
+ InjectorMomentumRadialExpansion radial_expansion;
+ InjectorMomentumParser parser;
+ };
+ Object object; // storage for whichever variant the constructor selected
+};
+
+#endif
diff --git a/Source/Initialization/InjectorMomentum.cpp b/Source/Initialization/InjectorMomentum.cpp
new file mode 100644
index 000000000..a197b5bef
--- /dev/null
+++ b/Source/Initialization/InjectorMomentum.cpp
@@ -0,0 +1,40 @@
+#include <PlasmaInjector.H>
+
+using namespace amrex;
+
+InjectorMomentum::~InjectorMomentum () // releases what the active union member owns, via clear()
+{
+ switch (type) // constant, gaussian and radial_expansion only store Reals, so they need no case
+ {
+ case Type::parser:
+ {
+ object.parser.m_ux_parser.clear(); // one GpuParser per momentum component
+ object.parser.m_uy_parser.clear();
+ object.parser.m_uz_parser.clear();
+ break;
+ }
+ case Type::custom:
+ {
+ object.custom.clear();
+ break;
+ }
+ }
+}
+
+// Compute the amount of memory (in bytes) needed in GPU Shared Memory.
+std::size_t
+InjectorMomentum::sharedMemoryNeeded () const noexcept
+{
+ switch (type)
+ {
+ case Type::parser:
+ {
+ // For parser injector, the 3D position of each particle
+ // is stored in shared memory.
+ return amrex::Gpu::numThreadsPerBlockParallelFor() * sizeof(double) * 3; // three doubles per thread of a block
+ }
+ default: // every other variant needs no shared memory
+ return 0;
+ }
+}
+
diff --git a/Source/Initialization/InjectorPosition.H b/Source/Initialization/InjectorPosition.H
new file mode 100644
index 000000000..19bb092dd
--- /dev/null
+++ b/Source/Initialization/InjectorPosition.H
@@ -0,0 +1,146 @@
+#ifndef INJECTOR_POSITION_H_
+#define INJECTOR_POSITION_H_
+
+#include <AMReX_Gpu.H>
+#include <AMReX_Dim3.H>
+#include <AMReX_Utility.H>
+
+// struct whose getPositionUnitBox returns x, y and z for a particle with
+// random distribution inside a unit cell (one amrex::Random() draw per coordinate).
+struct InjectorPositionRandom
+{
+ AMREX_GPU_HOST_DEVICE
+ amrex::XDim3
+ getPositionUnitBox (int i_part, int ref_fac=1) const noexcept // both arguments are unused; same signature as the regular variant
+ {
+ return amrex::XDim3{amrex::Random(), amrex::Random(), amrex::Random()};
+ }
+};
+
+// struct whose getPositionUnitBox returns x, y and z for a particle with
+// regular (evenly spaced) distribution inside a unit cell.
+struct InjectorPositionRegular
+{
+ InjectorPositionRegular (amrex::Dim3 const& a_ppc) noexcept : ppc(a_ppc) {} // a_ppc: particles per cell along each direction
+
+ // i_part: particle number within the cell, required to evenly space
+ // particles within the cell.
+ // ref_fac: the number of particles evenly-spaced within a cell
+ // is a_ppc*(ref_fac**AMREX_SPACEDIM).
+ AMREX_GPU_HOST_DEVICE
+ amrex::XDim3
+ getPositionUnitBox (int i_part, int ref_fac=1) const noexcept
+ {
+ int nx = ref_fac*ppc.x; // number of particle slots along x
+ int ny = ref_fac*ppc.y; // number of particle slots along y
+#if (AMREX_SPACEDIM == 3)
+ int nz = ref_fac*ppc.z;
+#else
+ int nz = 1;
+#endif
+ int ix_part = i_part/(ny*nz); // written this way for backward compatibility
+ int iz_part = (i_part-ix_part*(ny*nz)) / ny; // remainder after removing the x index, divided by ny
+ int iy_part = (i_part-ix_part*(ny*nz)) - ny*iz_part; // what is left: y varies fastest with i_part
+ return amrex::XDim3{(0.5+ix_part)/nx, (0.5+iy_part)/ny, (0.5+iz_part) / nz}; // centre of each slot
+ }
+private:
+ amrex::Dim3 ppc; // particles per cell along x, y, z as passed to the constructor
+};
+
+// Base struct for position injector.
+// InjectorPosition contains a union (called Object) that holds any one
+// instance of:
+// - InjectorPositionRandom : to generate random distribution;
+// - InjectorPositionRegular: to generate regular distribution.
+// The choice is made at runtime, depending on the constructor called.
+// This mimics virtual functions, except the struct is stored in managed memory
+// and member functions are made __host__ __device__ to run on CPU and GPU.
+// This struct inherits from amrex::Gpu::Managed to provide new and delete
+// operators in managed memory when running on GPU. Nothing special on CPU.
+struct InjectorPosition
+ : public amrex::Gpu::Managed
+{
+ // This constructor stores a InjectorPositionRandom in union object.
+ InjectorPosition (InjectorPositionRandom* t, // t only selects the overload
+ amrex::Real a_xmin, amrex::Real a_xmax,
+ amrex::Real a_ymin, amrex::Real a_ymax,
+ amrex::Real a_zmin, amrex::Real a_zmax)
+ : type(Type::random),
+ object(t),
+ xmin(a_xmin), xmax(a_xmax),
+ ymin(a_ymin), ymax(a_ymax),
+ zmin(a_zmin), zmax(a_zmax)
+ { }
+
+ // This constructor stores a InjectorPositionRegular in union object.
+ InjectorPosition (InjectorPositionRegular* t, // t only selects the overload
+ amrex::Real a_xmin, amrex::Real a_xmax,
+ amrex::Real a_ymin, amrex::Real a_ymax,
+ amrex::Real a_zmin, amrex::Real a_zmax,
+ amrex::Dim3 const& a_ppc)
+ : type(Type::regular),
+ object(t, a_ppc),
+ xmin(a_xmin), xmax(a_xmax),
+ ymin(a_ymin), ymax(a_ymax),
+ zmin(a_zmin), zmax(a_zmax)
+ { }
+
+ // Explicitly prevent the compiler from generating copy and move
+ // constructors and copy and move assignment operators.
+ InjectorPosition (InjectorPosition const&) = delete;
+ InjectorPosition (InjectorPosition&&) = delete;
+ void operator= (InjectorPosition const&) = delete;
+ void operator= (InjectorPosition &&) = delete;
+
+ std::size_t sharedMemoryNeeded () const noexcept { return 0; } // neither position variant uses shared memory
+
+ // call getPositionUnitBox from the object stored in the union
+ // (the union is called Object, and the instance is called object).
+ AMREX_GPU_HOST_DEVICE
+ amrex::XDim3
+ getPositionUnitBox (int i_part, int ref_fac=1) const noexcept
+ {
+ switch (type)
+ {
+ case Type::regular:
+ {
+ return object.regular.getPositionUnitBox(i_part, ref_fac);
+ }
+ default: // Type::random
+ {
+ return object.random.getPositionUnitBox(i_part, ref_fac);
+ }
+ };
+ }
+
+ // bool: whether position specified is within bounds (lower bound inclusive, upper bound exclusive).
+ AMREX_GPU_HOST_DEVICE
+ bool
+ insideBounds (amrex::Real x, amrex::Real y, amrex::Real z) const noexcept
+ {
+ return (x < xmax and x >= xmin and
+ y < ymax and y >= ymin and
+ z < zmax and z >= zmin);
+ }
+
+private:
+ enum struct Type { random, regular };
+ Type type; // tag naming the active member of object
+
+ // An instance of union Object constructs and stores any one of
+ // the objects declared (random or regular).
+ union Object {
+ Object (InjectorPositionRandom*) noexcept : random() {} // unnamed pointer: overload tag only
+ Object (InjectorPositionRegular*, amrex::Dim3 const& a_ppc) noexcept // unnamed pointer: overload tag only
+ : regular(a_ppc) {}
+ InjectorPositionRandom random;
+ InjectorPositionRegular regular;
+ };
+ Object object; // storage for whichever variant the constructor selected
+
+ amrex::Real xmin, xmax; // bounds tested by insideBounds
+ amrex::Real ymin, ymax;
+ amrex::Real zmin, zmax;
+};
+
+#endif
diff --git a/Source/Initialization/Make.package b/Source/Initialization/Make.package
index edcf402c9..2c6458b6d 100644
--- a/Source/Initialization/Make.package
+++ b/Source/Initialization/Make.package
@@ -1,9 +1,18 @@
-CEXE_sources += CustomDensityProb.cpp
-CEXE_sources += PlasmaProfiles.cpp
CEXE_sources += WarpXInitData.cpp
-CEXE_sources += CustomMomentumProb.cpp
+
CEXE_sources += PlasmaInjector.cpp
CEXE_headers += PlasmaInjector.H
+CEXE_headers += InjectorPosition.H
+
+CEXE_headers += InjectorDensity.H
+CEXE_sources += InjectorDensity.cpp
+
+CEXE_headers += InjectorMomentum.H
+CEXE_sources += InjectorMomentum.cpp
+
+CEXE_headers += CustomDensityProb.H
+CEXE_headers += CustomMomentumProb.H
+
INCLUDE_LOCATIONS += $(WARPX_HOME)/Source/Initialization
VPATH_LOCATIONS += $(WARPX_HOME)/Source/Initialization
diff --git a/Source/Initialization/PlasmaInjector.H b/Source/Initialization/PlasmaInjector.H
index f998e217e..f7e86bff5 100644
--- a/Source/Initialization/PlasmaInjector.H
+++ b/Source/Initialization/PlasmaInjector.H
@@ -1,250 +1,16 @@
#ifndef PLASMA_INJECTOR_H_
#define PLASMA_INJECTOR_H_
-#include <array>
+#include <InjectorPosition.H>
+#include <InjectorDensity.H>
+#include <InjectorMomentum.H>
-#include "AMReX_REAL.H"
+#include <array>
#include <AMReX_Vector.H>
#include <WarpXConst.H>
#include <WarpXParser.H>
-#include "AMReX_ParmParse.H"
-#include "AMReX_Utility.H"
-
-enum class predefined_profile_flag { Null, parabolic_channel };
-
-///
-/// PlasmaDensityProfile describes how the charge density
-/// is set in particle initialization. Subclasses must define a
-/// getDensity function that describes the charge density as a
-/// function of x, y, and z.
-///
-class PlasmaDensityProfile
-{
-public:
- virtual ~PlasmaDensityProfile() {};
- virtual amrex::Real getDensity(amrex::Real x,
- amrex::Real y,
- amrex::Real z) const = 0;
-protected:
- std::string _species_name;
-};
-
-///
-/// This describes a constant density distribution.
-///
-class ConstantDensityProfile : public PlasmaDensityProfile
-{
-public:
- ConstantDensityProfile(amrex::Real _density);
- virtual amrex::Real getDensity(amrex::Real x,
- amrex::Real y,
- amrex::Real z) const override;
-
-private:
- amrex::Real _density;
-};
-
-///
-/// This describes a custom density distribution. Users can supply
-/// in their problem directory.
-///
-///
-class CustomDensityProfile : public PlasmaDensityProfile
-{
-public:
- CustomDensityProfile(const std::string& species_name);
- virtual amrex::Real getDensity(amrex::Real x,
- amrex::Real y,
- amrex::Real z) const override;
-private:
- amrex::Vector<amrex::Real> params;
-};
-
-///
-/// This describes predefined density distributions.
-///
-class PredefinedDensityProfile : public PlasmaDensityProfile
-{
-public:
- PredefinedDensityProfile(const std::string& species_name);
- virtual amrex::Real getDensity(amrex::Real x,
- amrex::Real y,
- amrex::Real z) const override;
- amrex::Real ParabolicChannel(amrex::Real x,
- amrex::Real y,
- amrex::Real z) const;
-private:
- predefined_profile_flag which_profile = predefined_profile_flag::Null;
- amrex::Vector<amrex::Real> params;
-};
-
-///
-/// This describes a density function parsed in the input file.
-///
-class ParseDensityProfile : public PlasmaDensityProfile
-{
-public:
- ParseDensityProfile(const std::string _parse_density_function);
- virtual amrex::Real getDensity(amrex::Real x,
- amrex::Real y,
- amrex::Real z) const override;
-private:
- std::string _parse_density_function;
- WarpXParser parser_density;
-};
-
-///
-/// PlasmaMomentumDistribution describes how the particle momenta
-/// are set. Subclasses must define a getMomentum method that fills
-/// a u with the 3 components of the particle momentum
-///
-class PlasmaMomentumDistribution
-{
-public:
- using vec3 = std::array<amrex::Real, 3>;
- virtual ~PlasmaMomentumDistribution() {};
- virtual void getMomentum(vec3& u, amrex::Real x, amrex::Real y, amrex::Real z) = 0;
-};
-
-///
-/// This is a constant momentum distribution - all particles will
-/// have the same ux, uy, and uz
-///
-class ConstantMomentumDistribution : public PlasmaMomentumDistribution
-{
-public:
- ConstantMomentumDistribution(amrex::Real ux,
- amrex::Real uy,
- amrex::Real uz);
- virtual void getMomentum(vec3& u, amrex::Real x, amrex::Real y, amrex::Real z) override;
-
-private:
- amrex::Real _ux;
- amrex::Real _uy;
- amrex::Real _uz;
-};
-
-///
-/// This describes a custom momentum distribution. Users can supply
-/// in their problem directory.
-///
-///
-class CustomMomentumDistribution : public PlasmaMomentumDistribution
-{
-public:
- CustomMomentumDistribution(const std::string& species_name);
- virtual void getMomentum(vec3& u, amrex::Real x, amrex::Real y, amrex::Real z) override;
-
-private:
- amrex::Vector<amrex::Real> params;
-};
-
-
-///
-/// This is a Gaussian Random momentum distribution.
-/// Particles will get random momenta, drawn from a normal.
-/// ux_m, ux_y, and ux_z describe the mean components in the x, y, and z
-/// directions, while u_th is the standard deviation of the random
-/// component.
-///
-class GaussianRandomMomentumDistribution : public PlasmaMomentumDistribution
-{
-public:
- GaussianRandomMomentumDistribution(amrex::Real ux_m,
- amrex::Real uy_m,
- amrex::Real uz_m,
- amrex::Real ux_th,
- amrex::Real uy_th,
- amrex::Real uz_th);
- virtual void getMomentum(vec3& u, amrex::Real x, amrex::Real y, amrex::Real z) override;
-private:
- amrex::Real _ux_m;
- amrex::Real _uy_m;
- amrex::Real _uz_m;
- amrex::Real _ux_th;
- amrex::Real _uy_th;
- amrex::Real _uz_th;
-};
-
-///
-/// This is a radially expanding momentum distribution
-/// Particles will have a radial momentum proportional to their
-/// radius, with proportionality constant u_over_r
-class RadialExpansionMomentumDistribution : public PlasmaMomentumDistribution
-{
-public:
- RadialExpansionMomentumDistribution( amrex::Real u_over_r );
- virtual void getMomentum(vec3& u, amrex::Real x, amrex::Real y, amrex::Real z) override;
-private:
- amrex::Real _u_over_r;
-};
-
-///
-/// This describes a momentum distribution function parsed in the input file.
-///
-class ParseMomentumFunction : public PlasmaMomentumDistribution
-{
-public:
- ParseMomentumFunction(const std::string _parse_momentum_function_ux,
- const std::string _parse_momentum_function_uy,
- const std::string _parse_momentum_function_uz);
- virtual void getMomentum(vec3& u,
- amrex::Real x,
- amrex::Real y,
- amrex::Real z) override;
-private:
- std::string _parse_momentum_function_ux;
- std::string _parse_momentum_function_uy;
- std::string _parse_momentum_function_uz;
- WarpXParser parser_ux;
- WarpXParser parser_uy;
- WarpXParser parser_uz;
-};
-
-
-///
-/// PlasmaParticlePosition describes how particles are initialized
-/// into each cell box. Subclasses must define a
-/// getPositionUnitBox function that returns the position of
-/// particle number i_part in a unitary box.
-///
-class PlasmaParticlePosition{
-public:
- using vec3 = std::array<amrex::Real, 3>;
- virtual ~PlasmaParticlePosition() {};
- virtual void getPositionUnitBox(vec3& r, int i_part, int ref_fac=1) = 0;
-};
-
-///
-/// Particles are initialized with a random uniform
-/// distribution inside each cell
-///
-class RandomPosition : public PlasmaParticlePosition{
-public:
- RandomPosition(int num_particles_per_cell);
- virtual void getPositionUnitBox(vec3& r, int i_part, int ref_fac=1) override;
-private:
- amrex::Real _x;
- amrex::Real _y;
- amrex::Real _z;
- int _num_particles_per_cell;
-};
-
-///
-/// Particles are regularly distributed inside each cell. The user provides
-/// a 3d (resp. 2d) vector num_particles_per_cell_each_dim that contains
-/// the number of particles per cell along each dimension.
-///
-class RegularPosition : public PlasmaParticlePosition{
-public:
- RegularPosition(const amrex::Vector<int>& num_particles_per_cell_each_dim);
- virtual void getPositionUnitBox(vec3& r, int i_part, int ref_fac=1) override;
-private:
- amrex::Real _x;
- amrex::Real _y;
- amrex::Real _z;
- amrex::Vector<int> _num_particles_per_cell_each_dim;
-};
+#include <AMReX_ParmParse.H>
+#include <AMReX_Utility.H>
///
/// The PlasmaInjector class parses and stores information about the plasma
@@ -256,28 +22,23 @@ class PlasmaInjector
public:
- using vec3 = std::array<amrex::Real, 3>;
-
- PlasmaInjector();
-
- PlasmaInjector(int ispecies, const std::string& name);
+ PlasmaInjector ();
- amrex::Real getDensity(amrex::Real x, amrex::Real y, amrex::Real z);
+ PlasmaInjector (int ispecies, const std::string& name);
- bool insideBounds(amrex::Real x, amrex::Real y, amrex::Real z);
+ bool insideBounds (amrex::Real x, amrex::Real y, amrex::Real z) const noexcept;
int num_particles_per_cell;
amrex::Vector<int> num_particles_per_cell_each_dim;
- void getMomentum(vec3& u, amrex::Real x, amrex::Real y, amrex::Real z);
+ // gamma * beta
+ amrex::XDim3 getMomentum (amrex::Real x, amrex::Real y, amrex::Real z) const noexcept;
- void getPositionUnitBox(vec3& r, int i_part, int ref_fac=1);
+ amrex::Real getCharge () {return charge;} // accessor for the charge member
+ amrex::Real getMass () {return mass;} // accessor for the mass member
- amrex::Real getCharge() {return charge;}
- amrex::Real getMass() {return mass;}
-
- bool doInjection() { return part_pos != NULL;}
+ bool doInjection () const noexcept { return inj_pos != nullptr;} // true iff a position injector (inj_pos) has been set
bool add_single_particle = false;
amrex::Vector<amrex::Real> single_particle_pos;
@@ -305,6 +66,21 @@ public:
amrex::Real xmin, xmax;
amrex::Real ymin, ymax;
amrex::Real zmin, zmax;
+ amrex::Real density_min = 0;
+ amrex::Real density_max = std::numeric_limits<amrex::Real>::max();
+
+ InjectorPosition* getInjectorPosition ();
+ InjectorDensity* getInjectorDensity ();
+ InjectorMomentum* getInjectorMomentum ();
+
+ // When running on GPU, injector for position, momentum and density store
+ // particle 3D positions in shared memory IF using the parser.
+ std::size_t
+ sharedMemoryNeeded () const noexcept { // largest requirement among the three injectors
+ return amrex::max(inj_pos->sharedMemoryNeeded(), // NOTE(review): no null checks here; presumably only
+ inj_rho->sharedMemoryNeeded(), // called when all three injectors are set -
+ inj_mom->sharedMemoryNeeded()); // confirm against callers
+ }
protected:
@@ -315,13 +91,12 @@ protected:
int species_id;
std::string species_name;
- std::unique_ptr<PlasmaDensityProfile> rho_prof;
- std::unique_ptr<PlasmaMomentumDistribution> mom_dist;
- std::unique_ptr<PlasmaParticlePosition> part_pos;
-
- void parseDensity(amrex::ParmParse pp);
- void parseMomentum(amrex::ParmParse pp);
+ std::unique_ptr<InjectorPosition> inj_pos;
+ std::unique_ptr<InjectorDensity > inj_rho;
+ std::unique_ptr<InjectorMomentum> inj_mom;
+ void parseDensity (amrex::ParmParse& pp);
+ void parseMomentum (amrex::ParmParse& pp);
};
#endif
diff --git a/Source/Initialization/PlasmaInjector.cpp b/Source/Initialization/PlasmaInjector.cpp
index f9642d1b6..541999789 100644
--- a/Source/Initialization/PlasmaInjector.cpp
+++ b/Source/Initialization/PlasmaInjector.cpp
@@ -55,192 +55,34 @@ namespace {
}
}
-ConstantDensityProfile::ConstantDensityProfile(Real density)
- : _density(density)
-{}
+PlasmaInjector::PlasmaInjector () {}
-Real ConstantDensityProfile::getDensity(Real x, Real y, Real z) const
-{
- return _density;
-}
-
-CustomDensityProfile::CustomDensityProfile(const std::string& species_name)
-{
- ParmParse pp(species_name);
- pp.getarr("custom_profile_params", params);
-}
-
-PredefinedDensityProfile::PredefinedDensityProfile(const std::string& species_name)
+PlasmaInjector::PlasmaInjector (int ispecies, const std::string& name)
+ : species_id(ispecies), species_name(name)
{
ParmParse pp(species_name);
- std::string which_profile_s;
- pp.getarr("predefined_profile_params", params);
- pp.query("predefined_profile_name", which_profile_s);
- if (which_profile_s == "parabolic_channel"){
- which_profile = predefined_profile_flag::parabolic_channel;
- }
-}
-
-ParseDensityProfile::ParseDensityProfile(std::string parse_density_function)
- : _parse_density_function(parse_density_function)
-{
- parser_density.define(parse_density_function);
- parser_density.registerVariables({"x","y","z"});
-
- ParmParse pp("my_constants");
- std::set<std::string> symbols = parser_density.symbols();
- symbols.erase("x");
- symbols.erase("y");
- symbols.erase("z"); // after removing variables, we are left with constants
- for (auto it = symbols.begin(); it != symbols.end(); ) {
- Real v;
- if (pp.query(it->c_str(), v)) {
- parser_density.setConstant(*it, v);
- it = symbols.erase(it);
- } else {
- ++it;
- }
- }
- for (auto const& s : symbols) { // make sure there no unknown symbols
- amrex::Abort("ParseDensityProfile: Unknown symbol "+s);
- }
-}
-
-Real ParseDensityProfile::getDensity(Real x, Real y, Real z) const
-{
- return parser_density.eval(x,y,z);
-}
-
-ConstantMomentumDistribution::ConstantMomentumDistribution(Real ux,
- Real uy,
- Real uz)
- : _ux(ux), _uy(uy), _uz(uz)
-{}
-
-void ConstantMomentumDistribution::getMomentum(vec3& u, Real x, Real y, Real z) {
- u[0] = _ux;
- u[1] = _uy;
- u[2] = _uz;
-}
-CustomMomentumDistribution::CustomMomentumDistribution(const std::string& species_name)
-{
- ParmParse pp(species_name);
- pp.getarr("custom_momentum_params", params);
-}
-
-GaussianRandomMomentumDistribution::GaussianRandomMomentumDistribution(Real ux_m,
- Real uy_m,
- Real uz_m,
- Real ux_th,
- Real uy_th,
- Real uz_th)
- : _ux_m(ux_m), _uy_m(uy_m), _uz_m(uz_m), _ux_th(ux_th), _uy_th(uy_th), _uz_th(uz_th)
-{
-}
-
-void GaussianRandomMomentumDistribution::getMomentum(vec3& u, Real x, Real y, Real z) {
- Real ux_th = amrex::RandomNormal(0.0, _ux_th);
- Real uy_th = amrex::RandomNormal(0.0, _uy_th);
- Real uz_th = amrex::RandomNormal(0.0, _uz_th);
-
- u[0] = _ux_m + ux_th;
- u[1] = _uy_m + uy_th;
- u[2] = _uz_m + uz_th;
-}
-RadialExpansionMomentumDistribution::RadialExpansionMomentumDistribution(Real u_over_r) : _u_over_r( u_over_r )
-{
-}
-
-void RadialExpansionMomentumDistribution::getMomentum(vec3& u, Real x, Real y, Real z) {
- u[0] = _u_over_r * x;
- u[1] = _u_over_r * y;
- u[2] = _u_over_r * z;
-}
-
-ParseMomentumFunction::ParseMomentumFunction(std::string parse_momentum_function_ux,
- std::string parse_momentum_function_uy,
- std::string parse_momentum_function_uz)
- : _parse_momentum_function_ux(parse_momentum_function_ux),
- _parse_momentum_function_uy(parse_momentum_function_uy),
- _parse_momentum_function_uz(parse_momentum_function_uz)
-{
- parser_ux.define(parse_momentum_function_ux);
- parser_uy.define(parse_momentum_function_uy);
- parser_uz.define(parse_momentum_function_uz);
-
- amrex::Array<std::reference_wrapper<WarpXParser>,3> parsers{parser_ux, parser_uy, parser_uz};
- ParmParse pp("my_constants");
- for (auto& p : parsers) {
- auto& parser = p.get();
- parser.registerVariables({"x","y","z"});
- std::set<std::string> symbols = parser.symbols();
- symbols.erase("x");
- symbols.erase("y");
- symbols.erase("z"); // after removing variables, we are left with constants
- for (auto it = symbols.begin(); it != symbols.end(); ) {
- Real v;
- if (pp.query(it->c_str(), v)) {
- parser.setConstant(*it, v);
- it = symbols.erase(it);
- } else {
- ++it;
- }
- }
- for (auto const& s : symbols) { // make sure there no unknown symbols
- amrex::Abort("ParseMomentumFunction: Unknown symbol "+s);
- }
- }
-}
-
-void ParseMomentumFunction::getMomentum(vec3& u, Real x, Real y, Real z)
-{
- u[0] = parser_ux.eval(x,y,z);
- u[1] = parser_uy.eval(x,y,z);
- u[2] = parser_uz.eval(x,y,z);
-}
-
-RandomPosition::RandomPosition(int num_particles_per_cell):
- _num_particles_per_cell(num_particles_per_cell)
-{}
-
-void RandomPosition::getPositionUnitBox(vec3& r, int i_part, int ref_fac){
- r[0] = amrex::Random();
- r[1] = amrex::Random();
- r[2] = amrex::Random();
-}
-
-RegularPosition::RegularPosition(const amrex::Vector<int>& num_particles_per_cell_each_dim)
- : _num_particles_per_cell_each_dim(num_particles_per_cell_each_dim)
-{}
+ pp.query("radially_weighted", radially_weighted);
+ AMREX_ALWAYS_ASSERT_WITH_MESSAGE(radially_weighted, "ERROR: Only radially_weighted=true is supported");
-void RegularPosition::getPositionUnitBox(vec3& r, int i_part, int ref_fac)
-{
- int nx = ref_fac*_num_particles_per_cell_each_dim[0];
- int ny = ref_fac*_num_particles_per_cell_each_dim[1];
-#if AMREX_SPACEDIM == 3
- int nz = ref_fac*_num_particles_per_cell_each_dim[2];
-#else
- int nz = 1;
-#endif
-
- int ix_part = i_part/(ny * nz);
- int iy_part = (i_part % (ny * nz)) % ny;
- int iz_part = (i_part % (ny * nz)) / ny;
+ // parse plasma boundaries
+ xmin = std::numeric_limits<amrex::Real>::lowest();
+ ymin = std::numeric_limits<amrex::Real>::lowest();
+ zmin = std::numeric_limits<amrex::Real>::lowest();
- r[0] = (0.5+ix_part)/nx;
- r[1] = (0.5+iy_part)/ny;
- r[2] = (0.5+iz_part)/nz;
-}
+ xmax = std::numeric_limits<amrex::Real>::max();
+ ymax = std::numeric_limits<amrex::Real>::max();
+ zmax = std::numeric_limits<amrex::Real>::max();
-PlasmaInjector::PlasmaInjector(){
- part_pos = NULL;
-}
+ pp.query("xmin", xmin);
+ pp.query("ymin", ymin);
+ pp.query("zmin", zmin);
+ pp.query("xmax", xmax);
+ pp.query("ymax", ymax);
+ pp.query("zmax", zmax);
-PlasmaInjector::PlasmaInjector(int ispecies, const std::string& name)
- : species_id(ispecies), species_name(name)
-{
- ParmParse pp(species_name);
+ pp.query("density_min", density_min);
+ pp.query("density_max", density_max);
// parse charge and mass
std::string charge_s;
@@ -290,9 +132,14 @@ PlasmaInjector::PlasmaInjector(int ispecies, const std::string& name)
gaussian_beam = true;
parseMomentum(pp);
}
+ // Depending on injection type at runtime, initialize inj_pos
+ // so that inj_pos->getPositionUnitBox calls
+ // InjectorPosition[Random or Regular].getPositionUnitBox.
else if (part_pos_s == "nrandompercell") {
pp.query("num_particles_per_cell", num_particles_per_cell);
- part_pos.reset(new RandomPosition(num_particles_per_cell));
+ // Construct InjectorPosition with InjectorPositionRandom.
+ inj_pos.reset(new InjectorPosition((InjectorPositionRandom*)nullptr,
+ xmin, xmax, ymin, ymax, zmin, zmax));
parseDensity(pp);
parseMomentum(pp);
} else if (part_pos_s == "nuniformpercell") {
@@ -301,7 +148,12 @@ PlasmaInjector::PlasmaInjector(int ispecies, const std::string& name)
#if ( AMREX_SPACEDIM == 2 )
num_particles_per_cell_each_dim[2] = 1;
#endif
- part_pos.reset(new RegularPosition(num_particles_per_cell_each_dim));
+ // Construct InjectorPosition from InjectorPositionRegular.
+ inj_pos.reset(new InjectorPosition((InjectorPositionRegular*)nullptr,
+ xmin, xmax, ymin, ymax, zmin, zmax,
+ Dim3{num_particles_per_cell_each_dim[0],
+ num_particles_per_cell_each_dim[1],
+ num_particles_per_cell_each_dim[2]}));
num_particles_per_cell = num_particles_per_cell_each_dim[0] *
num_particles_per_cell_each_dim[1] *
num_particles_per_cell_each_dim[2];
@@ -310,52 +162,75 @@ PlasmaInjector::PlasmaInjector(int ispecies, const std::string& name)
} else {
StringParseAbortMessage("Injection style", part_pos_s);
}
+}
- pp.query("radially_weighted", radially_weighted);
- AMREX_ALWAYS_ASSERT_WITH_MESSAGE(radially_weighted, "ERROR: Only radially_weighted=true is supported");
-
- // parse plasma boundaries
- xmin = std::numeric_limits<amrex::Real>::lowest();
- ymin = std::numeric_limits<amrex::Real>::lowest();
- zmin = std::numeric_limits<amrex::Real>::lowest();
-
- xmax = std::numeric_limits<amrex::Real>::max();
- ymax = std::numeric_limits<amrex::Real>::max();
- zmax = std::numeric_limits<amrex::Real>::max();
+namespace {
+WarpXParser makeParser (std::string const& parse_function)
+{
+ WarpXParser parser(parse_function);
+ parser.registerVariables({"x","y","z"});
- pp.query("xmin", xmin);
- pp.query("ymin", ymin);
- pp.query("zmin", zmin);
- pp.query("xmax", xmax);
- pp.query("ymax", ymax);
- pp.query("zmax", zmax);
+ ParmParse pp("my_constants");
+ std::set<std::string> symbols = parser.symbols();
+ symbols.erase("x");
+ symbols.erase("y");
+ symbols.erase("z"); // after removing variables, we are left with constants
+ for (auto it = symbols.begin(); it != symbols.end(); ) {
+ Real v;
+ if (pp.query(it->c_str(), v)) {
+ parser.setConstant(*it, v);
+ it = symbols.erase(it);
+ } else {
+ ++it;
+ }
+ }
+ for (auto const& s : symbols) { // make sure there are no unknown symbols
+ amrex::Abort("PlasmaInjector::makeParser: Unknown symbol "+s);
+ }
+ return parser;
+}
}
-void PlasmaInjector::parseDensity(ParmParse pp){
+// Depending on injection type at runtime, initialize inj_rho
+// so that inj_rho->getDensity calls
+// InjectorDensity[Constant or Custom or etc.].getDensity.
+void PlasmaInjector::parseDensity (ParmParse& pp)
+{
// parse density information
std::string rho_prof_s;
pp.get("profile", rho_prof_s);
- std::transform(rho_prof_s.begin(),
- rho_prof_s.end(),
- rho_prof_s.begin(),
- ::tolower);
+ std::transform(rho_prof_s.begin(), rho_prof_s.end(),
+ rho_prof_s.begin(), ::tolower);
if (rho_prof_s == "constant") {
pp.get("density", density);
- rho_prof.reset(new ConstantDensityProfile(density));
+ // Construct InjectorDensity with InjectorDensityConstant.
+ inj_rho.reset(new InjectorDensity((InjectorDensityConstant*)nullptr, density));
} else if (rho_prof_s == "custom") {
- rho_prof.reset(new CustomDensityProfile(species_name));
+ // Construct InjectorDensity with InjectorDensityCustom.
+ inj_rho.reset(new InjectorDensity((InjectorDensityCustom*)nullptr, species_name));
} else if (rho_prof_s == "predefined") {
- rho_prof.reset(new PredefinedDensityProfile(species_name));
+ // Construct InjectorDensity with InjectorDensityPredefined.
+ inj_rho.reset(new InjectorDensity((InjectorDensityPredefined*)nullptr,species_name));
} else if (rho_prof_s == "parse_density_function") {
- pp.get("density_function(x,y,z)", str_density_function);
- rho_prof.reset(new ParseDensityProfile(str_density_function));
+ std::vector<std::string> f;
+ pp.getarr("density_function(x,y,z)", f);
+ for (auto const& s : f) {
+ str_density_function += s;
+ }
+ // Construct InjectorDensity with InjectorDensityParser.
+ inj_rho.reset(new InjectorDensity((InjectorDensityParser*)nullptr,
+ makeParser(str_density_function)));
} else {
StringParseAbortMessage("Density profile type", rho_prof_s);
}
}
-void PlasmaInjector::parseMomentum(ParmParse pp){
+// Depending on injection type at runtime, initialize inj_mom
+// so that inj_mom->getMomentum calls
+// InjectorMomentum[Constant or Custom or etc.].getMomentum.
+void PlasmaInjector::parseMomentum (ParmParse& pp)
+{
// parse momentum information
std::string mom_dist_s;
pp.get("momentum_distribution_type", mom_dist_s);
@@ -370,9 +245,11 @@ void PlasmaInjector::parseMomentum(ParmParse pp){
pp.query("ux", ux);
pp.query("uy", uy);
pp.query("uz", uz);
- mom_dist.reset(new ConstantMomentumDistribution(ux, uy, uz));
+ // Construct InjectorMomentum with InjectorMomentumConstant.
+ inj_mom.reset(new InjectorMomentum((InjectorMomentumConstant*)nullptr, ux,uy, uz));
} else if (mom_dist_s == "custom") {
- mom_dist.reset(new CustomMomentumDistribution(species_name));
+ // Construct InjectorMomentum with InjectorMomentumCustom.
+ inj_mom.reset(new InjectorMomentum((InjectorMomentumCustom*)nullptr, species_name));
} else if (mom_dist_s == "gaussian") {
Real ux_m = 0.;
Real uy_m = 0.;
@@ -386,42 +263,68 @@ void PlasmaInjector::parseMomentum(ParmParse pp){
pp.query("ux_th", ux_th);
pp.query("uy_th", uy_th);
pp.query("uz_th", uz_th);
- mom_dist.reset(new GaussianRandomMomentumDistribution(ux_m, uy_m, uz_m,
- ux_th, uy_th, uz_th));
+ // Construct InjectorMomentum with InjectorMomentumGaussian.
+ inj_mom.reset(new InjectorMomentum((InjectorMomentumGaussian*)nullptr,
+ ux_m, uy_m, uz_m, ux_th, uy_th, uz_th));
} else if (mom_dist_s == "radial_expansion") {
Real u_over_r = 0.;
pp.query("u_over_r", u_over_r);
- mom_dist.reset(new RadialExpansionMomentumDistribution(u_over_r));
+ // Construct InjectorMomentum with InjectorMomentumRadialExpansion.
+ inj_mom.reset(new InjectorMomentum
+ ((InjectorMomentumRadialExpansion*)nullptr, u_over_r));
} else if (mom_dist_s == "parse_momentum_function") {
- pp.get("momentum_function_ux(x,y,z)", str_momentum_function_ux);
- pp.get("momentum_function_uy(x,y,z)", str_momentum_function_uy);
- pp.get("momentum_function_uz(x,y,z)", str_momentum_function_uz);
- mom_dist.reset(new ParseMomentumFunction(str_momentum_function_ux,
- str_momentum_function_uy,
- str_momentum_function_uz));
+ std::vector<std::string> f;
+ pp.getarr("momentum_function_ux(x,y,z)", f);
+ for (auto const& s : f) {
+ str_momentum_function_ux += s;
+ }
+ f.clear();
+ pp.getarr("momentum_function_uy(x,y,z)", f);
+ for (auto const& s : f) {
+ str_momentum_function_uy += s;
+ }
+ f.clear();
+ pp.getarr("momentum_function_uz(x,y,z)", f);
+ for (auto const& s : f) {
+ str_momentum_function_uz += s;
+ }
+ // Construct InjectorMomentum with InjectorMomentumParser.
+ inj_mom.reset(new InjectorMomentum((InjectorMomentumParser*)nullptr,
+ makeParser(str_momentum_function_ux),
+ makeParser(str_momentum_function_uy),
+ makeParser(str_momentum_function_uz)));
} else {
StringParseAbortMessage("Momentum distribution type", mom_dist_s);
}
}
-void PlasmaInjector::getPositionUnitBox(vec3& r, int i_part, int ref_fac) {
- return part_pos->getPositionUnitBox(r, i_part, ref_fac);
+XDim3 PlasmaInjector::getMomentum (Real x, Real y, Real z) const noexcept
+{
+ return inj_mom->getMomentum(x, y, z); // gamma*beta
+}
+
+bool PlasmaInjector::insideBounds (Real x, Real y, Real z) const noexcept
+{
+ return (x < xmax and x >= xmin and
+ y < ymax and y >= ymin and
+ z < zmax and z >= zmin);
}
-void PlasmaInjector::getMomentum(vec3& u, Real x, Real y, Real z) {
- mom_dist->getMomentum(u, x, y, z);
- u[0] *= PhysConst::c;
- u[1] *= PhysConst::c;
- u[2] *= PhysConst::c;
+InjectorPosition*
+PlasmaInjector::getInjectorPosition ()
+{
+ return inj_pos.get();
}
-bool PlasmaInjector::insideBounds(Real x, Real y, Real z) {
- if (x >= xmax || x < xmin ||
- y >= ymax || y < ymin ||
- z >= zmax || z < zmin ) return false;
- return true;
+InjectorDensity*
+PlasmaInjector::getInjectorDensity ()
+{
+ return inj_rho.get();
}
-Real PlasmaInjector::getDensity(Real x, Real y, Real z) {
- return rho_prof->getDensity(x, y, z);
+InjectorMomentum*
+PlasmaInjector::getInjectorMomentum ()
+{
+ return inj_mom.get();
}
+
diff --git a/Source/Initialization/PlasmaProfiles.cpp b/Source/Initialization/PlasmaProfiles.cpp
deleted file mode 100644
index d9d207f7e..000000000
--- a/Source/Initialization/PlasmaProfiles.cpp
+++ /dev/null
@@ -1,41 +0,0 @@
-#include <PlasmaInjector.H>
-#include <cmath>
-#include <iostream>
-#include <WarpXConst.H>
-
-using namespace amrex;
-
-Real PredefinedDensityProfile::getDensity(Real x, Real y, Real z) const {
- Real n;
- if ( which_profile == predefined_profile_flag::parabolic_channel ) {
- n = ParabolicChannel(x,y,z);
- }
- return n;
-}
-
-///
-/// plateau between linear upramp and downramp, and parab transverse profile
-///
-Real PredefinedDensityProfile::ParabolicChannel(Real x, Real y, Real z) const {
- // params = [z_start ramp_up plateau ramp_down rc n0]
- Real z_start = params[0];
- Real ramp_up = params[1];
- Real plateau = params[2];
- Real ramp_down = params[3];
- Real rc = params[4];
- Real n0 = params[5];
- Real n;
- Real kp = PhysConst::q_e/PhysConst::c*sqrt( n0/(PhysConst::m_e*PhysConst::ep0) );
-
- if ((z-z_start)>=0 and (z-z_start)<ramp_up ) {
- n = (z-z_start)/ramp_up;
- } else if ((z-z_start)>=ramp_up and (z-z_start)<ramp_up+plateau ) {
- n = 1;
- } else if ((z-z_start)>=ramp_up+plateau and (z-z_start)<ramp_up+plateau+ramp_down) {
- n = 1-((z-z_start)-ramp_up-plateau)/ramp_down;
- } else {
- n = 0;
- }
- n *= n0*(1+4*(x*x+y*y)/(kp*kp*std::pow(rc,4)));
- return n;
-}
diff --git a/Source/Initialization/WarpXInitData.cpp b/Source/Initialization/WarpXInitData.cpp
index 2442e0205..590c11b84 100644
--- a/Source/Initialization/WarpXInitData.cpp
+++ b/Source/Initialization/WarpXInitData.cpp
@@ -1,6 +1,4 @@
-#include <numeric>
-
#include <AMReX_ParallelDescriptor.H>
#include <AMReX_ParmParse.H>
@@ -88,7 +86,7 @@ WarpX::InitDiagnostics () {
const Real* current_lo = geom[0].ProbLo();
const Real* current_hi = geom[0].ProbHi();
Real dt_boost = dt[0];
-
+
// Find the positions of the lab-frame box that corresponds to the boosted-frame box at t=0
Real zmin_lab = current_lo[moving_window_dir]/( (1.+beta_boost)*gamma_boost );
Real zmax_lab = current_hi[moving_window_dir]/( (1.+beta_boost)*gamma_boost );
@@ -97,7 +95,7 @@ WarpX::InitDiagnostics () {
zmax_lab,
moving_window_v, dt_snapshots_lab,
num_snapshots_lab, gamma_boost,
- t_new[0], dt_boost,
+ t_new[0], dt_boost,
moving_window_dir, geom[0]));
}
}
@@ -118,10 +116,10 @@ WarpX::InitFromScratch ()
InitPML();
-#ifdef WARPX_DO_ELECTROSTATIC
+#ifdef WARPX_DO_ELECTROSTATIC
if (do_electrostatic) {
getLevelMasks(masks);
-
+
// the plus one is to convert from num_cells to num_nodes
getLevelMasks(gather_masks, n_buffer + 1);
}
@@ -133,14 +131,35 @@ WarpX::InitPML ()
{
if (do_pml)
{
+ amrex::IntVect do_pml_Lo_corrected = do_pml_Lo;
+
+#ifdef WARPX_DIM_RZ
+ do_pml_Lo_corrected[0] = 0; // no PML at r=0, in cylindrical geometry
+#endif
pml[0].reset(new PML(boxArray(0), DistributionMap(0), &Geom(0), nullptr,
- pml_ncell, pml_delta, 0, do_dive_cleaning, do_moving_window));
+ pml_ncell, pml_delta, 0,
+#ifdef WARPX_USE_PSATD
+ dt[0], nox_fft, noy_fft, noz_fft, do_nodal,
+#endif
+ do_dive_cleaning, do_moving_window,
+ do_pml_Lo_corrected, do_pml_Hi));
for (int lev = 1; lev <= finest_level; ++lev)
{
+ amrex::IntVect do_pml_Lo_MR = amrex::IntVect::TheUnitVector();
+#ifdef WARPX_DIM_RZ
+ //In cylindrical geometry, if the edge of the patch is at r=0, do not add PML
+ if ((max_level > 0) && (fine_tag_lo[0]==0.)) {
+ do_pml_Lo_MR[0] = 0;
+ }
+#endif
pml[lev].reset(new PML(boxArray(lev), DistributionMap(lev),
&Geom(lev), &Geom(lev-1),
- pml_ncell, pml_delta, refRatio(lev-1)[0], do_dive_cleaning,
- do_moving_window));
+ pml_ncell, pml_delta, refRatio(lev-1)[0],
+#ifdef WARPX_USE_PSATD
+ dt[lev], nox_fft, noy_fft, noz_fft, do_nodal,
+#endif
+ do_dive_cleaning, do_moving_window,
+ do_pml_Lo_MR, amrex::IntVect::TheUnitVector()));
}
}
}
@@ -226,7 +245,7 @@ WarpX::InitOpenbc ()
Vector<int> alllohi(6*nprocs,100000);
MPI_Allgather(lohi, 6, MPI_INT, alllohi.data(), 6, MPI_INT, ParallelDescriptor::Communicator());
-
+
BoxList bl{IndexType::TheNodeType()};
for (int i = 0; i < nprocs; ++i)
{
@@ -252,7 +271,7 @@ WarpX::InitOpenbc ()
rho_openbc.copy(*rho, 0, 0, 1, rho->nGrow(), 0, gm.periodicity(), FabArrayBase::ADD);
const Real* dx = gm.CellSize();
-
+
warpx_openbc_potential(rho_openbc[myproc].dataPtr(), phi_openbc[myproc].dataPtr(), dx);
BoxArray nba = boxArray(lev);
@@ -322,7 +341,7 @@ WarpX::InitLevelData (int lev, Real time)
void
WarpX::InitLevelDataFFT (int lev, Real time)
{
-
+
Efield_fp_fft[lev][0]->setVal(0.0);
Efield_fp_fft[lev][1]->setVal(0.0);
Efield_fp_fft[lev][2]->setVal(0.0);
diff --git a/Source/Laser/LaserParticleContainer.cpp b/Source/Laser/LaserParticleContainer.cpp
index 3d3447a3c..786ebc622 100644
--- a/Source/Laser/LaserParticleContainer.cpp
+++ b/Source/Laser/LaserParticleContainer.cpp
@@ -453,7 +453,12 @@ LaserParticleContainer::Evolve (int lev,
pti.GetPosition(m_xp[thread_num], m_yp[thread_num], m_zp[thread_num]);
BL_PROFILE_VAR_STOP(blp_copy);
- if (rho) DepositCharge(pti, wp, rho, crho, 0, np_current, np, thread_num, lev);
+ if (rho) {
+ DepositCharge(pti, wp, rho, 0, 0, np_current, thread_num, lev, lev);
+ if (crho) {
+ DepositCharge(pti, wp, crho, 0, np_current, np-np_current, thread_num, lev, lev-1);
+ }
+ }
//
// Particle Push
@@ -504,15 +509,15 @@ LaserParticleContainer::Evolve (int lev,
// Current Deposition
//
// Deposit inside domains
- DepositCurrentFortran(pti, wp, uxp, uyp, uzp, &jx, &jy, &jz,
- 0, np_current, thread_num,
- lev, lev, dt);
+ DepositCurrent(pti, wp, uxp, uyp, uzp, &jx, &jy, &jz,
+ 0, np_current, thread_num,
+ lev, lev, dt);
bool has_buffer = cjx;
if (has_buffer){
// Deposit in buffers
- DepositCurrentFortran(pti, wp, uxp, uyp, uzp, cjx, cjy, cjz,
- np_current, np-np_current, thread_num,
- lev, lev-1, dt);
+ DepositCurrent(pti, wp, uxp, uyp, uzp, cjx, cjy, cjz,
+ np_current, np-np_current, thread_num,
+ lev, lev-1, dt);
}
//
@@ -522,7 +527,12 @@ LaserParticleContainer::Evolve (int lev,
pti.SetPosition(m_xp[thread_num], m_yp[thread_num], m_zp[thread_num]);
BL_PROFILE_VAR_STOP(blp_copy);
- if (rho) DepositCharge(pti, wp, rho, crho, 1, np_current, np, thread_num, lev);
+ if (rho) {
+ DepositCharge(pti, wp, rho, 1, 0, np_current, thread_num, lev, lev);
+ if (crho) {
+ DepositCharge(pti, wp, crho, 1, np_current, np-np_current, thread_num, lev, lev-1);
+ }
+ }
if (cost) {
const Box& tbx = pti.tilebox();
diff --git a/Source/Make.WarpX b/Source/Make.WarpX
index 3060ae8f0..e3a33a00f 100644
--- a/Source/Make.WarpX
+++ b/Source/Make.WarpX
@@ -97,16 +97,24 @@ ifeq ($(USE_OPENBC_POISSON),TRUE)
endif
ifeq ($(USE_OPENPMD), TRUE)
- OPENPMD_LIB_PATH ?= NOT_SET
- ifneq ($(OPENPMD_LIB_PATH),NOT_SET)
- LIBRARY_LOCATIONS += $(OPENPMD_LIB_PATH)
+ # try pkg-config query
+ ifeq (0, $(shell pkg-config "openPMD >= 0.9.0"; echo $$?))
+ CXXFLAGS += $(shell pkg-config --cflags openPMD)
+ LDFLAGS += $(shell pkg-config --libs openPMD)
+ LDFLAGS += -Xlinker -rpath -Xlinker $(shell pkg-config --variable=libdir openPMD)
+ # fallback to manual settings
+ else
+ OPENPMD_LIB_PATH ?= NOT_SET
+ ifneq ($(OPENPMD_LIB_PATH),NOT_SET)
+ LIBRARY_LOCATIONS += $(OPENPMD_LIB_PATH)
+ endif
+ OPENPMD_INCLUDE_PATH ?= NOT_SET
+ ifneq ($(OPENPMD_INCLUDE_PATH),NOT_SET)
+ INCLUDE_LOCATIONS += $(OPENPMD_INCLUDE_PATH)
+ endif
+ libraries += -lopenPMD
endif
- OPENPMD_INCLUDE_PATH ?= NOT_SET
- ifneq ($(OPENPMD_INCLUDE_PATH),NOT_SET)
- INCLUDE_LOCATIONS += $(OPENPMD_INCLUDE_PATH)
- endif
- DEFINES += -DWARPX_USE_OPENPMD -DopenPMD_HAVE_MPI=1
- LIBRARIES += -lopenPMD -lhdf5
+ DEFINES += -DWARPX_USE_OPENPMD
endif
@@ -115,7 +123,7 @@ ifeq ($(USE_PSATD),TRUE)
DEFINES += -DWARPX_USE_PSATD
ifeq ($(USE_CUDA),FALSE) # Running on CPU
# Use FFTW
- LIBRARIES += -lfftw3_mpi -lfftw3 -lfftw3_threads
+ libraries += -lfftw3_mpi -lfftw3 -lfftw3_threads
FFTW_HOME ?= NOT_SET
ifneq ($(FFTW_HOME),NOT_SET)
VPATH_LOCATIONS += $(FFTW_HOME)/include
@@ -127,13 +135,12 @@ ifeq ($(USE_PSATD),TRUE)
DEFINES += -DFFTW # PICSAR uses it
else
# Use cuFFT
- LIBRARIES += -lcufft
+ libraries += -lcufft
endif
endif
ifeq ($(USE_RZ),TRUE)
USERSuffix := $(USERSuffix).RZ
- DEFINES += -DWARPX_RZ
endif
ifeq ($(DO_ELECTROSTATIC),TRUE)
@@ -151,7 +158,7 @@ ifeq ($(USE_HDF5),TRUE)
LIBRARY_LOCATIONS += $(HDF5_HOME)/lib
endif
DEFINES += -DWARPX_USE_HDF5
- LIBRARIES += -lhdf5 -lz
+ libraries += -lhdf5 -lz
endif
# job_info support
diff --git a/Source/Parser/GpuParser.H b/Source/Parser/GpuParser.H
new file mode 100644
index 000000000..1533ee6b9
--- /dev/null
+++ b/Source/Parser/GpuParser.H
@@ -0,0 +1,72 @@
+#ifndef WARPX_GPU_PARSER_H_
+#define WARPX_GPU_PARSER_H_
+
+#include <WarpXParser.H>
+#include <AMReX_Gpu.H>
+
+// When compiled for CPU, wrap WarpXParser and enable threading.
+// When compiled for GPU, store one copy of the parser in
+// CUDA managed memory for __device__ code, and one copy of the parser
+// in CUDA managed memory for __host__ code. This way, the parser can be
+// efficiently called from both host and device.
+class GpuParser
+{
+public:
+ GpuParser (WarpXParser const& wp);
+ void clear ();
+
+ AMREX_GPU_HOST_DEVICE
+ double
+ operator() (double x, double y, double z) const noexcept
+ {
+#ifdef AMREX_USE_GPU
+
+#ifdef AMREX_DEVICE_COMPILE
+// WarpX compiled for GPU, function compiled for __device__
+ // the 3D position of each particle is stored in shared memory.
+ amrex::Gpu::SharedMemory<double> gsm;
+ double* p = gsm.dataPtr();
+ int tid = threadIdx.x + threadIdx.y*blockDim.x + threadIdx.z*(blockDim.x*blockDim.y);
+ p[tid*3] = x;
+ p[tid*3+1] = y;
+ p[tid*3+2] = z;
+ return wp_ast_eval(m_gpu_parser.ast);
+#else
+// WarpX compiled for GPU, function compiled for __host__
+ m_var.x = x;
+ m_var.y = y;
+ m_var.z = z;
+ return wp_ast_eval(m_cpu_parser.ast);
+#endif
+
+#else
+// WarpX compiled for CPU
+#ifdef _OPENMP
+ int tid = omp_get_thread_num();
+#else
+ int tid = 0;
+#endif
+ m_var[tid].x = x;
+ m_var[tid].y = y;
+ m_var[tid].z = z;
+ return wp_ast_eval(m_parser[tid]->ast);
+#endif
+ }
+
+private:
+
+#ifdef AMREX_USE_GPU
+ // Copy of the parser running on __device__
+ struct wp_parser m_gpu_parser;
+ // Copy of the parser running on __host__
+ struct wp_parser m_cpu_parser;
+ mutable amrex::XDim3 m_var;
+#else
+ // One parser (and one set of variables) per thread; see nthreads
+ struct wp_parser** m_parser;
+ mutable amrex::XDim3* m_var;
+ int nthreads;
+#endif
+};
+
+#endif
diff --git a/Source/Parser/GpuParser.cpp b/Source/Parser/GpuParser.cpp
new file mode 100644
index 000000000..db1c2287d
--- /dev/null
+++ b/Source/Parser/GpuParser.cpp
@@ -0,0 +1,73 @@
+#include <GpuParser.H>
+
+GpuParser::GpuParser (WarpXParser const& wp)
+{
+#ifdef AMREX_USE_GPU
+
+ struct wp_parser* a_wp = wp.m_parser;
+ // Initialize GPU parser: allocate memory in CUDA managed memory,
+ // copy all data needed on GPU to m_gpu_parser
+ m_gpu_parser.sz_mempool = wp_ast_size(a_wp->ast);
+ m_gpu_parser.p_root = (struct wp_node*)
+ amrex::The_Managed_Arena()->alloc(m_gpu_parser.sz_mempool);
+ m_gpu_parser.p_free = m_gpu_parser.p_root;
+ // 0: don't free the source
+ m_gpu_parser.ast = wp_parser_ast_dup(&m_gpu_parser, a_wp->ast, 0);
+ wp_parser_regvar_gpu(&m_gpu_parser, "x", 0);
+ wp_parser_regvar_gpu(&m_gpu_parser, "y", 1);
+ wp_parser_regvar_gpu(&m_gpu_parser, "z", 2);
+
+ // Initialize CPU parser: allocate memory in CUDA managed memory,
+ // copy all data needed on CPU to m_cpu_parser
+ m_cpu_parser.sz_mempool = wp_ast_size(a_wp->ast);
+ m_cpu_parser.p_root = (struct wp_node*)
+ amrex::The_Managed_Arena()->alloc(m_cpu_parser.sz_mempool);
+ m_cpu_parser.p_free = m_cpu_parser.p_root;
+ // 0: don't free the source
+ m_cpu_parser.ast = wp_parser_ast_dup(&m_cpu_parser, a_wp->ast, 0);
+ wp_parser_regvar(&m_cpu_parser, "x", &(m_var.x));
+ wp_parser_regvar(&m_cpu_parser, "y", &(m_var.y));
+ wp_parser_regvar(&m_cpu_parser, "z", &(m_var.z));
+
+#else // not defined AMREX_USE_GPU
+
+#ifdef _OPENMP
+ nthreads = omp_get_max_threads();
+#else // _OPENMP
+ nthreads = 1;
+#endif // _OPENMP
+
+ m_parser = ::new struct wp_parser*[nthreads];
+ m_var = ::new amrex::XDim3[nthreads];
+
+ for (int tid = 0; tid < nthreads; ++tid)
+ {
+#ifdef _OPENMP
+ m_parser[tid] = wp_parser_dup(wp.m_parser[tid]);
+#else // _OPENMP
+ m_parser[tid] = wp_parser_dup(wp.m_parser);
+#endif // _OPENMP
+ wp_parser_regvar(m_parser[tid], "x", &(m_var[tid].x));
+ wp_parser_regvar(m_parser[tid], "y", &(m_var[tid].y));
+ wp_parser_regvar(m_parser[tid], "z", &(m_var[tid].z));
+ }
+
+#endif // AMREX_USE_GPU
+}
+
+void
+GpuParser::clear ()
+{
+#ifdef AMREX_USE_GPU
+ amrex::The_Managed_Arena()->free(m_gpu_parser.ast);
+ amrex::The_Managed_Arena()->free(m_cpu_parser.ast);
+#else
+ for (int tid = 0; tid < nthreads; ++tid)
+ {
+ wp_parser_delete(m_parser[tid]);
+ }
+ ::delete[] m_parser;
+ ::delete[] m_var;
+#endif
+}
+
diff --git a/Source/Parser/Make.package b/Source/Parser/Make.package
index 26ef4fb43..5ce02cbda 100644
--- a/Source/Parser/Make.package
+++ b/Source/Parser/Make.package
@@ -3,6 +3,8 @@ cEXE_sources += wp_parser_y.c wp_parser.tab.c wp_parser.lex.c wp_parser_c.c
cEXE_headers += wp_parser_y.h wp_parser.tab.h wp_parser.lex.h wp_parser_c.h
CEXE_sources += WarpXParser.cpp
CEXE_headers += WarpXParser.H
+CEXE_headers += GpuParser.H
+CEXE_sources += GpuParser.cpp
INCLUDE_LOCATIONS += $(WARPX_HOME)/Source/Parser
VPATH_LOCATIONS += $(WARPX_HOME)/Source/Parser
diff --git a/Source/Parser/WarpXParser.H b/Source/Parser/WarpXParser.H
index 046491e29..ffa61e457 100644
--- a/Source/Parser/WarpXParser.H
+++ b/Source/Parser/WarpXParser.H
@@ -13,6 +13,8 @@
#include <omp.h>
#endif
+class GpuParser;
+
class WarpXParser
{
public:
@@ -46,6 +48,8 @@ public:
std::set<std::string> symbols () const;
+ friend class GpuParser;
+
private:
void clear ();
diff --git a/Source/Parser/wp_parser_c.h b/Source/Parser/wp_parser_c.h
index d810bd685..3aafdec65 100644
--- a/Source/Parser/wp_parser_c.h
+++ b/Source/Parser/wp_parser_c.h
@@ -2,6 +2,8 @@
#define WP_PARSER_C_H_
#include "wp_parser_y.h"
+#include <AMReX_GpuQualifiers.H>
+#include <AMReX_Extension.H>
#ifdef __cplusplus
extern "C" {
@@ -18,71 +20,167 @@ extern "C" {
#include <set>
#include <string>
-inline
-double
+AMREX_GPU_HOST_DEVICE
+inline double
wp_ast_eval (struct wp_node* node)
{
double result;
+#ifdef AMREX_DEVICE_COMPILE
+ extern __shared__ double extern_xyz[];
+ int tid = threadIdx.x + threadIdx.y*blockDim.x + threadIdx.z*(blockDim.x*blockDim.y);
+ double* x = extern_xyz + tid*3;
+#endif
+
switch (node->type)
{
case WP_NUMBER:
+ {
result = ((struct wp_number*)node)->value;
break;
+ }
case WP_SYMBOL:
- result = *(((struct wp_symbol*)node)->pointer);
+ {
+#ifdef AMREX_DEVICE_COMPILE
+ int i =((struct wp_symbol*)node)->ip.i;
+ result = x[i];
+#else
+ result = *(((struct wp_symbol*)node)->ip.p);
+#endif
break;
+ }
case WP_ADD:
+ {
result = wp_ast_eval(node->l) + wp_ast_eval(node->r);
break;
+ }
case WP_SUB:
+ {
result = wp_ast_eval(node->l) - wp_ast_eval(node->r);
break;
+ }
case WP_MUL:
+ {
result = wp_ast_eval(node->l) * wp_ast_eval(node->r);
break;
+ }
case WP_DIV:
+ {
result = wp_ast_eval(node->l) / wp_ast_eval(node->r);
break;
+ }
case WP_NEG:
+ {
result = -wp_ast_eval(node->l);
break;
+ }
case WP_F1:
+ {
result = wp_call_f1(((struct wp_f1*)node)->ftype,
wp_ast_eval(((struct wp_f1*)node)->l));
break;
+ }
case WP_F2:
+ {
result = wp_call_f2(((struct wp_f2*)node)->ftype,
wp_ast_eval(((struct wp_f2*)node)->l),
wp_ast_eval(((struct wp_f2*)node)->r));
break;
+ }
case WP_ADD_VP:
- result = node->lvp.v + *(node->rp);
+ {
+#ifdef AMREX_DEVICE_COMPILE
+ int i = node->rip.i;
+ result = node->lvp.v + x[i];
+#else
+ result = node->lvp.v + *(node->rip.p);
+#endif
break;
+ }
case WP_ADD_PP:
- result = *(node->lvp.p) + *(node->rp);
+ {
+#ifdef AMREX_DEVICE_COMPILE
+ int i = node->lvp.ip.i;
+ int j = node->rip.i;
+ result = x[i] + x[j];
+#else
+ result = *(node->lvp.ip.p) + *(node->rip.p);
+#endif
break;
+ }
case WP_SUB_VP:
- result = node->lvp.v - *(node->rp);
+ {
+#ifdef AMREX_DEVICE_COMPILE
+ int i = node->rip.i;
+ result = node->lvp.v - x[i];
+#else
+ result = node->lvp.v - *(node->rip.p);
+#endif
break;
+ }
case WP_SUB_PP:
- result = *(node->lvp.p) - *(node->rp);
+ {
+#ifdef AMREX_DEVICE_COMPILE
+ int i = node->lvp.ip.i;
+ int j = node->rip.i;
+ result = x[i] - x[j];
+#else
+ result = *(node->lvp.ip.p) - *(node->rip.p);
+#endif
break;
+ }
case WP_MUL_VP:
- result = node->lvp.v * *(node->rp);
+ {
+#ifdef AMREX_DEVICE_COMPILE
+ int i = node->rip.i;
+ result = node->lvp.v * x[i];
+#else
+ result = node->lvp.v * *(node->rip.p);
+#endif
break;
+ }
case WP_MUL_PP:
- result = *(node->lvp.p) * *(node->rp);
+ {
+#ifdef AMREX_DEVICE_COMPILE
+ int i = node->lvp.ip.i;
+ int j = node->rip.i;
+ result = x[i] * x[j];
+#else
+ result = *(node->lvp.ip.p) * *(node->rip.p);
+#endif
break;
+ }
case WP_DIV_VP:
- result = node->lvp.v / *(node->rp);
+ {
+#ifdef AMREX_DEVICE_COMPILE
+ int i = node->rip.i;
+ result = node->lvp.v / x[i];
+#else
+ result = node->lvp.v / *(node->rip.p);
+#endif
break;
+ }
case WP_DIV_PP:
- result = *(node->lvp.p) / *(node->rp);
+ {
+#ifdef AMREX_DEVICE_COMPILE
+ int i = node->lvp.ip.i;
+ int j = node->rip.i;
+ result = x[i] / x[j];
+#else
+ result = *(node->lvp.ip.p) / *(node->rip.p);
+#endif
break;
+ }
case WP_NEG_P:
- result = -*(node->lvp.p);
+ {
+#ifdef AMREX_DEVICE_COMPILE
+ int i = node->rip.i;
+ result = -x[i];
+#else
+ result = -*(node->lvp.ip.p);
+#endif
break;
+ }
default:
yyerror("wp_ast_eval: unknown node type %d\n", node->type);
}
diff --git a/Source/Parser/wp_parser_y.c b/Source/Parser/wp_parser_y.c
index 46cb199db..259f9368b 100644
--- a/Source/Parser/wp_parser_y.c
+++ b/Source/Parser/wp_parser_y.c
@@ -6,6 +6,8 @@
#include "wp_parser_y.h"
#include "wp_parser.tab.h"
+#include <AMReX_GpuQualifiers.H>
+
static struct wp_node* wp_root = NULL;
/* This is called by a bison rule to store the original AST in a
@@ -33,7 +35,7 @@ wp_makesymbol (char* name)
struct wp_symbol* symbol = (struct wp_symbol*) malloc(sizeof(struct wp_symbol));
symbol->type = WP_SYMBOL;
symbol->name = strdup(name);
- symbol->pointer = NULL;
+ symbol->ip.p = NULL;
return symbol;
}
@@ -74,13 +76,19 @@ wp_newf2 (enum wp_f2_t ftype, struct wp_node* l, struct wp_node* r)
return (struct wp_node*) tmp;
}
+AMREX_GPU_HOST_DEVICE
void
yyerror (char const *s, ...)
{
va_list vl;
va_start(vl, s);
+#ifdef AMREX_DEVICE_COMPILE
+ printf("%s\n", s); /* varargs are not forwarded here: print s verbatim, never as a format (its %-conversions would read garbage) */
+ assert(0);
+#else
vfprintf(stderr, s, vl);
fprintf(stderr, "\n");
+#endif
va_end(vl);
}
@@ -97,7 +105,7 @@ wp_parser_new (void)
my_parser->ast = wp_parser_ast_dup(my_parser, wp_root,1); /* 1: free the source wp_root */
- if (my_parser->p_root + my_parser->sz_mempool != my_parser->p_free) {
+ if ((char*)my_parser->p_root + my_parser->sz_mempool != (char*)my_parser->p_free) {
yyerror("wp_parser_new: error in memory size");
exit(1);
}
@@ -145,6 +153,7 @@ wp_parser_dup (struct wp_parser* source)
return dest;
}
+AMREX_GPU_HOST_DEVICE
double
wp_call_f1 (enum wp_f1_t type, double a)
{
@@ -175,6 +184,7 @@ wp_call_f1 (enum wp_f1_t type, double a)
}
}
+AMREX_GPU_HOST_DEVICE
double
wp_call_f2 (enum wp_f2_t type, double a, double b)
{
@@ -346,23 +356,23 @@ wp_parser_ast_dup (struct wp_parser* my_parser, struct wp_node* node, int move)
#define WP_MOVEUP_R(node, v) \
struct wp_node* n = node->r->r; \
- double* p = node->r->rp; \
+ double* p = node->r->rip.p; \
node->r = n; \
node->lvp.v = v; \
- node->rp = p;
+ node->rip.p = p;
#define WP_MOVEUP_L(node, v) \
struct wp_node* n = node->l->r; \
- double* p = node->l->rp; \
+ double* p = node->l->rip.p; \
node->r = n; \
node->lvp.v = v; \
- node->rp = p;
+ node->rip.p = p;
#define WP_EVAL_R(node) node->r->lvp.v
#define WP_EVAL_L(node) node->l->lvp.v
#define WP_NEG_MOVEUP(node) \
node->r = node->l->r; \
node->lvp.v = -node->l->lvp.v; \
- node->rp = node->l->rp;
+ node->rip.p = node->l->rip.p;
void
wp_ast_optimize (struct wp_node* node)
@@ -391,22 +401,22 @@ wp_ast_optimize (struct wp_node* node)
node->r->type == WP_SYMBOL)
{
node->lvp.v = ((struct wp_number*)(node->l))->value;
- node->rp = ((struct wp_symbol*)(node->r))->pointer;
+ node->rip.p = ((struct wp_symbol*)(node->r))->ip.p;
node->type = WP_ADD_VP;
}
else if (node->l->type == WP_SYMBOL &&
node->r->type == WP_NUMBER)
{
node->lvp.v = ((struct wp_number*)(node->r))->value;
- node->rp = ((struct wp_symbol*)(node->l))->pointer;
+ node->rip.p = ((struct wp_symbol*)(node->l))->ip.p;
node->r = node->l;
node->type = WP_ADD_VP;
}
else if (node->l->type == WP_SYMBOL &&
node->r->type == WP_SYMBOL)
{
- node->lvp.p = ((struct wp_symbol*)(node->l))->pointer;
- node->rp = ((struct wp_symbol*)(node->r))->pointer;
+ node->lvp.ip.p = ((struct wp_symbol*)(node->l))->ip.p;
+ node->rip.p = ((struct wp_symbol*)(node->r))->ip.p;
node->type = WP_ADD_PP;
}
else if (node->l->type == WP_NUMBER &&
@@ -454,22 +464,22 @@ wp_ast_optimize (struct wp_node* node)
node->r->type == WP_SYMBOL)
{
node->lvp.v = ((struct wp_number*)(node->l))->value;
- node->rp = ((struct wp_symbol*)(node->r))->pointer;
+ node->rip.p = ((struct wp_symbol*)(node->r))->ip.p;
node->type = WP_SUB_VP;
}
else if (node->l->type == WP_SYMBOL &&
node->r->type == WP_NUMBER)
{
node->lvp.v = -((struct wp_number*)(node->r))->value;
- node->rp = ((struct wp_symbol*)(node->l))->pointer;
+ node->rip.p = ((struct wp_symbol*)(node->l))->ip.p;
node->r = node->l;
node->type = WP_ADD_VP;
}
else if (node->l->type == WP_SYMBOL &&
node->r->type == WP_SYMBOL)
{
- node->lvp.p = ((struct wp_symbol*)(node->l))->pointer;
- node->rp = ((struct wp_symbol*)(node->r))->pointer;
+ node->lvp.ip.p = ((struct wp_symbol*)(node->l))->ip.p;
+ node->rip.p = ((struct wp_symbol*)(node->r))->ip.p;
node->type = WP_SUB_PP;
}
else if (node->l->type == WP_NUMBER &&
@@ -517,22 +527,22 @@ wp_ast_optimize (struct wp_node* node)
node->r->type == WP_SYMBOL)
{
node->lvp.v = ((struct wp_number*)(node->l))->value;
- node->rp = ((struct wp_symbol*)(node->r))->pointer;
+ node->rip.p = ((struct wp_symbol*)(node->r))->ip.p;
node->type = WP_MUL_VP;
}
else if (node->l->type == WP_SYMBOL &&
node->r->type == WP_NUMBER)
{
node->lvp.v = ((struct wp_number*)(node->r))->value;
- node->rp = ((struct wp_symbol*)(node->l))->pointer;
+ node->rip.p = ((struct wp_symbol*)(node->l))->ip.p;
node->r = node->l;
node->type = WP_MUL_VP;
}
else if (node->l->type == WP_SYMBOL &&
node->r->type == WP_SYMBOL)
{
- node->lvp.p = ((struct wp_symbol*)(node->l))->pointer;
- node->rp = ((struct wp_symbol*)(node->r))->pointer;
+ node->lvp.ip.p = ((struct wp_symbol*)(node->l))->ip.p;
+ node->rip.p = ((struct wp_symbol*)(node->r))->ip.p;
node->type = WP_MUL_PP;
}
else if (node->l->type == WP_NUMBER &&
@@ -580,22 +590,22 @@ wp_ast_optimize (struct wp_node* node)
node->r->type == WP_SYMBOL)
{
node->lvp.v = ((struct wp_number*)(node->l))->value;
- node->rp = ((struct wp_symbol*)(node->r))->pointer;
+ node->rip.p = ((struct wp_symbol*)(node->r))->ip.p;
node->type = WP_DIV_VP;
}
else if (node->l->type == WP_SYMBOL &&
node->r->type == WP_NUMBER)
{
node->lvp.v = 1./((struct wp_number*)(node->r))->value;
- node->rp = ((struct wp_symbol*)(node->l))->pointer;
+ node->rip.p = ((struct wp_symbol*)(node->l))->ip.p;
node->r = node->l;
node->type = WP_MUL_VP;
}
else if (node->l->type == WP_SYMBOL &&
node->r->type == WP_SYMBOL)
{
- node->lvp.p = ((struct wp_symbol*)(node->l))->pointer;
- node->rp = ((struct wp_symbol*)(node->r))->pointer;
+ node->lvp.ip.p = ((struct wp_symbol*)(node->l))->ip.p;
+ node->rip.p = ((struct wp_symbol*)(node->r))->ip.p;
node->type = WP_DIV_PP;
}
else if (node->l->type == WP_NUMBER &&
@@ -637,7 +647,7 @@ wp_ast_optimize (struct wp_node* node)
}
else if (node->l->type == WP_SYMBOL)
{
- node->lvp.p = ((struct wp_symbol*)(node->l))->pointer;
+ node->lvp.ip.p = ((struct wp_symbol*)(node->l))->ip.p;
node->type = WP_NEG_P;
}
else if (node->l->type == WP_ADD_VP)
@@ -936,7 +946,7 @@ wp_ast_regvar (struct wp_node* node, char const* name, double* p)
break;
case WP_SYMBOL:
if (strcmp(name, ((struct wp_symbol*)node)->name) == 0) {
- ((struct wp_symbol*)node)->pointer = p;
+ ((struct wp_symbol*)node)->ip.p = p;
}
break;
case WP_ADD:
@@ -961,11 +971,11 @@ wp_ast_regvar (struct wp_node* node, char const* name, double* p)
case WP_MUL_VP:
case WP_DIV_VP:
wp_ast_regvar(node->r, name, p);
- node->rp = ((struct wp_symbol*)(node->r))->pointer;
+ node->rip.p = ((struct wp_symbol*)(node->r))->ip.p;
break;
case WP_NEG_P:
wp_ast_regvar(node->l, name, p);
- node->lvp.p = ((struct wp_symbol*)(node->l))->pointer;
+ node->lvp.ip.p = ((struct wp_symbol*)(node->l))->ip.p;
break;
case WP_ADD_PP:
case WP_SUB_PP:
@@ -973,8 +983,8 @@ wp_ast_regvar (struct wp_node* node, char const* name, double* p)
case WP_DIV_PP:
wp_ast_regvar(node->l, name, p);
wp_ast_regvar(node->r, name, p);
- node->lvp.p = ((struct wp_symbol*)(node->l))->pointer;
- node->rp = ((struct wp_symbol*)(node->r))->pointer;
+ node->lvp.ip.p = ((struct wp_symbol*)(node->l))->ip.p;
+ node->rip.p = ((struct wp_symbol*)(node->r))->ip.p;
break;
default:
yyerror("wp_ast_regvar: unknown node type %d\n", node->type);
@@ -982,6 +992,61 @@ wp_ast_regvar (struct wp_node* node, char const* name, double* p)
}
}
+void
+wp_ast_regvar_gpu (struct wp_node* node, char const* name, int i)
+{ /* Recursively bind variable `name` to integer index i (stored in the ip.i union member) in the AST rooted at node. */
+ switch (node->type)
+ {
+ case WP_NUMBER:
+ break;
+ case WP_SYMBOL:
+ if (strcmp(name, ((struct wp_symbol*)node)->name) == 0) {
+ ((struct wp_symbol*)node)->ip.i = i; /* only symbols whose name matches are touched */
+ }
+ break;
+ case WP_ADD:
+ case WP_SUB:
+ case WP_MUL:
+ case WP_DIV:
+ wp_ast_regvar_gpu(node->l, name, i);
+ wp_ast_regvar_gpu(node->r, name, i);
+ break;
+ case WP_NEG:
+ wp_ast_regvar_gpu(node->l, name, i);
+ break;
+ case WP_F1:
+ wp_ast_regvar_gpu(node->l, name, i);
+ break;
+ case WP_F2:
+ wp_ast_regvar_gpu(node->l, name, i);
+ wp_ast_regvar_gpu(node->r, name, i);
+ break;
+ case WP_ADD_VP:
+ case WP_SUB_VP:
+ case WP_MUL_VP:
+ case WP_DIV_VP:
+ wp_ast_regvar_gpu(node->r, name, i);
+ node->rip.i = ((struct wp_symbol*)(node->r))->ip.i; /* re-copy the symbol's index into the slot cached on this value/variable node */
+ break;
+ case WP_NEG_P:
+ wp_ast_regvar_gpu(node->l, name, i);
+ node->lvp.ip.i = ((struct wp_symbol*)(node->l))->ip.i; /* negation caches its single operand in lvp, not rip */
+ break;
+ case WP_ADD_PP:
+ case WP_SUB_PP:
+ case WP_MUL_PP:
+ case WP_DIV_PP:
+ wp_ast_regvar_gpu(node->l, name, i);
+ wp_ast_regvar_gpu(node->r, name, i);
+ node->lvp.ip.i = ((struct wp_symbol*)(node->l))->ip.i; /* left operand index */
+ node->rip.i = ((struct wp_symbol*)(node->r))->ip.i; /* right operand index */
+ break;
+ default:
+ yyerror("wp_ast_regvar_gpu: unknown node type %d\n", node->type);
+ exit(1);
+ }
+}
+
void wp_ast_setconst (struct wp_node* node, char const* name, double c)
{
switch (node->type)
@@ -1040,6 +1105,12 @@ wp_parser_regvar (struct wp_parser* parser, char const* name, double* p)
}
void
+wp_parser_regvar_gpu (struct wp_parser* parser, char const* name, int i)
+{
+ wp_ast_regvar_gpu(parser->ast, name, i); /* index-based counterpart of wp_parser_regvar: walks the parser's AST from its root */
+}
+
+void
wp_parser_setconst (struct wp_parser* parser, char const* name, double c)
{
wp_ast_setconst(parser->ast, name, c);
diff --git a/Source/Parser/wp_parser_y.h b/Source/Parser/wp_parser_y.h
index 4a3aeda40..8c9f8e4e4 100644
--- a/Source/Parser/wp_parser_y.h
+++ b/Source/Parser/wp_parser_y.h
@@ -1,6 +1,8 @@
#ifndef WP_PARSER_Y_H_
#define WP_PARSER_Y_H_
+#include <AMReX_GpuQualifiers.H>
+
#ifdef __cplusplus
#include <cstdlib>
extern "C" {
@@ -73,17 +75,22 @@ enum wp_node_t {
* wp_node_t type can be safely checked to determine their real type.
*/
-union wp_vp {
- double v;
+union wp_ip {
+ int i; // variable index, set by wp_ast_regvar_gpu (shares storage with p)
double* p;
};
+union wp_vp {
+ double v;
+ union wp_ip ip; // variable reference: pointer (p) or index (i)
+};
+
struct wp_node {
enum wp_node_t type;
struct wp_node* l;
struct wp_node* r;
union wp_vp lvp; // After optimization, this may store left value/pointer.
- double* rp; // this may store right pointer.
+ union wp_ip rip; // this may store right pointer (rip.p) or right variable index (rip.i).
};
struct wp_number {
@@ -94,7 +101,7 @@ struct wp_number {
struct wp_symbol {
enum wp_node_t type;
char* name;
- double* pointer;
+ union wp_ip ip;
};
struct wp_f1 { /* Builtin functions with one argument */
@@ -124,6 +131,7 @@ struct wp_node* wp_newf1 (enum wp_f1_t ftype, struct wp_node* l);
struct wp_node* wp_newf2 (enum wp_f2_t ftype, struct wp_node* l,
struct wp_node* r);
+AMREX_GPU_HOST_DEVICE
void yyerror (char const *s, ...);
/*******************************************************************/
@@ -146,6 +154,7 @@ struct wp_parser* wp_parser_dup (struct wp_parser* source);
struct wp_node* wp_parser_ast_dup (struct wp_parser* parser, struct wp_node* src, int move);
void wp_parser_regvar (struct wp_parser* parser, char const* name, double* p);
+void wp_parser_regvar_gpu (struct wp_parser* parser, char const* name, int i);
void wp_parser_setconst (struct wp_parser* parser, char const* name, double c);
/* We need to walk the tree in these functions */
@@ -153,10 +162,11 @@ void wp_ast_optimize (struct wp_node* node);
size_t wp_ast_size (struct wp_node* node);
void wp_ast_print (struct wp_node* node);
void wp_ast_regvar (struct wp_node* node, char const* name, double* p);
+void wp_ast_regvar_gpu (struct wp_node* node, char const* name, int i);
void wp_ast_setconst (struct wp_node* node, char const* name, double c);
-double wp_call_f1 (enum wp_f1_t type, double a);
-double wp_call_f2 (enum wp_f2_t type, double a, double b);
+AMREX_GPU_HOST_DEVICE double wp_call_f1 (enum wp_f1_t type, double a);
+AMREX_GPU_HOST_DEVICE double wp_call_f2 (enum wp_f2_t type, double a, double b);
#ifdef __cplusplus
}
diff --git a/Source/Particles/Deposition/ChargeDeposition.H b/Source/Particles/Deposition/ChargeDeposition.H
new file mode 100755
index 000000000..a6573b7ab
--- /dev/null
+++ b/Source/Particles/Deposition/ChargeDeposition.H
@@ -0,0 +1,97 @@
+#ifndef CHARGEDEPOSITION_H_
+#define CHARGEDEPOSITION_H_
+
+#include "ShapeFactors.H"
+
+/* \brief Deposit the charge of np_to_depose particles into rho_arr, using shape factors of order depos_order.
+ * \param xp, yp, zp : Pointer to arrays of particle positions.
+ * \param wp : Pointer to array of particle weights.
+ * \param rho_arr : Array4 of charge density, either full array or tile.
+ * \param np_to_depose : Number of particles for which charge is deposited.
+ * \param dx : 3D cell size
+ * \param xyzmin : Physical lower bounds of domain.
+ * \param lo : Index lower bounds of domain.
+ * \param q : species charge.
+ */
+template <int depos_order>
+void doChargeDepositionShapeN(const amrex::Real * const xp,
+ const amrex::Real * const yp,
+ const amrex::Real * const zp,
+ const amrex::Real * const wp,
+ const amrex::Array4<amrex::Real>& rho_arr,
+ const long np_to_depose,
+ const std::array<amrex::Real,3>& dx,
+ const std::array<amrex::Real, 3> xyzmin,
+ const amrex::Dim3 lo,
+ const amrex::Real q)
+{
+ const amrex::Real dxi = 1.0/dx[0];
+ const amrex::Real dzi = 1.0/dx[2];
+#if (AMREX_SPACEDIM == 2)
+ const amrex::Real invvol = dxi*dzi;
+#elif (defined WARPX_DIM_3D)
+ const amrex::Real dyi = 1.0/dx[1];
+ const amrex::Real invvol = dxi*dyi*dzi;
+#endif
+
+ const amrex::Real xmin = xyzmin[0];
+ const amrex::Real ymin = xyzmin[1]; // only read in the WARPX_DIM_3D branch below
+ const amrex::Real zmin = xyzmin[2];
+
+ // Loop over particles and deposit into rho_arr
+ amrex::ParallelFor(
+ np_to_depose,
+ [=] AMREX_GPU_DEVICE (long ip) {
+ // --- Get particle quantities
+ const amrex::Real wq = q*wp[ip]*invvol; // charge times weight, divided by the cell volume
+
+ // --- Compute shape factors
+ // x direction
+ // Get particle position in grid coordinates
+#if (defined WARPX_DIM_RZ)
+ const amrex::Real r = std::sqrt(xp[ip]*xp[ip] + yp[ip]*yp[ip]);
+ const amrex::Real x = (r - xmin)*dxi;
+#else
+ const amrex::Real x = (xp[ip] - xmin)*dxi;
+#endif
+ // Compute shape factors for node-centered quantities
+ amrex::Real AMREX_RESTRICT sx[depos_order + 1];
+ // i: leftmost grid point (node-centered) that the particle touches
+ const int i = compute_shape_factor<depos_order>(sx, x);
+
+#if (defined WARPX_DIM_3D)
+ // y direction
+ const amrex::Real y = (yp[ip] - ymin)*dyi;
+ amrex::Real AMREX_RESTRICT sy[depos_order + 1];
+ const int j = compute_shape_factor<depos_order>(sy, y);
+#endif
+ // z direction
+ const amrex::Real z = (zp[ip] - zmin)*dzi;
+ amrex::Real AMREX_RESTRICT sz[depos_order + 1];
+ const int k = compute_shape_factor<depos_order>(sz, z);
+
+ // Deposit charge into rho_arr (atomic adds, since several particles may write to the same cell)
+#if (defined WARPX_DIM_2D) || (defined WARPX_DIM_RZ)
+ for (int iz=0; iz<=depos_order; iz++){
+ for (int ix=0; ix<=depos_order; ix++){
+ amrex::Gpu::Atomic::Add(
+ &rho_arr(lo.x+i+ix, lo.y+k+iz, 0),
+ sx[ix]*sz[iz]*wq);
+ }
+ }
+#elif (defined WARPX_DIM_3D)
+ for (int iz=0; iz<=depos_order; iz++){
+ for (int iy=0; iy<=depos_order; iy++){
+ for (int ix=0; ix<=depos_order; ix++){
+ amrex::Gpu::Atomic::Add(
+ &rho_arr(lo.x+i+ix, lo.y+j+iy, lo.z+k+iz),
+ sx[ix]*sy[iy]*sz[iz]*wq);
+ }
+ }
+ }
+#endif
+ }
+ );
+}
+
+#endif // CHARGEDEPOSITION_H_
diff --git a/Source/Particles/Deposition/CurrentDeposition.H b/Source/Particles/Deposition/CurrentDeposition.H
index 97bc53c20..4a392b57e 100644
--- a/Source/Particles/Deposition/CurrentDeposition.H
+++ b/Source/Particles/Deposition/CurrentDeposition.H
@@ -1,52 +1,7 @@
#ifndef CURRENTDEPOSITION_H_
#define CURRENTDEPOSITION_H_
-using namespace amrex;
-
-// Compute shape factor and return index of leftmost cell where
-// particle writes.
-// Specialized templates are defined below for orders 1, 2 and 3.
-template <int depos_order>
-AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
-int compute_shape_factor(Real* const sx, Real xint);
-
-// Compute shape factor for order 1.
-template <>
-AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
-int compute_shape_factor <1> (Real* const sx, Real xmid){
- int j = (int) xmid;
- Real xint = xmid-j;
- sx[0] = 1.0 - xint;
- sx[1] = xint;
- return j;
-}
-
-// Compute shape factor for order 2.
-template <>
-AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
-int compute_shape_factor <2> (Real* const sx, Real xmid){
- int j = (int) (xmid+0.5);
- Real xint = xmid-j;
- sx[0] = 0.5*(0.5-xint)*(0.5-xint);
- sx[1] = 0.75-xint*xint;
- sx[2] = 0.5*(0.5+xint)*(0.5+xint);
- // index of the leftmost cell where particle deposits
- return j-1;
-}
-
-// Compute shape factor for order 3.
-template <>
-AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
-int compute_shape_factor <3> (Real* const sx, Real xmid){
- int j = (int) xmid;
- Real xint = xmid-j;
- sx[0] = 1.0/6.0*(1.0-xint)*(1.0-xint)*(1.0-xint);
- sx[1] = 2.0/3.0-xint*xint*(1-xint/2.0);
- sx[2] = 2.0/3.0-(1-xint)*(1-xint)*(1.0-0.5*(1-xint));
- sx[3] = 1.0/6.0*xint*xint*xint;
- // index of the leftmost cell where particle deposits
- return j-1;
-}
+#include "ShapeFactors.H"
/* \brief Current Deposition for thread thread_num
* /param xp, yp, zp : Pointer to arrays of particle positions.
@@ -55,9 +10,7 @@ int compute_shape_factor <3> (Real* const sx, Real xmid){
* \param jx_arr : Array4 of current density, either full array or tile.
* \param jy_arr : Array4 of current density, either full array or tile.
* \param jz_arr : Array4 of current density, either full array or tile.
- * \param offset : Index of first particle for which current is deposited
* \param np_to_depose : Number of particles for which current is deposited.
- Particles [offset,offset+np_tp_depose] deposit current.
* \param dt : Time step for particle level
* \param dx : 3D cell size
* \param xyzmin : Physical lower bounds of domain.
@@ -66,164 +19,144 @@ int compute_shape_factor <3> (Real* const sx, Real xmid){
* /param q : species charge.
*/
template <int depos_order>
-void doDepositionShapeN(const Real * const xp, const Real * const yp, const Real * const zp,
- const Real * const wp, const Real * const uxp,
- const Real * const uyp, const Real * const uzp,
- const amrex::Array4<amrex::Real>& jx_arr,
- const amrex::Array4<amrex::Real>& jy_arr,
+void doDepositionShapeN(const amrex::Real * const xp,
+ const amrex::Real * const yp,
+ const amrex::Real * const zp,
+ const amrex::Real * const wp,
+ const amrex::Real * const uxp,
+ const amrex::Real * const uyp,
+ const amrex::Real * const uzp,
+ const amrex::Array4<amrex::Real>& jx_arr,
+ const amrex::Array4<amrex::Real>& jy_arr,
const amrex::Array4<amrex::Real>& jz_arr,
- const long offset, const long np_to_depose,
- const amrex::Real dt, const std::array<amrex::Real,3>& dx,
- const std::array<Real, 3> xyzmin,
- const Dim3 lo,
+ const long np_to_depose, const amrex::Real dt,
+ const std::array<amrex::Real,3>& dx,
+ const std::array<amrex::Real, 3> xyzmin,
+ const amrex::Dim3 lo,
const amrex::Real stagger_shift,
const amrex::Real q)
{
- const Real dxi = 1.0/dx[0];
- const Real dzi = 1.0/dx[2];
- const Real dts2dx = 0.5*dt*dxi;
- const Real dts2dz = 0.5*dt*dzi;
+ const amrex::Real dxi = 1.0/dx[0];
+ const amrex::Real dzi = 1.0/dx[2];
+ const amrex::Real dts2dx = 0.5*dt*dxi;
+ const amrex::Real dts2dz = 0.5*dt*dzi;
#if (AMREX_SPACEDIM == 2)
- const Real invvol = dxi*dzi;
-#else // (AMREX_SPACEDIM == 3)
- const Real dyi = 1.0/dx[1];
- const Real dts2dy = 0.5*dt*dyi;
- const Real invvol = dxi*dyi*dzi;
+ const amrex::Real invvol = dxi*dzi;
+#elif (defined WARPX_DIM_3D)
+ const amrex::Real dyi = 1.0/dx[1];
+ const amrex::Real dts2dy = 0.5*dt*dyi;
+ const amrex::Real invvol = dxi*dyi*dzi;
#endif
- const Real xmin = xyzmin[0];
- const Real ymin = xyzmin[1];
- const Real zmin = xyzmin[2];
- const Real clightsq = 1.0/PhysConst::c/PhysConst::c;
+ const amrex::Real xmin = xyzmin[0];
+ const amrex::Real ymin = xyzmin[1];
+ const amrex::Real zmin = xyzmin[2];
+ const amrex::Real clightsq = 1.0/PhysConst::c/PhysConst::c;
// Loop over particles and deposit into jx_arr, jy_arr and jz_arr
- ParallelFor( np_to_depose,
- [=] AMREX_GPU_DEVICE (long ip) {
- // --- Get particle quantities
- const Real gaminv = 1.0/std::sqrt(1.0 + uxp[ip]*uxp[ip]*clightsq
- + uyp[ip]*uyp[ip]*clightsq
- + uzp[ip]*uzp[ip]*clightsq);
- const Real wq = q*wp[ip];
- const Real vx = uxp[ip]*gaminv;
- const Real vy = uyp[ip]*gaminv;
- const Real vz = uzp[ip]*gaminv;
- // wqx, wqy wqz are particle current in each direction
- const Real wqx = wq*invvol*vx;
- const Real wqy = wq*invvol*vy;
- const Real wqz = wq*invvol*vz;
-
- // --- Compute shape factors
- // x direction
- // Get particle position after 1/2 push back in position
- const Real xmid = (xp[ip]-xmin)*dxi-dts2dx*vx;
- // Compute shape factors for node-centered quantities
- Real AMREX_RESTRICT sx [depos_order + 1];
- // j: leftmost grid point (node-centered) that the particle touches
- const int j = compute_shape_factor<depos_order>(sx, xmid);
- // Compute shape factors for cell-centered quantities
- Real AMREX_RESTRICT sx0[depos_order + 1];
- // j0: leftmost grid point (cell-centered) that the particle touches
- const int j0 = compute_shape_factor<depos_order>(sx0, xmid-stagger_shift);
+ amrex::ParallelFor(
+ np_to_depose,
+ [=] AMREX_GPU_DEVICE (long ip) {
+ // --- Get particle quantities
+ const amrex::Real gaminv = 1.0/std::sqrt(1.0 + uxp[ip]*uxp[ip]*clightsq
+ + uyp[ip]*uyp[ip]*clightsq
+ + uzp[ip]*uzp[ip]*clightsq);
+ const amrex::Real wq = q*wp[ip];
+ const amrex::Real vx = uxp[ip]*gaminv;
+ const amrex::Real vy = uyp[ip]*gaminv;
+ const amrex::Real vz = uzp[ip]*gaminv;
+ // wqx, wqy wqz are particle current in each direction
+#if (defined WARPX_DIM_RZ)
+ // In RZ, wqx is actually wqr, and wqy is wqtheta
+ // Convert to cylindrical coordinates at the mid point
+ const amrex::Real xpmid = xp[ip] - 0.5*dt*vx;
+ const amrex::Real ypmid = yp[ip] - 0.5*dt*vy;
+ const amrex::Real rpmid = std::sqrt(xpmid*xpmid + ypmid*ypmid);
+ amrex::Real costheta;
+ amrex::Real sintheta;
+ if (rpmid > 0.) {
+ costheta = xpmid/rpmid;
+ sintheta = ypmid/rpmid;
+ } else {
+ costheta = 1.;
+ sintheta = 0.;
+ }
+ const amrex::Real wqx = wq*invvol*(+vx*costheta + vy*sintheta);
+ const amrex::Real wqy = wq*invvol*(-vx*sintheta + vy*costheta);
+#else
+ const amrex::Real wqx = wq*invvol*vx;
+ const amrex::Real wqy = wq*invvol*vy;
+#endif
+ const amrex::Real wqz = wq*invvol*vz;
+
+ // --- Compute shape factors
+ // x direction
+ // Get particle position after 1/2 push back in position
+#if (defined WARPX_DIM_RZ)
+ const amrex::Real xmid = (rpmid-xmin)*dxi;
+#else
+ const amrex::Real xmid = (xp[ip]-xmin)*dxi-dts2dx*vx;
+#endif
+ // Compute shape factors for node-centered quantities
+ amrex::Real AMREX_RESTRICT sx [depos_order + 1];
+ // j: leftmost grid point (node-centered) that the particle touches
+ const int j = compute_shape_factor<depos_order>(sx, xmid);
+ // Compute shape factors for cell-centered quantities
+ amrex::Real AMREX_RESTRICT sx0[depos_order + 1];
+ // j0: leftmost grid point (cell-centered) that the particle touches
+ const int j0 = compute_shape_factor<depos_order>(sx0, xmid-stagger_shift);
-#if (AMREX_SPACEDIM == 3)
- // y direction
- const Real ymid= (yp[ip]-ymin)*dyi-dts2dy*vy;
- Real AMREX_RESTRICT sy [depos_order + 1];
- const int k = compute_shape_factor<depos_order>(sy, ymid);
- Real AMREX_RESTRICT sy0[depos_order + 1];
- const int k0 = compute_shape_factor<depos_order>(sy0, ymid-stagger_shift);
+#if (defined WARPX_DIM_3D)
+ // y direction
+ const amrex::Real ymid= (yp[ip]-ymin)*dyi-dts2dy*vy;
+ amrex::Real AMREX_RESTRICT sy [depos_order + 1];
+ const int k = compute_shape_factor<depos_order>(sy, ymid);
+ amrex::Real AMREX_RESTRICT sy0[depos_order + 1];
+ const int k0 = compute_shape_factor<depos_order>(sy0, ymid-stagger_shift);
#endif
- // z direction
- const Real zmid= (zp[ip]-zmin)*dzi-dts2dz*vz;
- Real AMREX_RESTRICT sz [depos_order + 1];
- const int l = compute_shape_factor<depos_order>(sz, zmid);
- Real AMREX_RESTRICT sz0[depos_order + 1];
- const int l0 = compute_shape_factor<depos_order>(sz0, zmid-stagger_shift);
-
- // Deposit current into jx_arr, jy_arr and jz_arr
-#if (AMREX_SPACEDIM == 2)
- for (int iz=0; iz<=depos_order; iz++){
- for (int ix=0; ix<=depos_order; ix++){
- amrex::Gpu::Atomic::Add(
- &jx_arr(lo.x+j0+ix, lo.y+l +iz, 0),
- sx0[ix]*sz [iz]*wqx);
- amrex::Gpu::Atomic::Add(
- &jy_arr(lo.x+j +ix, lo.y+l +iz, 0),
- sx [ix]*sz [iz]*wqy);
- amrex::Gpu::Atomic::Add(
- &jz_arr(lo.x+j +ix, lo.y+l0+iz, 0),
- sx [ix]*sz0[iz]*wqz);
- }
- }
-#else // (AMREX_SPACEDIM == 3)
- for (int iz=0; iz<=depos_order; iz++){
- for (int iy=0; iy<=depos_order; iy++){
- for (int ix=0; ix<=depos_order; ix++){
- amrex::Gpu::Atomic::Add(
- &jx_arr(lo.x+j0+ix, lo.y+k +iy, lo.z+l +iz),
- sx0[ix]*sy [iy]*sz [iz]*wqx);
- amrex::Gpu::Atomic::Add(
- &jy_arr(lo.x+j +ix, lo.y+k0+iy, lo.z+l +iz),
- sx [ix]*sy0[iy]*sz [iz]*wqy);
- amrex::Gpu::Atomic::Add(
- &jz_arr(lo.x+j +ix, lo.y+k +iy, lo.z+l0+iz),
- sx [ix]*sy [iy]*sz0[iz]*wqz);
- }
- }
- }
+ // z direction
+ const amrex::Real zmid= (zp[ip]-zmin)*dzi-dts2dz*vz;
+ amrex::Real AMREX_RESTRICT sz [depos_order + 1];
+ const int l = compute_shape_factor<depos_order>(sz, zmid);
+ amrex::Real AMREX_RESTRICT sz0[depos_order + 1];
+ const int l0 = compute_shape_factor<depos_order>(sz0, zmid-stagger_shift);
+
+ // Deposit current into jx_arr, jy_arr and jz_arr
+#if (defined WARPX_DIM_2D) || (defined WARPX_DIM_RZ)
+ for (int iz=0; iz<=depos_order; iz++){
+ for (int ix=0; ix<=depos_order; ix++){
+ amrex::Gpu::Atomic::Add(
+ &jx_arr(lo.x+j0+ix, lo.y+l +iz, 0),
+ sx0[ix]*sz [iz]*wqx);
+ amrex::Gpu::Atomic::Add(
+ &jy_arr(lo.x+j +ix, lo.y+l +iz, 0),
+ sx [ix]*sz [iz]*wqy);
+ amrex::Gpu::Atomic::Add(
+ &jz_arr(lo.x+j +ix, lo.y+l0+iz, 0),
+ sx [ix]*sz0[iz]*wqz);
+ }
+ }
+#elif (defined WARPX_DIM_3D)
+ for (int iz=0; iz<=depos_order; iz++){
+ for (int iy=0; iy<=depos_order; iy++){
+ for (int ix=0; ix<=depos_order; ix++){
+ amrex::Gpu::Atomic::Add(
+ &jx_arr(lo.x+j0+ix, lo.y+k +iy, lo.z+l +iz),
+ sx0[ix]*sy [iy]*sz [iz]*wqx);
+ amrex::Gpu::Atomic::Add(
+ &jy_arr(lo.x+j +ix, lo.y+k0+iy, lo.z+l +iz),
+ sx [ix]*sy0[iy]*sz [iz]*wqy);
+ amrex::Gpu::Atomic::Add(
+ &jz_arr(lo.x+j +ix, lo.y+k +iy, lo.z+l0+iz),
+ sx [ix]*sy [iy]*sz0[iz]*wqz);
+ }
+ }
+ }
#endif
- }
+ }
);
}
-// Compute shape factor and return index of leftmost cell where
-// particle writes.
-// Specialized templates are defined below for orders 1, 2 and 3.
-template <int depos_order>
-AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
-int compute_shifted_shape_factor (Real* const sx, const Real x_old, const int i_new);
-
-// Compute shape factor for order 1.
-template <>
-AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
-int compute_shifted_shape_factor <1> (Real* const sx, const Real x_old, const int i_new){
- const int i = (int) x_old;
- const int i_shift = i - i_new;
- const Real xint = x_old - i;
- sx[1+i_shift] = 1.0 - xint;
- sx[2+i_shift] = xint;
- return i;
-}
-
-// Compute shape factor for order 2.
-template <>
-AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
-int compute_shifted_shape_factor <2> (Real* const sx, const Real x_old, const int i_new){
- const int i = (int) (x_old+0.5);
- const int i_shift = i - (i_new + 1);
- const Real xint = x_old - i;
- sx[1+i_shift] = 0.5*(0.5-xint)*(0.5-xint);
- sx[2+i_shift] = 0.75-xint*xint;
- sx[3+i_shift] = 0.5*(0.5+xint)*(0.5+xint);
- // index of the leftmost cell where particle deposits
- return i-1;
-}
-
-// Compute shape factor for order 3.
-template <>
-AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
-int compute_shifted_shape_factor <3> (Real* const sx, const Real x_old, const int i_new){
- const int i = (int) x_old;
- const int i_shift = i - (i_new + 1);
- const Real xint = x_old - i;
- sx[1+i_shift] = 1.0/6.0*(1.0-xint)*(1.0-xint)*(1.0-xint);
- sx[2+i_shift] = 2.0/3.0-xint*xint*(1-xint/2.0);
- sx[3+i_shift] = 2.0/3.0-(1-xint)*(1-xint)*(1.0-0.5*(1-xint));
- sx[4+i_shift] = 1.0/6.0*xint*xint*xint;
- // index of the leftmost cell where particle deposits
- return i-1;
-}
-
/* \brief Esirkepov Current Deposition for thread thread_num
* /param xp, yp, zp : Pointer to arrays of particle positions.
* \param wp : Pointer to array of particle weights.
@@ -239,170 +172,197 @@ int compute_shifted_shape_factor <3> (Real* const sx, const Real x_old, const in
* /param q : species charge.
*/
template <int depos_order>
-void doEsirkepovDepositionShapeN (const Real * const xp, const Real * const yp, const Real * const zp,
- const Real * const wp, const Real * const uxp,
- const Real * const uyp, const Real * const uzp,
+void doEsirkepovDepositionShapeN (const amrex::Real * const xp,
+ const amrex::Real * const yp,
+ const amrex::Real * const zp,
+ const amrex::Real * const wp,
+ const amrex::Real * const uxp,
+ const amrex::Real * const uyp,
+ const amrex::Real * const uzp,
const amrex::Array4<amrex::Real>& Jx_arr,
const amrex::Array4<amrex::Real>& Jy_arr,
const amrex::Array4<amrex::Real>& Jz_arr,
const long np_to_depose,
- const amrex::Real dt, const std::array<amrex::Real,3>& dx,
- const std::array<Real, 3> xyzmin,
- const Dim3 lo,
+ const amrex::Real dt,
+ const std::array<amrex::Real,3>& dx,
+ const std::array<amrex::Real, 3> xyzmin,
+ const amrex::Dim3 lo,
const amrex::Real q)
{
- const Real dxi = 1.0/dx[0];
- const Real dtsdx0 = dt*dxi;
- const Real xmin = xyzmin[0];
-#if (AMREX_SPACEDIM == 3)
- const Real dyi = 1.0/dx[1];
- const Real dtsdy0 = dt*dyi;
- const Real ymin = xyzmin[1];
+ const amrex::Real dxi = 1.0/dx[0];
+ const amrex::Real dtsdx0 = dt*dxi;
+ const amrex::Real xmin = xyzmin[0];
+#if (defined WARPX_DIM_3D)
+ const amrex::Real dyi = 1.0/dx[1];
+ const amrex::Real dtsdy0 = dt*dyi;
+ const amrex::Real ymin = xyzmin[1];
#endif
- const Real dzi = 1.0/dx[2];
- const Real dtsdz0 = dt*dzi;
- const Real zmin = xyzmin[2];
-
-#if (AMREX_SPACEDIM == 3)
- const Real invdtdx = 1.0/(dt*dx[1]*dx[2]);
- const Real invdtdy = 1.0/(dt*dx[0]*dx[2]);
- const Real invdtdz = 1.0/(dt*dx[0]*dx[1]);
-#elif (AMREX_SPACEDIM == 2)
- const Real invdtdx = 1.0/(dt*dx[2]);
- const Real invdtdz = 1.0/(dt*dx[0]);
- const Real invvol = 1.0/(dx[0]*dx[2]);
+ const amrex::Real dzi = 1.0/dx[2];
+ const amrex::Real dtsdz0 = dt*dzi;
+ const amrex::Real zmin = xyzmin[2];
+
+#if (defined WARPX_DIM_3D)
+ const amrex::Real invdtdx = 1.0/(dt*dx[1]*dx[2]);
+ const amrex::Real invdtdy = 1.0/(dt*dx[0]*dx[2]);
+ const amrex::Real invdtdz = 1.0/(dt*dx[0]*dx[1]);
+#elif (defined WARPX_DIM_2D) || (defined WARPX_DIM_RZ)
+ const amrex::Real invdtdx = 1.0/(dt*dx[2]);
+ const amrex::Real invdtdz = 1.0/(dt*dx[0]);
+ const amrex::Real invvol = 1.0/(dx[0]*dx[2]);
#endif
- const Real clightsq = 1.0/PhysConst::c/PhysConst::c;
+ const amrex::Real clightsq = 1.0/PhysConst::c/PhysConst::c;
// Loop over particles and deposit into Jx_arr, Jy_arr and Jz_arr
- ParallelFor( np_to_depose,
- [=] AMREX_GPU_DEVICE (long ip) {
-
- // --- Get particle quantities
- const Real gaminv = 1.0/std::sqrt(1.0 + uxp[ip]*uxp[ip]*clightsq
- + uyp[ip]*uyp[ip]*clightsq
- + uzp[ip]*uzp[ip]*clightsq);
-
- // wqx, wqy wqz are particle current in each direction
- const Real wq = q*wp[ip];
- const Real wqx = wq*invdtdx;
-#if (AMREX_SPACEDIM == 3)
- const Real wqy = wq*invdtdy;
+ amrex::ParallelFor(
+ np_to_depose,
+ [=] AMREX_GPU_DEVICE (long ip) {
+
+ // --- Get particle quantities
+ const amrex::Real gaminv = 1.0/std::sqrt(1.0 + uxp[ip]*uxp[ip]*clightsq
+ + uyp[ip]*uyp[ip]*clightsq
+ + uzp[ip]*uzp[ip]*clightsq);
+
+ // wqx, wqy wqz are particle current in each direction
+ const amrex::Real wq = q*wp[ip];
+ const amrex::Real wqx = wq*invdtdx;
+#if (defined WARPX_DIM_3D)
+ const amrex::Real wqy = wq*invdtdy;
#endif
- const Real wqz = wq*invdtdz;
-
- // computes current and old position in grid units
- const Real x_new = (xp[ip] - xmin)*dxi;
- const Real x_old = x_new - dtsdx0*uxp[ip]*gaminv;
-#if (AMREX_SPACEDIM == 3)
- const Real y_new = (yp[ip] - ymin)*dyi;
- const Real y_old = y_new - dtsdy0*uyp[ip]*gaminv;
+ const amrex::Real wqz = wq*invdtdz;
+
+ // computes current and old position in grid units
+#if (defined WARPX_DIM_RZ)
+ const amrex::Real r_new = std::sqrt(xp[ip]*xp[ip] + yp[ip]*yp[ip]);
+ const amrex::Real r_old = std::sqrt((xp[ip] - dt*uxp[ip]*gaminv)*(xp[ip] - dt*uxp[ip]*gaminv) +
+ (yp[ip] - dt*uyp[ip]*gaminv)*(yp[ip] - dt*uyp[ip]*gaminv));
+ const amrex::Real x_new = (r_new - xmin)*dxi;
+ const amrex::Real x_old = (r_old - xmin)*dxi;
+#else
+ const amrex::Real x_new = (xp[ip] - xmin)*dxi;
+ const amrex::Real x_old = x_new - dtsdx0*uxp[ip]*gaminv;
#endif
- const Real z_new = (zp[ip] - zmin)*dzi;
- const Real z_old = z_new - dtsdz0*uzp[ip]*gaminv;
-
- // Shape factor arrays
- // Note that there are extra values above and below
- // to possibly hold the factor for the old particle
- // which can be at a different grid location.
- Real AMREX_RESTRICT sx_new[depos_order + 3] = {0.};
- Real AMREX_RESTRICT sx_old[depos_order + 3] = {0.};
-#if (AMREX_SPACEDIM == 3)
- Real AMREX_RESTRICT sy_new[depos_order + 3] = {0.};
- Real AMREX_RESTRICT sy_old[depos_order + 3] = {0.};
+#if (defined WARPX_DIM_3D)
+ const amrex::Real y_new = (yp[ip] - ymin)*dyi;
+ const amrex::Real y_old = y_new - dtsdy0*uyp[ip]*gaminv;
+#endif
+ const amrex::Real z_new = (zp[ip] - zmin)*dzi;
+ const amrex::Real z_old = z_new - dtsdz0*uzp[ip]*gaminv;
+
+#if (defined WARPX_DIM_RZ)
+ amrex::Real costheta;
+ amrex::Real sintheta;
+ if (r_new > 0.) {
+ costheta = xp[ip]/r_new;
+ sintheta = yp[ip]/r_new;
+ } else {
+ costheta = 1.;
+ sintheta = 0.;
+ }
+ const amrex::Real vy = (-uxp[ip]*sintheta + uyp[ip]*costheta)*gaminv;
+#elif (defined WARPX_DIM_2D)
+ const amrex::Real vy = uyp[ip]*gaminv;
#endif
- Real AMREX_RESTRICT sz_new[depos_order + 3] = {0.};
- Real AMREX_RESTRICT sz_old[depos_order + 3] = {0.};
- // --- Compute shape factors
- // Compute shape factors for position as they are now and at old positions
- // [ijk]_new: leftmost grid point that the particle touches
- const int i_new = compute_shape_factor<depos_order>(sx_new+1, x_new);
- const int i_old = compute_shifted_shape_factor<depos_order>(sx_old, x_old, i_new);
-#if (AMREX_SPACEDIM == 3)
- const int j_new = compute_shape_factor<depos_order>(sy_new+1, y_new);
- const int j_old = compute_shifted_shape_factor<depos_order>(sy_old, y_old, j_new);
+ // Shape factor arrays
+ // Note that there are extra values above and below
+ // to possibly hold the factor for the old particle
+ // which can be at a different grid location.
+ amrex::Real AMREX_RESTRICT sx_new[depos_order + 3] = {0.};
+ amrex::Real AMREX_RESTRICT sx_old[depos_order + 3] = {0.};
+#if (defined WARPX_DIM_3D)
+ amrex::Real AMREX_RESTRICT sy_new[depos_order + 3] = {0.};
+ amrex::Real AMREX_RESTRICT sy_old[depos_order + 3] = {0.};
+#endif
+ amrex::Real AMREX_RESTRICT sz_new[depos_order + 3] = {0.};
+ amrex::Real AMREX_RESTRICT sz_old[depos_order + 3] = {0.};
+
+ // --- Compute shape factors
+ // Compute shape factors for position as they are now and at old positions
+ // [ijk]_new: leftmost grid point that the particle touches
+ const int i_new = compute_shape_factor<depos_order>(sx_new+1, x_new);
+ const int i_old = compute_shifted_shape_factor<depos_order>(sx_old, x_old, i_new);
+#if (defined WARPX_DIM_3D)
+ const int j_new = compute_shape_factor<depos_order>(sy_new+1, y_new);
+ const int j_old = compute_shifted_shape_factor<depos_order>(sy_old, y_old, j_new);
#endif
- const int k_new = compute_shape_factor<depos_order>(sz_new+1, z_new);
- const int k_old = compute_shifted_shape_factor<depos_order>(sz_old, z_old, k_new);
-
- // computes min/max positions of current contributions
- int dil = 1, diu = 1;
- if (i_old < i_new) dil = 0;
- if (i_old > i_new) diu = 0;
-#if (AMREX_SPACEDIM == 3)
- int djl = 1, dju = 1;
- if (j_old < j_new) djl = 0;
- if (j_old > j_new) dju = 0;
+ const int k_new = compute_shape_factor<depos_order>(sz_new+1, z_new);
+ const int k_old = compute_shifted_shape_factor<depos_order>(sz_old, z_old, k_new);
+
+ // computes min/max positions of current contributions
+ int dil = 1, diu = 1;
+ if (i_old < i_new) dil = 0;
+ if (i_old > i_new) diu = 0;
+#if (defined WARPX_DIM_3D)
+ int djl = 1, dju = 1;
+ if (j_old < j_new) djl = 0;
+ if (j_old > j_new) dju = 0;
#endif
- int dkl = 1, dku = 1;
- if (k_old < k_new) dkl = 0;
- if (k_old > k_new) dku = 0;
-
-#if (AMREX_SPACEDIM == 3)
-
- for (int k=dkl; k<=depos_order+2-dku; k++) {
- for (int j=djl; j<=depos_order+2-dju; j++) {
- Real sdxi = 0.;
- for (int i=dil; i<=depos_order+1-diu; i++) {
- sdxi += wqx*(sx_old[i] - sx_new[i])*((sy_new[j] + 0.5*(sy_old[j] - sy_new[j]))*sz_new[k] +
- (0.5*sy_new[j] + 1./3.*(sy_old[j] - sy_new[j]))*(sz_old[k] - sz_new[k]));
- amrex::Gpu::Atomic::Add( &Jx_arr(lo.x+i_new-1+i, lo.y+j_new-1+j, lo.z+k_new-1+k), sdxi);
- }
- }
- }
- for (int k=dkl; k<=depos_order+2-dku; k++) {
- for (int i=dil; i<=depos_order+2-diu; i++) {
- Real sdyj = 0.;
- for (int j=djl; j<=depos_order+1-dju; j++) {
- sdyj += wqy*(sy_old[j] - sy_new[j])*((sz_new[k] + 0.5*(sz_old[k] - sz_new[k]))*sx_new[i] +
- (0.5*sz_new[k] + 1./3.*(sz_old[k] - sz_new[k]))*(sx_old[i] - sx_new[i]));
- amrex::Gpu::Atomic::Add( &Jy_arr(lo.x+i_new-1+i, lo.y+j_new-1+j, lo.z+k_new-1+k), sdyj);
- }
- }
- }
- for (int j=djl; j<=depos_order+2-dju; j++) {
- for (int i=dil; i<=depos_order+2-diu; i++) {
- Real sdzk = 0.;
- for (int k=dkl; k<=depos_order+1-dku; k++) {
- sdzk += wqz*(sz_old[k] - sz_new[k])*((sx_new[i] + 0.5*(sx_old[i] - sx_new[i]))*sy_new[j] +
- (0.5*sx_new[i] + 1./3.*(sx_old[i] - sx_new[i]))*(sy_old[j] - sy_new[j]));
- amrex::Gpu::Atomic::Add( &Jz_arr(lo.x+i_new-1+i, lo.y+j_new-1+j, lo.z+k_new-1+k), sdzk);
- }
- }
- }
-
-#elif (AMREX_SPACEDIM == 2)
-
- for (int k=dkl; k<=depos_order+2-dku; k++) {
- Real sdxi = 0.;
- for (int i=dil; i<=depos_order+1-diu; i++) {
- sdxi += wqx*(sx_old[i] - sx_new[i])*(sz_new[k] + 0.5*(sz_old[k] - sz_new[k]));
- amrex::Gpu::Atomic::Add( &Jx_arr(lo.x+i_new-1+i, lo.y+k_new-1+k, 0), sdxi);
- }
+ int dkl = 1, dku = 1;
+ if (k_old < k_new) dkl = 0;
+ if (k_old > k_new) dku = 0;
+
+#if (defined WARPX_DIM_3D)
+
+ for (int k=dkl; k<=depos_order+2-dku; k++) {
+ for (int j=djl; j<=depos_order+2-dju; j++) {
+ amrex::Real sdxi = 0.;
+ for (int i=dil; i<=depos_order+1-diu; i++) {
+ sdxi += wqx*(sx_old[i] - sx_new[i])*((sy_new[j] + 0.5*(sy_old[j] - sy_new[j]))*sz_new[k] +
+ (0.5*sy_new[j] + 1./3.*(sy_old[j] - sy_new[j]))*(sz_old[k] - sz_new[k]));
+ amrex::Gpu::Atomic::Add( &Jx_arr(lo.x+i_new-1+i, lo.y+j_new-1+j, lo.z+k_new-1+k), sdxi);
}
- for (int k=dkl; k<=depos_order+2-dku; k++) {
- for (int i=dil; i<=depos_order+2-diu; i++) {
- const Real sdyj = wq*uyp[ip]*gaminv*invvol*((sz_new[k] + 0.5*(sz_old[k] - sz_new[k]))*sx_new[i] +
- (0.5*sz_new[k] + 1./3.*(sz_old[k] - sz_new[k]))*(sx_old[i] - sx_new[i]));
- amrex::Gpu::Atomic::Add( &Jy_arr(lo.x+i_new-1+i, lo.y+k_new-1+k, 0), sdyj);
- }
+ }
+ }
+ for (int k=dkl; k<=depos_order+2-dku; k++) {
+ for (int i=dil; i<=depos_order+2-diu; i++) {
+ amrex::Real sdyj = 0.;
+ for (int j=djl; j<=depos_order+1-dju; j++) {
+ sdyj += wqy*(sy_old[j] - sy_new[j])*((sz_new[k] + 0.5*(sz_old[k] - sz_new[k]))*sx_new[i] +
+ (0.5*sz_new[k] + 1./3.*(sz_old[k] - sz_new[k]))*(sx_old[i] - sx_new[i]));
+ amrex::Gpu::Atomic::Add( &Jy_arr(lo.x+i_new-1+i, lo.y+j_new-1+j, lo.z+k_new-1+k), sdyj);
}
- for (int i=dil; i<=depos_order+2-diu; i++) {
- Real sdzk = 0.;
- for (int k=dkl; k<=depos_order+1-dku; k++) {
- sdzk += wqz*(sz_old[k] - sz_new[k])*(sx_new[i] + 0.5*(sx_old[i] - sx_new[i]));
- amrex::Gpu::Atomic::Add( &Jz_arr(lo.x+i_new-1+i, lo.y+k_new-1+k, 0), sdzk);
- }
+ }
+ }
+ for (int j=djl; j<=depos_order+2-dju; j++) {
+ for (int i=dil; i<=depos_order+2-diu; i++) {
+ amrex::Real sdzk = 0.;
+ for (int k=dkl; k<=depos_order+1-dku; k++) {
+ sdzk += wqz*(sz_old[k] - sz_new[k])*((sx_new[i] + 0.5*(sx_old[i] - sx_new[i]))*sy_new[j] +
+ (0.5*sx_new[i] + 1./3.*(sx_old[i] - sx_new[i]))*(sy_old[j] - sy_new[j]));
+ amrex::Gpu::Atomic::Add( &Jz_arr(lo.x+i_new-1+i, lo.y+j_new-1+j, lo.z+k_new-1+k), sdzk);
}
+ }
+ }
+
+#elif (defined WARPX_DIM_2D) || (defined WARPX_DIM_RZ)
+
+ for (int k=dkl; k<=depos_order+2-dku; k++) {
+ amrex::Real sdxi = 0.;
+ for (int i=dil; i<=depos_order+1-diu; i++) {
+ sdxi += wqx*(sx_old[i] - sx_new[i])*(sz_new[k] + 0.5*(sz_old[k] - sz_new[k]));
+ amrex::Gpu::Atomic::Add( &Jx_arr(lo.x+i_new-1+i, lo.y+k_new-1+k, 0), sdxi);
+ }
+ }
+ for (int k=dkl; k<=depos_order+2-dku; k++) {
+ for (int i=dil; i<=depos_order+2-diu; i++) {
+ const amrex::Real sdyj = wq*vy*invvol*((sz_new[k] + 0.5*(sz_old[k] - sz_new[k]))*sx_new[i] +
+ (0.5*sz_new[k] + 1./3.*(sz_old[k] - sz_new[k]))*(sx_old[i] - sx_new[i]));
+ amrex::Gpu::Atomic::Add( &Jy_arr(lo.x+i_new-1+i, lo.y+k_new-1+k, 0), sdyj);
+ }
+ }
+ for (int i=dil; i<=depos_order+2-diu; i++) {
+ amrex::Real sdzk = 0.;
+ for (int k=dkl; k<=depos_order+1-dku; k++) {
+ sdzk += wqz*(sz_old[k] - sz_new[k])*(sx_new[i] + 0.5*(sx_old[i] - sx_new[i]));
+ amrex::Gpu::Atomic::Add( &Jz_arr(lo.x+i_new-1+i, lo.y+k_new-1+k, 0), sdzk);
+ }
+ }
+
#endif
- }
+ }
);
-
-
-
}
#endif // CURRENTDEPOSITION_H_
diff --git a/Source/Particles/Deposition/Make.package b/Source/Particles/Deposition/Make.package
index 0d5ebe2a7..e1aace998 100644
--- a/Source/Particles/Deposition/Make.package
+++ b/Source/Particles/Deposition/Make.package
@@ -1,3 +1,4 @@
CEXE_headers += CurrentDeposition.H
+CEXE_headers += ChargeDeposition.H
INCLUDE_LOCATIONS += $(WARPX_HOME)/Source/Particles/Deposition
VPATH_LOCATIONS += $(WARPX_HOME)/Source/Particles/Deposition
diff --git a/Source/Particles/Gather/FieldGather.H b/Source/Particles/Gather/FieldGather.H
new file mode 100644
index 000000000..8f5e8d4cf
--- /dev/null
+++ b/Source/Particles/Gather/FieldGather.H
@@ -0,0 +1,216 @@
+#ifndef FIELDGATHER_H_
+#define FIELDGATHER_H_
+
+#include "ShapeFactors.H"
+
+/* \brief Gather the E and B fields from the grid arrays onto np_to_gather particles.
+ * \tparam depos_order, lower_in_v: Shape-factor order, and the amount subtracted from it for the stagger-shifted shape factors.
+ * \param xp, yp, zp : Pointer to arrays of particle positions.
+ * \param Exp, Eyp, Ezp: Pointer to array of electric field on particles (overwritten).
+ * \param Bxp, Byp, Bzp: Pointer to array of magnetic field on particles (overwritten).
+ * \param ex_arr ey_arr ez_arr: Array4 of electric field, either full array or tile.
+ * \param bx_arr by_arr bz_arr: Array4 of magnetic field, either full array or tile.
+ * \param np_to_gather : Number of particles for which field is gathered.
+ * \param dx : 3D cell size
+ * \param xyzmin : Physical lower bounds of domain.
+ * \param lo : Index lower bounds of domain.
+ * \param stagger_shift: 0 if nodal, 0.5 if staggered.
+ */
+template <int depos_order, int lower_in_v>
+void doGatherShapeN(const amrex::Real * const xp,
+ const amrex::Real * const yp,
+ const amrex::Real * const zp,
+ amrex::Real * const Exp, amrex::Real * const Eyp,
+ amrex::Real * const Ezp, amrex::Real * const Bxp,
+ amrex::Real * const Byp, amrex::Real * const Bzp,
+ const amrex::Array4<const amrex::Real>& ex_arr,
+ const amrex::Array4<const amrex::Real>& ey_arr,
+ const amrex::Array4<const amrex::Real>& ez_arr,
+ const amrex::Array4<const amrex::Real>& bx_arr,
+ const amrex::Array4<const amrex::Real>& by_arr,
+ const amrex::Array4<const amrex::Real>& bz_arr,
+ const long np_to_gather,
+ const std::array<amrex::Real, 3>& dx,
+ const std::array<amrex::Real, 3> xyzmin,
+ const amrex::Dim3 lo,
+ const amrex::Real stagger_shift)
+{
+ const amrex::Real dxi = 1.0/dx[0];
+ const amrex::Real dzi = 1.0/dx[2];
+#if (AMREX_SPACEDIM == 3)
+ const amrex::Real dyi = 1.0/dx[1];
+#endif
+
+ const amrex::Real xmin = xyzmin[0];
+#if (AMREX_SPACEDIM == 3)
+ const amrex::Real ymin = xyzmin[1];
+#endif
+ const amrex::Real zmin = xyzmin[2];
+
+ // Loop over particles and gather fields from
+ // {e,b}{x,y,z}_arr to {E,B}{x,y,z}p.
+ amrex::ParallelFor(
+ np_to_gather,
+ [=] AMREX_GPU_DEVICE (long ip) {
+ // --- Compute shape factors
+ // x direction
+ // Get particle position in grid units (radius is used in RZ)
+#ifdef WARPX_DIM_RZ
+ const amrex::Real r = std::sqrt(xp[ip]*xp[ip] + yp[ip]*yp[ip]);
+ const amrex::Real x = (r - xmin)*dxi;
+#else
+ const amrex::Real x = (xp[ip]-xmin)*dxi;
+#endif
+ // Compute shape factors for node-centered quantities
+ amrex::Real AMREX_RESTRICT sx [depos_order + 1];
+ // j: leftmost grid point (node-centered) that particle touches
+ const int j = compute_shape_factor<depos_order>(sx, x);
+ // Compute shape factors for cell-centered quantities
+ amrex::Real AMREX_RESTRICT sx0[depos_order + 1 - lower_in_v];
+ // j0: leftmost grid point (cell-centered) that particle touches
+ const int j0 = compute_shape_factor<depos_order - lower_in_v>(
+ sx0, x-stagger_shift);
+#if (AMREX_SPACEDIM == 3)
+ // y direction (same node-centered / cell-centered pair as for x)
+ const amrex::Real y = (yp[ip]-ymin)*dyi;
+ amrex::Real AMREX_RESTRICT sy [depos_order + 1];
+ const int k = compute_shape_factor<depos_order>(sy, y);
+ amrex::Real AMREX_RESTRICT sy0[depos_order + 1 - lower_in_v];
+ const int k0 = compute_shape_factor<depos_order-lower_in_v>(
+ sy0, y-stagger_shift);
+#endif
+ // z direction (same node-centered / cell-centered pair as for x)
+ const amrex::Real z = (zp[ip]-zmin)*dzi;
+ amrex::Real AMREX_RESTRICT sz [depos_order + 1];
+ const int l = compute_shape_factor<depos_order>(sz, z);
+ amrex::Real AMREX_RESTRICT sz0[depos_order + 1 - lower_in_v];
+ const int l0 = compute_shape_factor<depos_order - lower_in_v>(
+ sz0, z-stagger_shift);
+
+ // Set fields on particle to zero before accumulating the gather
+ Exp[ip] = 0;
+ Eyp[ip] = 0;
+ Ezp[ip] = 0;
+ Bxp[ip] = 0;
+ Byp[ip] = 0;
+ Bzp[ip] = 0;
+ // Each field is gathered in a separate block of
+ // AMREX_SPACEDIM nested loops because the shape-factor
+ // order can differ for each component of each field
+ // when lower_in_v is set to 1
+#if (AMREX_SPACEDIM == 2)
+ // Gather field on particle Eyp[ip] from field on grid ey_arr
+ for (int iz=0; iz<=depos_order; iz++){
+ for (int ix=0; ix<=depos_order; ix++){
+ Eyp[ip] += sx[ix]*sz[iz]*
+ ey_arr(lo.x+j+ix, lo.y+l+iz, 0);
+ }
+ }
+ // Gather field on particle Exp[ip] from field on grid ex_arr
+ // Gather field on particle Bzp[ip] from field on grid bz_arr
+ for (int iz=0; iz<=depos_order; iz++){
+ for (int ix=0; ix<=depos_order-lower_in_v; ix++){
+ Exp[ip] += sx0[ix]*sz[iz]*
+ ex_arr(lo.x+j0+ix, lo.y+l +iz, 0);
+ Bzp[ip] += sx0[ix]*sz[iz]*
+ bz_arr(lo.x+j0+ix, lo.y+l +iz, 0);
+ }
+ }
+ // Gather field on particle Ezp[ip] from field on grid ez_arr
+ // Gather field on particle Bxp[ip] from field on grid bx_arr
+ for (int iz=0; iz<=depos_order-lower_in_v; iz++){
+ for (int ix=0; ix<=depos_order; ix++){
+ Ezp[ip] += sx[ix]*sz0[iz]*
+ ez_arr(lo.x+j+ix, lo.y+l0 +iz, 0);
+ Bxp[ip] += sx[ix]*sz0[iz]*
+ bx_arr(lo.x+j+ix, lo.y+l0 +iz, 0);
+ }
+ }
+ // Gather field on particle Byp[ip] from field on grid by_arr
+ for (int iz=0; iz<=depos_order-lower_in_v; iz++){
+ for (int ix=0; ix<=depos_order-lower_in_v; ix++){
+ Byp[ip] += sx0[ix]*sz0[iz]*
+ by_arr(lo.x+j0+ix, lo.y+l0+iz, 0);
+ }
+ }
+
+#ifdef WARPX_DIM_RZ
+ // Rotate (Exp, Eyp) and (Bxp, Byp), gathered as (r, theta) components, to (x, y)
+ amrex::Real costheta;
+ amrex::Real sintheta;
+ if (r > 0.) {
+ costheta = xp[ip]/r;
+ sintheta = yp[ip]/r;
+ } else {
+ costheta = 1.;
+ sintheta = 0.;
+ }
+ const amrex::Real Exp_save = Exp[ip];
+ Exp[ip] = costheta*Exp[ip] - sintheta*Eyp[ip];
+ Eyp[ip] = costheta*Eyp[ip] + sintheta*Exp_save;
+ const amrex::Real Bxp_save = Bxp[ip];
+ Bxp[ip] = costheta*Bxp[ip] - sintheta*Byp[ip];
+ Byp[ip] = costheta*Byp[ip] + sintheta*Bxp_save;
+#endif
+
+#else // (AMREX_SPACEDIM == 3)
+ // Gather field on particle Exp[ip] from field on grid ex_arr
+ for (int iz=0; iz<=depos_order; iz++){
+ for (int iy=0; iy<=depos_order; iy++){
+ for (int ix=0; ix<=depos_order-lower_in_v; ix++){
+ Exp[ip] += sx0[ix]*sy[iy]*sz[iz]*
+ ex_arr(lo.x+j0+ix, lo.y+k+iy, lo.z+l+iz);
+ }
+ }
+ }
+ // Gather field on particle Eyp[ip] from field on grid ey_arr
+ for (int iz=0; iz<=depos_order; iz++){
+ for (int iy=0; iy<=depos_order-lower_in_v; iy++){
+ for (int ix=0; ix<=depos_order; ix++){
+ Eyp[ip] += sx[ix]*sy0[iy]*sz[iz]*
+ ey_arr(lo.x+j+ix, lo.y+k0+iy, lo.z+l+iz);
+ }
+ }
+ }
+ // Gather field on particle Ezp[ip] from field on grid ez_arr
+ for (int iz=0; iz<=depos_order-lower_in_v; iz++){
+ for (int iy=0; iy<=depos_order; iy++){
+ for (int ix=0; ix<=depos_order; ix++){
+ Ezp[ip] += sx[ix]*sy[iy]*sz0[iz]*
+ ez_arr(lo.x+j+ix, lo.y+k+iy, lo.z+l0+iz);
+ }
+ }
+ }
+ // Gather field on particle Bzp[ip] from field on grid bz_arr
+ for (int iz=0; iz<=depos_order; iz++){
+ for (int iy=0; iy<=depos_order-lower_in_v; iy++){
+ for (int ix=0; ix<=depos_order-lower_in_v; ix++){
+ Bzp[ip] += sx0[ix]*sy0[iy]*sz[iz]*
+ bz_arr(lo.x+j0+ix, lo.y+k0+iy, lo.z+l+iz);
+ }
+ }
+ }
+ // Gather field on particle Byp[ip] from field on grid by_arr
+ for (int iz=0; iz<=depos_order-lower_in_v; iz++){
+ for (int iy=0; iy<=depos_order; iy++){
+ for (int ix=0; ix<=depos_order-lower_in_v; ix++){
+ Byp[ip] += sx0[ix]*sy[iy]*sz0[iz]*
+ by_arr(lo.x+j0+ix, lo.y+k+iy, lo.z+l0+iz);
+ }
+ }
+ }
+ // Gather field on particle Bxp[ip] from field on grid bx_arr
+ for (int iz=0; iz<=depos_order-lower_in_v; iz++){
+ for (int iy=0; iy<=depos_order-lower_in_v; iy++){
+ for (int ix=0; ix<=depos_order; ix++){
+ Bxp[ip] += sx[ix]*sy0[iy]*sz0[iz]*
+ bx_arr(lo.x+j+ix, lo.y+k0+iy, lo.z+l0+iz);
+ }
+ }
+ }
+#endif
+ }
+ );
+}
+
+#endif // FIELDGATHER_H_
diff --git a/Source/Particles/Gather/Make.package b/Source/Particles/Gather/Make.package
new file mode 100644
index 000000000..10abfcaaf
--- /dev/null
+++ b/Source/Particles/Gather/Make.package
@@ -0,0 +1,3 @@
+CEXE_headers += FieldGather.H
+INCLUDE_LOCATIONS += $(WARPX_HOME)/Source/Particles/Gather
+VPATH_LOCATIONS += $(WARPX_HOME)/Source/Particles/Gather
diff --git a/Source/Particles/Make.package b/Source/Particles/Make.package
index 2038472a1..db90de1dc 100644
--- a/Source/Particles/Make.package
+++ b/Source/Particles/Make.package
@@ -9,9 +9,11 @@ CEXE_headers += MultiParticleContainer.H
CEXE_headers += WarpXParticleContainer.H
CEXE_headers += RigidInjectedParticleContainer.H
CEXE_headers += PhysicalParticleContainer.H
+CEXE_headers += ShapeFactors.H
include $(WARPX_HOME)/Source/Particles/Pusher/Make.package
include $(WARPX_HOME)/Source/Particles/Deposition/Make.package
+include $(WARPX_HOME)/Source/Particles/Gather/Make.package
INCLUDE_LOCATIONS += $(WARPX_HOME)/Source/Particles
VPATH_LOCATIONS += $(WARPX_HOME)/Source/Particles
diff --git a/Source/Particles/MultiParticleContainer.H b/Source/Particles/MultiParticleContainer.H
index 869126fef..7c9ede411 100644
--- a/Source/Particles/MultiParticleContainer.H
+++ b/Source/Particles/MultiParticleContainer.H
@@ -85,8 +85,9 @@ public:
/// in the MultiParticleContainer. This is the electromagnetic version of the field gather.
///
void FieldGather (int lev,
- const amrex::MultiFab& Ex, const amrex::MultiFab& Ey, const amrex::MultiFab& Ez,
- const amrex::MultiFab& Bx, const amrex::MultiFab& By, const amrex::MultiFab& Bz);
+ const amrex::MultiFab& Ex, const amrex::MultiFab& Ey,
+ const amrex::MultiFab& Ez, const amrex::MultiFab& Bx,
+ const amrex::MultiFab& By, const amrex::MultiFab& Bz);
///
/// This evolves all the particles by one PIC time step, including current deposition, the
diff --git a/Source/Particles/MultiParticleContainer.cpp b/Source/Particles/MultiParticleContainer.cpp
index 9d39ec2f9..982e04e39 100644
--- a/Source/Particles/MultiParticleContainer.cpp
+++ b/Source/Particles/MultiParticleContainer.cpp
@@ -172,30 +172,6 @@ MultiParticleContainer::EvolveES (const Vector<std::array<std::unique_ptr<MultiF
}
void
-MultiParticleContainer::Evolve (int lev,
- const MultiFab& Ex, const MultiFab& Ey, const MultiFab& Ez,
- const MultiFab& Bx, const MultiFab& By, const MultiFab& Bz,
- MultiFab& jx, MultiFab& jy, MultiFab& jz,
- MultiFab* cjx, MultiFab* cjy, MultiFab* cjz,
- MultiFab* rho,
- const MultiFab* cEx, const MultiFab* cEy, const MultiFab* cEz,
- const MultiFab* cBx, const MultiFab* cBy, const MultiFab* cBz,
- Real t, Real dt)
-{
- jx.setVal(0.0);
- jy.setVal(0.0);
- jz.setVal(0.0);
- if (cjx) cjx->setVal(0.0);
- if (cjy) cjy->setVal(0.0);
- if (cjz) cjz->setVal(0.0);
- if (rho) rho->setVal(0.0);
- for (auto& pc : allcontainers) {
- pc->Evolve(lev, Ex, Ey, Ez, Bx, By, Bz, jx, jy, jz, cjx, cjy, cjz,
- rho, cEx, cEy, cEz, cBx, cBy, cBz, t, dt);
- }
-}
-
-void
MultiParticleContainer::PushXES (Real dt)
{
for (auto& pc : allcontainers) {
@@ -240,8 +216,9 @@ MultiParticleContainer::sumParticleCharge (bool local)
void
MultiParticleContainer::FieldGather (int lev,
- const MultiFab& Ex, const MultiFab& Ey, const MultiFab& Ez,
- const MultiFab& Bx, const MultiFab& By, const MultiFab& Bz)
+ const MultiFab& Ex, const MultiFab& Ey,
+ const MultiFab& Ez, const MultiFab& Bx,
+ const MultiFab& By, const MultiFab& Bz)
{
for (auto& pc : allcontainers) {
pc->FieldGather(lev, Ex, Ey, Ez, Bx, By, Bz);
@@ -331,7 +308,7 @@ MultiParticleContainer::RedistributeLocal (const int num_ghost)
}
Vector<long>
-MultiParticleContainer::NumberOfParticlesInGrid(int lev) const
+MultiParticleContainer::NumberOfParticlesInGrid (int lev) const
{
const bool only_valid=true, only_local=true;
Vector<long> r = allcontainers[0]->NumberOfParticlesInGrid(lev,only_valid,only_local);
diff --git a/Source/Particles/PhysicalParticleContainer.H b/Source/Particles/PhysicalParticleContainer.H
index d55764682..b80619733 100644
--- a/Source/Particles/PhysicalParticleContainer.H
+++ b/Source/Particles/PhysicalParticleContainer.H
@@ -27,17 +27,37 @@ public:
const amrex::Vector<std::unique_ptr<amrex::FabArray<amrex::BaseFab<int> > > >& masks) override;
virtual void EvolveES (const amrex::Vector<std::array<std::unique_ptr<amrex::MultiFab>, 3> >& E,
- amrex::Vector<std::unique_ptr<amrex::MultiFab> >& rho,
+ amrex::Vector<std::unique_ptr<amrex::MultiFab> >& rho,
amrex::Real t, amrex::Real dt) override;
#endif // WARPX_DO_ELECTROSTATIC
- virtual void FieldGather(int lev,
- const amrex::MultiFab& Ex,
- const amrex::MultiFab& Ey,
- const amrex::MultiFab& Ez,
- const amrex::MultiFab& Bx,
- const amrex::MultiFab& By,
- const amrex::MultiFab& Bz) final;
+ virtual void FieldGather (int lev,
+ const amrex::MultiFab& Ex,
+ const amrex::MultiFab& Ey,
+ const amrex::MultiFab& Ez,
+ const amrex::MultiFab& Bx,
+ const amrex::MultiFab& By,
+ const amrex::MultiFab& Bz) final;
+
+ void FieldGather (WarpXParIter& pti,
+ RealVector& Exp,
+ RealVector& Eyp,
+ RealVector& Ezp,
+ RealVector& Bxp,
+ RealVector& Byp,
+ RealVector& Bzp,
+ amrex::FArrayBox const * exfab,
+ amrex::FArrayBox const * eyfab,
+ amrex::FArrayBox const * ezfab,
+ amrex::FArrayBox const * bxfab,
+ amrex::FArrayBox const * byfab,
+ amrex::FArrayBox const * bzfab,
+ const int ngE, const int e_is_nodal,
+ const long offset,
+ const long np_to_gather,
+ int thread_num,
+ int lev,
+ int depos_lev);
virtual void Evolve (int lev,
const amrex::MultiFab& Ex,
@@ -87,11 +107,8 @@ public:
// Inject particles in Box 'part_box'
virtual void AddParticles (int lev);
+
void AddPlasma(int lev, amrex::RealBox part_realbox = amrex::RealBox());
- void AddPlasmaCPU (int lev, amrex::RealBox part_realbox);
-#ifdef AMREX_USE_GPU
- void AddPlasmaGPU (int lev, amrex::RealBox part_realbox);
-#endif
void MapParticletoBoostedFrame(amrex::Real& x, amrex::Real& y, amrex::Real& z, std::array<amrex::Real, 3>& u);
@@ -120,16 +137,8 @@ protected:
bool boost_adjust_transverse_positions = false;
bool do_backward_propagation = false;
- long NumParticlesToAdd (const amrex::Box& overlap_box,
- const amrex::RealBox& overlap_realbox,
- const amrex::RealBox& tile_real_box,
- const amrex::RealBox& particle_real_box);
-
- int GetRefineFac(const amrex::Real x, const amrex::Real y, const amrex::Real z);
- std::unique_ptr<amrex::IArrayBox> m_refined_injection_mask = nullptr;
-
// Inject particles during the whole simulation
- void ContinuousInjection(const amrex::RealBox& injection_box) override;
+ void ContinuousInjection (const amrex::RealBox& injection_box) override;
};
diff --git a/Source/Particles/PhysicalParticleContainer.cpp b/Source/Particles/PhysicalParticleContainer.cpp
index d47a7b220..d10390204 100644
--- a/Source/Particles/PhysicalParticleContainer.cpp
+++ b/Source/Particles/PhysicalParticleContainer.cpp
@@ -6,65 +6,16 @@
#include <WarpX.H>
#include <WarpXConst.H>
#include <WarpXWrappers.h>
+#include <FieldGather.H>
+#include <WarpXAlgorithmSelection.H>
-using namespace amrex;
-
-long PhysicalParticleContainer::
-NumParticlesToAdd(const Box& overlap_box, const RealBox& overlap_realbox,
- const RealBox& tile_realbox, const RealBox& particle_real_box)
-{
- const int lev = 0;
- const Geometry& geom = Geom(lev);
- int num_ppc = plasma_injector->num_particles_per_cell;
- const Real* dx = geom.CellSize();
+// Import low-level single-particle kernels
+#include <UpdatePosition.H>
+#include <UpdateMomentumBoris.H>
+#include <UpdateMomentumVay.H>
- long np = 0;
- const auto& overlap_corner = overlap_realbox.lo();
- for (IntVect iv = overlap_box.smallEnd(); iv <= overlap_box.bigEnd(); overlap_box.next(iv))
- {
- int fac;
- if (do_continuous_injection) {
-#if ( AMREX_SPACEDIM == 3 )
- Real x = overlap_corner[0] + (iv[0] + 0.5)*dx[0];
- Real y = overlap_corner[1] + (iv[1] + 0.5)*dx[1];
- Real z = overlap_corner[2] + (iv[2] + 0.5)*dx[2];
-#elif ( AMREX_SPACEDIM == 2 )
- Real x = overlap_corner[0] + (iv[0] + 0.5)*dx[0];
- Real y = 0;
- Real z = overlap_corner[1] + (iv[1] + 0.5)*dx[1];
-#endif
- fac = GetRefineFac(x, y, z);
- } else {
- fac = 1.0;
- }
-
- int ref_num_ppc = num_ppc * AMREX_D_TERM(fac, *fac, *fac);
- for (int i_part=0; i_part<ref_num_ppc;i_part++) {
- std::array<Real, 3> r;
- plasma_injector->getPositionUnitBox(r, i_part, fac);
-#if ( AMREX_SPACEDIM == 3 )
- Real x = overlap_corner[0] + (iv[0] + r[0])*dx[0];
- Real y = overlap_corner[1] + (iv[1] + r[1])*dx[1];
- Real z = overlap_corner[2] + (iv[2] + r[2])*dx[2];
-#elif ( AMREX_SPACEDIM == 2 )
- Real x = overlap_corner[0] + (iv[0] + r[0])*dx[0];
- Real y = 0;
- Real z = overlap_corner[1] + (iv[1] + r[1])*dx[1];
-#endif
- // If the new particle is not inside the tile box,
- // go to the next generated particle.
-#if ( AMREX_SPACEDIM == 3 )
- if(!tile_realbox.contains( RealVect{x, y, z} )) continue;
-#elif ( AMREX_SPACEDIM == 2 )
- if(!tile_realbox.contains( RealVect{x, z} )) continue;
-#endif
- ++np;
- }
- }
-
- return np;
-}
+using namespace amrex;
PhysicalParticleContainer::PhysicalParticleContainer (AmrCore* amr_core, int ispecies,
const std::string& name)
@@ -127,9 +78,7 @@ PhysicalParticleContainer::PhysicalParticleContainer (AmrCore* amr_core)
void PhysicalParticleContainer::InitData()
{
AddParticles(0); // Note - add on level 0
- if (maxLevel() > 0) {
- Redistribute(); // We then redistribute
- }
+ Redistribute(); // We then redistribute
}
void PhysicalParticleContainer::MapParticletoBoostedFrame(Real& x, Real& y, Real& z, std::array<Real, 3>& u)
@@ -193,45 +142,36 @@ PhysicalParticleContainer::AddGaussianBeam(Real x_m, Real y_m, Real z_m,
std::normal_distribution<double> distz(z_m, z_rms);
if (ParallelDescriptor::IOProcessor()) {
- std::array<Real, 3> u;
- Real weight;
// If do_symmetrize, create 4x fewer particles, and
// Replicate each particle 4 times (x,y) (-x,y) (x,-y) (-x,-y)
if (do_symmetrize){
npart /= 4;
}
for (long i = 0; i < npart; ++i) {
-#if ( AMREX_SPACEDIM == 3 | WARPX_RZ)
- weight = q_tot/npart/charge;
+#if ( AMREX_SPACEDIM == 3 | WARPX_DIM_RZ)
+ Real weight = q_tot/npart/charge;
Real x = distx(mt);
Real y = disty(mt);
Real z = distz(mt);
#elif ( AMREX_SPACEDIM == 2 )
- weight = q_tot/npart/charge/y_rms;
+ Real weight = q_tot/npart/charge/y_rms;
Real x = distx(mt);
Real y = 0.;
Real z = distz(mt);
#endif
if (plasma_injector->insideBounds(x, y, z)) {
- plasma_injector->getMomentum(u, x, y, z);
+ XDim3 u = plasma_injector->getMomentum(x, y, z);
+ u.x *= PhysConst::c;
+ u.y *= PhysConst::c;
+ u.z *= PhysConst::c;
if (do_symmetrize){
- std::array<Real, 3> u_tmp;
- Real x_tmp, y_tmp;
// Add four particles to the beam:
- // (x,ux,y,uy) (-x,-ux,y,uy) (x,ux,-y,-uy) (-x,-ux,-y,-uy)
- for (int ix=0; ix<2; ix++){
- for (int iy=0; iy<2; iy++){
- u_tmp = u;
- x_tmp = x*std::pow(-1,ix);
- u_tmp[0] *= std::pow(-1,ix);
- y_tmp = y*std::pow(-1,iy);
- u_tmp[1] *= std::pow(-1,iy);
- CheckAndAddParticle(x_tmp, y_tmp, z,
- u_tmp, weight/4);
- }
- }
+ CheckAndAddParticle( x, y, z, { u.x, u.y, u.z}, weight/4. );
+ CheckAndAddParticle( x,-y, z, { u.x,-u.y, u.z}, weight/4. );
+ CheckAndAddParticle(-x, y, z, {-u.x, u.y, u.z}, weight/4. );
+ CheckAndAddParticle(-x,-y, z, {-u.x,-u.y, u.z}, weight/4. );
} else {
- CheckAndAddParticle(x, y, z, u, weight);
+ CheckAndAddParticle(x, y, z, {u.x,u.y,u.z}, weight);
}
}
}
@@ -322,28 +262,19 @@ PhysicalParticleContainer::AddParticles (int lev)
void
PhysicalParticleContainer::AddPlasma (int lev, RealBox part_realbox)
{
-#ifdef AMREX_USE_GPU
- AddPlasmaGPU(lev, part_realbox);
-#else
- AddPlasmaCPU(lev, part_realbox);
-#endif
-}
-
-void
-PhysicalParticleContainer::AddPlasmaCPU (int lev, RealBox part_realbox)
-{
- BL_PROFILE("PhysicalParticleContainer::AddPlasmaCPU");
+ BL_PROFILE("PhysicalParticleContainer::AddPlasma");
// If no part_realbox is provided, initialize particles in the whole domain
const Geometry& geom = Geom(lev);
if (!part_realbox.ok()) part_realbox = geom.ProbDomain();
int num_ppc = plasma_injector->num_particles_per_cell;
-#ifdef WARPX_RZ
+#ifdef WARPX_DIM_RZ
Real rmax = std::min(plasma_injector->xmax, part_realbox.hi(0));
#endif
- const Real* dx = geom.CellSize();
+ const auto dx = geom.CellSizeArray();
+ const auto problo = geom.ProbLoArray();
Real scale_fac;
#if AMREX_SPACEDIM==3
@@ -358,490 +289,341 @@ PhysicalParticleContainer::AddPlasmaCPU (int lev, RealBox part_realbox)
const int grid_id = mfi.index();
const int tile_id = mfi.LocalTileIndex();
GetParticles(lev)[std::make_pair(grid_id, tile_id)];
+ if (WarpX::do_boosted_frame_diagnostic && do_boosted_frame_diags) {
+ DefineAndReturnParticleTile(lev, grid_id, tile_id);
+ }
}
#endif
MultiFab* cost = WarpX::getCosts(lev);
- if ( (not m_refined_injection_mask) and WarpX::do_moving_window)
+ const int nlevs = numLevels();
+ static bool refine_injection = false;
+ static Box fine_injection_box;
+ static int rrfac = 1;
+ // This does not work if the mesh is dynamic. But in that case, we should
+ // not use refined injection either. We also assume there is only one fine level.
+ if (WarpX::do_moving_window and WarpX::refine_plasma
+ and do_continuous_injection and nlevs == 2)
{
- Box mask_box = geom.Domain();
- mask_box.setSmall(WarpX::moving_window_dir, 0);
- mask_box.setBig(WarpX::moving_window_dir, 0);
- m_refined_injection_mask.reset( new IArrayBox(mask_box));
- m_refined_injection_mask->setVal(-1);
+ refine_injection = true;
+ fine_injection_box = ParticleBoxArray(1).minimalBox();
+ fine_injection_box.setSmall(WarpX::moving_window_dir, std::numeric_limits<int>::lowest());
+ fine_injection_box.setBig(WarpX::moving_window_dir, std::numeric_limits<int>::max());
+ rrfac = m_gdb->refRatio(0)[0];
+ fine_injection_box.coarsen(rrfac);
}
+ InjectorPosition* inj_pos = plasma_injector->getInjectorPosition();
+ InjectorDensity* inj_rho = plasma_injector->getInjectorDensity();
+ InjectorMomentum* inj_mom = plasma_injector->getInjectorMomentum();
+ Real gamma_boost = WarpX::gamma_boost;
+ Real beta_boost = WarpX::beta_boost;
+ Real t = WarpX::GetInstance().gett_new(lev);
+ Real density_min = plasma_injector->density_min;
+ Real density_max = plasma_injector->density_max;
+
+#ifdef WARPX_DIM_RZ
+ bool radially_weighted = plasma_injector->radially_weighted;
+#endif
+
MFItInfo info;
- if (do_tiling) {
+ if (do_tiling && Gpu::notInLaunchRegion()) {
info.EnableTiling(tile_size);
}
- info.SetDynamic(true);
-
#ifdef _OPENMP
+ info.SetDynamic(true);
#pragma omp parallel if (not WarpX::serialize_ics)
#endif
+ for (MFIter mfi = MakeMFIter(lev, info); mfi.isValid(); ++mfi)
{
- std::array<Real,PIdx::nattribs> attribs;
- attribs.fill(0.0);
-
- // Loop through the tiles
- for (MFIter mfi = MakeMFIter(lev, info); mfi.isValid(); ++mfi) {
-
- Real wt = amrex::second();
-
- const Box& tile_box = mfi.tilebox();
- const RealBox tile_realbox = WarpX::getRealBox(tile_box, lev);
-
- // Find the cells of part_box that overlap with tile_realbox
- // If there is no overlap, just go to the next tile in the loop
- RealBox overlap_realbox;
- Box overlap_box;
- Real ncells_adjust;
- bool no_overlap = 0;
-
- for (int dir=0; dir<AMREX_SPACEDIM; dir++) {
- if ( tile_realbox.lo(dir) <= part_realbox.hi(dir) ) {
- ncells_adjust = std::floor( (tile_realbox.lo(dir) - part_realbox.lo(dir))/dx[dir] );
- overlap_realbox.setLo( dir, part_realbox.lo(dir) + std::max(ncells_adjust, 0.) * dx[dir]);
- } else {
- no_overlap = 1; break;
- }
- if ( tile_realbox.hi(dir) >= part_realbox.lo(dir) ) {
- ncells_adjust = std::floor( (part_realbox.hi(dir) - tile_realbox.hi(dir))/dx[dir] );
- overlap_realbox.setHi( dir, part_realbox.hi(dir) - std::max(ncells_adjust, 0.) * dx[dir]);
- } else {
- no_overlap = 1; break;
- }
- // Count the number of cells in this direction in overlap_realbox
- overlap_box.setSmall( dir, 0 );
- overlap_box.setBig( dir,
- int( round((overlap_realbox.hi(dir)-overlap_realbox.lo(dir))/dx[dir] )) - 1);
+ Real wt = amrex::second();
+
+ const Box& tile_box = mfi.tilebox();
+ const RealBox tile_realbox = WarpX::getRealBox(tile_box, lev);
+
+ // Find the cells of part_box that overlap with tile_realbox
+ // If there is no overlap, just go to the next tile in the loop
+ RealBox overlap_realbox;
+ Box overlap_box;
+ IntVect shifted;
+ bool no_overlap = false;
+
+ for (int dir=0; dir<AMREX_SPACEDIM; dir++) {
+ if ( tile_realbox.lo(dir) <= part_realbox.hi(dir) ) {
+ Real ncells_adjust = std::floor( (tile_realbox.lo(dir) - part_realbox.lo(dir))/dx[dir] );
+ overlap_realbox.setLo( dir, part_realbox.lo(dir) + std::max(ncells_adjust, 0.) * dx[dir]);
+ } else {
+ no_overlap = true; break;
}
- if (no_overlap == 1) {
- continue; // Go to the next tile
+ if ( tile_realbox.hi(dir) >= part_realbox.lo(dir) ) {
+ Real ncells_adjust = std::floor( (part_realbox.hi(dir) - tile_realbox.hi(dir))/dx[dir] );
+ overlap_realbox.setHi( dir, part_realbox.hi(dir) - std::max(ncells_adjust, 0.) * dx[dir]);
+ } else {
+ no_overlap = true; break;
}
+ // Count the number of cells in this direction in overlap_realbox
+ overlap_box.setSmall( dir, 0 );
+ overlap_box.setBig( dir,
+ int( std::round((overlap_realbox.hi(dir)-overlap_realbox.lo(dir))
+ /dx[dir] )) - 1);
+ shifted[dir] = std::round((overlap_realbox.lo(dir)-problo[dir])/dx[dir]);
+ // shifted is exact in non-moving-window direction. That's all we care about.
+ }
+ if (no_overlap == 1) {
+ continue; // Go to the next tile
+ }
- const int grid_id = mfi.index();
- const int tile_id = mfi.LocalTileIndex();
-
- // Loop through the cells of overlap_box and inject
- // the corresponding particles
- const auto& overlap_corner = overlap_realbox.lo();
- for (IntVect iv = overlap_box.smallEnd(); iv <= overlap_box.bigEnd(); overlap_box.next(iv))
- {
- int fac;
- if (do_continuous_injection) {
-#if ( AMREX_SPACEDIM == 3 )
- Real x = overlap_corner[0] + (iv[0] + 0.5)*dx[0];
- Real y = overlap_corner[1] + (iv[1] + 0.5)*dx[1];
- Real z = overlap_corner[2] + (iv[2] + 0.5)*dx[2];
-#elif ( AMREX_SPACEDIM == 2 )
- Real x = overlap_corner[0] + (iv[0] + 0.5)*dx[0];
- Real y = 0;
- Real z = overlap_corner[1] + (iv[1] + 0.5)*dx[1];
-#endif
- fac = GetRefineFac(x, y, z);
- } else {
- fac = 1.0;
- }
-
- int ref_num_ppc = num_ppc * AMREX_D_TERM(fac, *fac, *fac);
- for (int i_part=0; i_part<ref_num_ppc;i_part++) {
- std::array<Real, 3> r;
- plasma_injector->getPositionUnitBox(r, i_part, fac);
-#if ( AMREX_SPACEDIM == 3 )
- Real x = overlap_corner[0] + (iv[0] + r[0])*dx[0];
- Real y = overlap_corner[1] + (iv[1] + r[1])*dx[1];
- Real z = overlap_corner[2] + (iv[2] + r[2])*dx[2];
-#elif ( AMREX_SPACEDIM == 2 )
- Real x = overlap_corner[0] + (iv[0] + r[0])*dx[0];
- Real y = 0;
- Real z = overlap_corner[1] + (iv[1] + r[1])*dx[1];
-#endif
- // If the new particle is not inside the tile box,
- // go to the next generated particle.
-#if ( AMREX_SPACEDIM == 3 )
- if(!tile_realbox.contains( RealVect{x, y, z} )) continue;
-#elif ( AMREX_SPACEDIM == 2 )
- if(!tile_realbox.contains( RealVect{x, z} )) continue;
-#endif
+ const int grid_id = mfi.index();
+ const int tile_id = mfi.LocalTileIndex();
- // Save the x and y values to use in the insideBounds checks.
- // This is needed with WARPX_RZ since x and y are modified.
- Real xb = x;
- Real yb = y;
-
-#ifdef WARPX_RZ
- // Replace the x and y, choosing the angle randomly.
- // These x and y are used to get the momentum and density
- Real theta = 2.*MathConst::pi*amrex::Random();
- y = x*std::sin(theta);
- x = x*std::cos(theta);
-#endif
+ // Max number of new particles, if particles are created in the whole
+ // overlap_box. All of them are created, and invalid ones are then
+ // discarded
+ int max_new_particles = overlap_box.numPts() * num_ppc;
- Real dens;
- std::array<Real, 3> u;
- if (WarpX::gamma_boost == 1.){
- // Lab-frame simulation
- // If the particle is not within the species's
- // xmin, xmax, ymin, ymax, zmin, zmax, go to
- // the next generated particle.
- if (!plasma_injector->insideBounds(xb, yb, z)) continue;
- plasma_injector->getMomentum(u, x, y, z);
- dens = plasma_injector->getDensity(x, y, z);
- } else {
- // Boosted-frame simulation
- Real c = PhysConst::c;
- Real gamma_boost = WarpX::gamma_boost;
- Real beta_boost = WarpX::beta_boost;
- // Since the user provides the density distribution
- // at t_lab=0 and in the lab-frame coordinates,
- // we need to find the lab-frame position of this
- // particle at t_lab=0, from its boosted-frame coordinates
- // Assuming ballistic motion, this is given by:
- // z0_lab = gamma*( z_boost*(1-beta*betaz_lab) - ct_boost*(betaz_lab-beta) )
- // where betaz_lab is the speed of the particle in the lab frame
- //
- // In order for this equation to be solvable, betaz_lab
- // is explicitly assumed to have no dependency on z0_lab
- plasma_injector->getMomentum(u, x, y, 0.); // No z0_lab dependency
- // At this point u is the lab-frame momentum
- // => Apply the above formula for z0_lab
- Real gamma_lab = std::sqrt( 1 + (u[0]*u[0] + u[1]*u[1] + u[2]*u[2])/(c*c) );
- Real betaz_lab = u[2]/gamma_lab/c;
- Real t = WarpX::GetInstance().gett_new(lev);
- Real z0_lab = gamma_boost * ( z*(1-beta_boost*betaz_lab) - c*t*(betaz_lab-beta_boost) );
- // If the particle is not within the lab-frame zmin, zmax, etc.
- // go to the next generated particle.
- if (!plasma_injector->insideBounds(xb, yb, z0_lab)) continue;
- // call `getDensity` with lab-frame parameters
- dens = plasma_injector->getDensity(x, y, z0_lab);
- // At this point u and dens are the lab-frame quantities
- // => Perform Lorentz transform
- dens = gamma_boost * dens * ( 1 - beta_boost*betaz_lab );
- u[2] = gamma_boost * ( u[2] -beta_boost*c*gamma_lab );
- }
- Real weight = dens * scale_fac / (AMREX_D_TERM(fac, *fac, *fac));
-#ifdef WARPX_RZ
- if (plasma_injector->radially_weighted) {
- weight *= 2*MathConst::pi*xb;
- } else {
- // This is not correct since it might shift the particle
- // out of the local grid
- x = std::sqrt(xb*rmax);
- weight *= dx[0];
- }
-#endif
- attribs[PIdx::w ] = weight;
- attribs[PIdx::ux] = u[0];
- attribs[PIdx::uy] = u[1];
- attribs[PIdx::uz] = u[2];
-
- if (WarpX::do_boosted_frame_diagnostic && do_boosted_frame_diags)
- {
- auto& particle_tile = DefineAndReturnParticleTile(lev, grid_id, tile_id);
- particle_tile.push_back_real(particle_comps["xold"], x);
- particle_tile.push_back_real(particle_comps["yold"], y);
- particle_tile.push_back_real(particle_comps["zold"], z);
-
- particle_tile.push_back_real(particle_comps["uxold"], u[0]);
- particle_tile.push_back_real(particle_comps["uyold"], u[1]);
- particle_tile.push_back_real(particle_comps["uzold"], u[2]);
- }
-
- AddOneParticle(lev, grid_id, tile_id, x, y, z, attribs);
+ // If refine injection, build pointer dp_cellid that holds pointer to
+ // array of refined cell IDs.
+ Vector<int> cellid_v;
+ if (refine_injection and lev == 0)
+ {
+ // then how many new particles will be injected is not that simple
+ // We have to shift fine_injection_box because overlap_box has been shifted.
+ Box fine_overlap_box = overlap_box & amrex::shift(fine_injection_box,shifted);
+ max_new_particles += fine_overlap_box.numPts() * num_ppc
+ * (AMREX_D_TERM(rrfac,*rrfac,*rrfac)-1);
+ for (int icell = 0, ncells = overlap_box.numPts(); icell < ncells; ++icell) {
+ IntVect iv = overlap_box.atOffset(icell);
+ int r = (fine_overlap_box.contains(iv)) ? AMREX_D_TERM(rrfac,*rrfac,*rrfac) : 1;
+ for (int ipart = 0; ipart < r; ++ipart) {
+ cellid_v.push_back(icell);
+ cellid_v.push_back(ipart);
}
}
+ }
+ int const* hp_cellid = (cellid_v.empty()) ? nullptr : cellid_v.data();
+ amrex::AsyncArray<int> cellid_aa(hp_cellid, cellid_v.size());
+ int const* dp_cellid = cellid_aa.data();
- if (cost) {
- wt = (amrex::second() - wt) / tile_box.d_numPts();
- Array4<Real> const& costarr = cost->array(mfi);
- amrex::ParallelFor(tile_box,
- [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept
- {
- costarr(i,j,k) += wt;
- });
- }
+ // Update NextID to include particles created in this function
+ int pid;
+#pragma omp critical (add_plasma_nextid)
+ {
+ pid = ParticleType::NextID();
+ ParticleType::NextID(pid+max_new_particles);
}
- }
-}
+ const int cpuid = ParallelDescriptor::MyProc();
-#ifdef AMREX_USE_GPU
-void
-PhysicalParticleContainer::AddPlasmaGPU (int lev, RealBox part_realbox)
-{
- BL_PROFILE("PhysicalParticleContainer::AddPlasmaGPU");
+ auto& particle_tile = GetParticles(lev)[std::make_pair(grid_id,tile_id)];
+ bool do_boosted = false;
+ if (WarpX::do_boosted_frame_diagnostic && do_boosted_frame_diags) {
+ do_boosted = true;
+ DefineAndReturnParticleTile(lev, grid_id, tile_id);
+ }
+ auto old_size = particle_tile.GetArrayOfStructs().size();
+ auto new_size = old_size + max_new_particles;
+ particle_tile.resize(new_size);
+
+ ParticleType* pp = particle_tile.GetArrayOfStructs()().data() + old_size;
+ auto& soa = particle_tile.GetStructOfArrays();
+ GpuArray<Real*,PIdx::nattribs> pa;
+ for (int ia = 0; ia < PIdx::nattribs; ++ia) {
+ pa[ia] = soa.GetRealData(ia).data() + old_size;
+ }
+ GpuArray<Real*,6> pb;
+ if (do_boosted) {
+ pb[0] = soa.GetRealData(particle_comps[ "xold"]).data() + old_size;
+ pb[1] = soa.GetRealData(particle_comps[ "yold"]).data() + old_size;
+ pb[2] = soa.GetRealData(particle_comps[ "zold"]).data() + old_size;
+ pb[3] = soa.GetRealData(particle_comps["uxold"]).data() + old_size;
+ pb[4] = soa.GetRealData(particle_comps["uyold"]).data() + old_size;
+ pb[5] = soa.GetRealData(particle_comps["uzold"]).data() + old_size;
+ }
- // If no part_realbox is provided, initialize particles in the whole domain
- const Geometry& geom = Geom(lev);
- if (!part_realbox.ok()) part_realbox = geom.ProbDomain();
+ const GpuArray<Real,AMREX_SPACEDIM> overlap_corner
+ {AMREX_D_DECL(overlap_realbox.lo(0),
+ overlap_realbox.lo(1),
+ overlap_realbox.lo(2))};
- int num_ppc = plasma_injector->num_particles_per_cell;
-#ifdef WARPX_RZ
- Real rmax = std::min(plasma_injector->xmax, part_realbox.hi(0));
-#endif
+ std::size_t shared_mem_bytes = plasma_injector->sharedMemoryNeeded();
+ int lrrfac = rrfac;
- const Real* dx = geom.CellSize();
+ // Loop over all new particles and inject them (creates too many
+ // particles, in particular does not consider xmin, xmax etc.).
+ // The invalid ones are given negative ID and are deleted during the
+ // next redistribute.
+ amrex::For(max_new_particles, [=] AMREX_GPU_DEVICE (int ip) noexcept
+ {
+ ParticleType& p = pp[ip];
+ p.id() = pid+ip;
+ p.cpu() = cpuid;
+
+ int cellid, i_part;
+ Real fac;
+ if (dp_cellid == nullptr) {
+ cellid = ip/num_ppc;
+ i_part = ip - cellid*num_ppc;
+ fac = 1.0;
+ } else {
+ cellid = dp_cellid[2*ip];
+ i_part = dp_cellid[2*ip+1];
+ fac = lrrfac;
+ }
- Real scale_fac;
-#if AMREX_SPACEDIM==3
- scale_fac = dx[0]*dx[1]*dx[2]/num_ppc;
-#elif AMREX_SPACEDIM==2
- scale_fac = dx[0]*dx[1]/num_ppc;
-#endif
+ IntVect iv = overlap_box.atOffset(cellid);
-#ifdef _OPENMP
- // First touch all tiles in the map in serial
- for (MFIter mfi = MakeMFIter(lev); mfi.isValid(); ++mfi) {
- const int grid_id = mfi.index();
- const int tile_id = mfi.LocalTileIndex();
- GetParticles(lev)[std::make_pair(grid_id, tile_id)];
- }
+ const XDim3 r = inj_pos->getPositionUnitBox(i_part, fac);
+#if (AMREX_SPACEDIM == 3)
+ Real x = overlap_corner[0] + (iv[0]+r.x)*dx[0];
+ Real y = overlap_corner[1] + (iv[1]+r.y)*dx[1];
+ Real z = overlap_corner[2] + (iv[2]+r.z)*dx[2];
+#else
+ Real x = overlap_corner[0] + (iv[0]+r.x)*dx[0];
+ Real y = 0.0;
+ Real z = overlap_corner[1] + (iv[1]+r.y)*dx[1];
#endif
- MultiFab* cost = WarpX::getCosts(lev);
-
- if ( (not m_refined_injection_mask) and WarpX::do_moving_window)
- {
- Box mask_box = geom.Domain();
- mask_box.setSmall(WarpX::moving_window_dir, 0);
- mask_box.setBig(WarpX::moving_window_dir, 0);
- m_refined_injection_mask.reset( new IArrayBox(mask_box));
- m_refined_injection_mask->setVal(-1);
- }
-
- MFItInfo info;
- if (do_tiling) {
- info.EnableTiling(tile_size);
- }
- info.SetDynamic(true);
-
-#ifdef _OPENMP
-#pragma omp parallel if (not WarpX::serialize_ics)
+#if (AMREX_SPACEDIM == 3)
+ if (!tile_realbox.contains(XDim3{x,y,z})) {
+ p.id() = -1;
+ return;
+ }
+#else
+ if (!tile_realbox.contains(XDim3{x,z,0.0})) {
+ p.id() = -1;
+ return;
+ }
#endif
- {
- std::array<Real,PIdx::nattribs> attribs;
- attribs.fill(0.0);
-
- // Loop through the tiles
- for (MFIter mfi = MakeMFIter(lev, info); mfi.isValid(); ++mfi) {
- Real wt = amrex::second();
-
- const Box& tile_box = mfi.tilebox();
- const RealBox tile_realbox = WarpX::getRealBox(tile_box, lev);
-
- // Find the cells of part_box that overlap with tile_realbox
- // If there is no overlap, just go to the next tile in the loop
- RealBox overlap_realbox;
- Box overlap_box;
- Real ncells_adjust;
- bool no_overlap = 0;
+ // Save the x and y values to use in the insideBounds checks.
+ // This is needed with WARPX_DIM_RZ since x and y are modified.
+ Real xb = x;
+ Real yb = y;
+
+#ifdef WARPX_DIM_RZ
+ // Replace the x and y, choosing the angle randomly.
+ // These x and y are used to get the momentum and density
+ Real theta = 2.*MathConst::pi*amrex::Random();
+ x = xb*std::cos(theta);
+ y = xb*std::sin(theta);
+#endif
- for (int dir=0; dir<AMREX_SPACEDIM; dir++) {
- if ( tile_realbox.lo(dir) <= part_realbox.hi(dir) ) {
- ncells_adjust = std::floor( (tile_realbox.lo(dir) - part_realbox.lo(dir))/dx[dir] );
- overlap_realbox.setLo( dir, part_realbox.lo(dir) + std::max(ncells_adjust, 0.) * dx[dir]);
- } else {
- no_overlap = 1; break;
+ Real dens;
+ XDim3 u;
+ if (gamma_boost == 1.) {
+ // Lab-frame simulation
+ // If the particle is not within the species's
+ // xmin, xmax, ymin, ymax, zmin, zmax, go to
+ // the next generated particle.
+ if (!inj_pos->insideBounds(xb, yb, z)) {
+ p.id() = -1;
+ return;
}
- if ( tile_realbox.hi(dir) >= part_realbox.lo(dir) ) {
- ncells_adjust = std::floor( (part_realbox.hi(dir) - tile_realbox.hi(dir))/dx[dir] );
- overlap_realbox.setHi( dir, part_realbox.hi(dir) - std::max(ncells_adjust, 0.) * dx[dir]);
- } else {
- no_overlap = 1; break;
+ u = inj_mom->getMomentum(x, y, z);
+ dens = inj_rho->getDensity(x, y, z);
+ // Remove particle if density below threshold
+ if ( dens < density_min ){
+ p.id() = -1;
+ return;
}
- // Count the number of cells in this direction in overlap_realbox
- overlap_box.setSmall( dir, 0 );
- overlap_box.setBig( dir,
- int( round((overlap_realbox.hi(dir)-overlap_realbox.lo(dir))/dx[dir] )) - 1);
- }
- if (no_overlap == 1) {
- continue; // Go to the next tile
- }
-
- const int grid_id = mfi.index();
- const int tile_id = mfi.LocalTileIndex();
-
- Cuda::HostVector<ParticleType> host_particles;
- std::array<Cuda::HostVector<Real>, PIdx::nattribs> host_attribs;
-
- // Loop through the cells of overlap_box and inject
- // the corresponding particles
- const auto& overlap_corner = overlap_realbox.lo();
- for (IntVect iv = overlap_box.smallEnd(); iv <= overlap_box.bigEnd(); overlap_box.next(iv))
- {
- int fac;
- if (do_continuous_injection) {
-#if ( AMREX_SPACEDIM == 3 )
- Real x = overlap_corner[0] + (iv[0] + 0.5)*dx[0];
- Real y = overlap_corner[1] + (iv[1] + 0.5)*dx[1];
- Real z = overlap_corner[2] + (iv[2] + 0.5)*dx[2];
-#elif ( AMREX_SPACEDIM == 2 )
- Real x = overlap_corner[0] + (iv[0] + 0.5)*dx[0];
- Real y = 0;
- Real z = overlap_corner[1] + (iv[1] + 0.5)*dx[1];
-#endif
- fac = GetRefineFac(x, y, z);
- } else {
- fac = 1.0;
+ // Cut density if above threshold
+ dens = amrex::min(dens, density_max);
+ } else {
+ // Boosted-frame simulation
+ // Since the user provides the density distribution
+ // at t_lab=0 and in the lab-frame coordinates,
+ // we need to find the lab-frame position of this
+ // particle at t_lab=0, from its boosted-frame coordinates
+ // Assuming ballistic motion, this is given by:
+ // z0_lab = gamma*( z_boost*(1-beta*betaz_lab) - ct_boost*(betaz_lab-beta) )
+ // where betaz_lab is the speed of the particle in the lab frame
+ //
+ // In order for this equation to be solvable, betaz_lab
+ // is explicitly assumed to have no dependency on z0_lab
+ u = inj_mom->getMomentum(x, y, 0.); // No z0_lab dependency
+ // At this point u is the lab-frame momentum
+ // => Apply the above formula for z0_lab
+ Real gamma_lab = std::sqrt( 1.+(u.x*u.x+u.y*u.y+u.z*u.z) );
+ Real betaz_lab = u.z/(gamma_lab);
+ Real z0_lab = gamma_boost * ( z*(1-beta_boost*betaz_lab)
+ - PhysConst::c*t*(betaz_lab-beta_boost) );
+ // If the particle is not within the lab-frame zmin, zmax, etc.
+ // go to the next generated particle.
+ if (!inj_pos->insideBounds(xb, yb, z0_lab)) {
+ p.id() = -1;
+ return;
}
+ // call `getDensity` with lab-frame parameters
+ dens = inj_rho->getDensity(x, y, z0_lab);
+ // Remove particle if density below threshold
+ if ( dens < density_min ){
+ p.id() = -1;
+ return;
+ }
+ // Cut density if above threshold
+ dens = amrex::min(dens, density_max);
+ // At this point u and dens are the lab-frame quantities
+ // => Perform Lorentz transform
+ dens = gamma_boost * dens * ( 1.0 - beta_boost*betaz_lab );
+ u.z = gamma_boost * ( u.z -beta_boost*gamma_lab );
+ }
- int ref_num_ppc = num_ppc * AMREX_D_TERM(fac, *fac, *fac);
- for (int i_part=0; i_part<ref_num_ppc;i_part++) {
- std::array<Real, 3> r;
- plasma_injector->getPositionUnitBox(r, i_part, fac);
-#if ( AMREX_SPACEDIM == 3 )
- Real x = overlap_corner[0] + (iv[0] + r[0])*dx[0];
- Real y = overlap_corner[1] + (iv[1] + r[1])*dx[1];
- Real z = overlap_corner[2] + (iv[2] + r[2])*dx[2];
-#elif ( AMREX_SPACEDIM == 2 )
- Real x = overlap_corner[0] + (iv[0] + r[0])*dx[0];
- Real y = 0;
- Real z = overlap_corner[1] + (iv[1] + r[1])*dx[1];
-#endif
- // If the new particle is not inside the tile box,
- // go to the next generated particle.
-#if ( AMREX_SPACEDIM == 3 )
- if(!tile_realbox.contains( RealVect{x, y, z} )) continue;
-#elif ( AMREX_SPACEDIM == 2 )
- if(!tile_realbox.contains( RealVect{x, z} )) continue;
-#endif
-
- // Save the x and y values to use in the insideBounds checks.
- // This is needed with WARPX_RZ since x and y are modified.
- Real xb = x;
- Real yb = y;
-
-#ifdef WARPX_RZ
- // Replace the x and y, choosing the angle randomly.
- // These x and y are used to get the momentum and density
- Real theta = 2.*MathConst::pi*amrex::Random();
- x = xb*std::cos(theta);
- y = xb*std::sin(theta);
-#endif
+ u.x *= PhysConst::c;
+ u.y *= PhysConst::c;
+ u.z *= PhysConst::c;
- Real dens;
- std::array<Real, 3> u;
- if (WarpX::gamma_boost == 1.){
- // Lab-frame simulation
- // If the particle is not within the species's
- // xmin, xmax, ymin, ymax, zmin, zmax, go to
- // the next generated particle.
- if (!plasma_injector->insideBounds(xb, yb, z)) continue;
- plasma_injector->getMomentum(u, x, y, z);
- dens = plasma_injector->getDensity(x, y, z);
- } else {
- // Boosted-frame simulation
- Real c = PhysConst::c;
- Real gamma_boost = WarpX::gamma_boost;
- Real beta_boost = WarpX::beta_boost;
- // Since the user provides the density distribution
- // at t_lab=0 and in the lab-frame coordinates,
- // we need to find the lab-frame position of this
- // particle at t_lab=0, from its boosted-frame coordinates
- // Assuming ballistic motion, this is given by:
- // z0_lab = gamma*( z_boost*(1-beta*betaz_lab) - ct_boost*(betaz_lab-beta) )
- // where betaz_lab is the speed of the particle in the lab frame
- //
- // In order for this equation to be solvable, betaz_lab
- // is explicitly assumed to have no dependency on z0_lab
- plasma_injector->getMomentum(u, x, y, 0.); // No z0_lab dependency
- // At this point u is the lab-frame momentum
- // => Apply the above formula for z0_lab
- Real gamma_lab = std::sqrt( 1 + (u[0]*u[0] + u[1]*u[1] + u[2]*u[2])/(c*c) );
- Real betaz_lab = u[2]/gamma_lab/c;
- Real t = WarpX::GetInstance().gett_new(lev);
- Real z0_lab = gamma_boost * ( z*(1-beta_boost*betaz_lab) - c*t*(betaz_lab-beta_boost) );
- // If the particle is not within the lab-frame zmin, zmax, etc.
- // go to the next generated particle.
- if (!plasma_injector->insideBounds(xb, yb, z0_lab)) continue;
- // call `getDensity` with lab-frame parameters
- dens = plasma_injector->getDensity(x, y, z0_lab);
- // At this point u and dens are the lab-frame quantities
- // => Perform Lorentz transform
- dens = gamma_boost * dens * ( 1 - beta_boost*betaz_lab );
- u[2] = gamma_boost * ( u[2] -beta_boost*c*gamma_lab );
- }
- Real weight = dens * scale_fac / (AMREX_D_TERM(fac, *fac, *fac));
-#ifdef WARPX_RZ
- if (plasma_injector->radially_weighted) {
- weight *= 2*MathConst::pi*xb;
- } else {
- // This is not correct since it might shift the particle
- // out of the local grid
- x = std::sqrt(xb*rmax);
- weight *= dx[0];
- }
+ // Real weight = dens * scale_fac / (AMREX_D_TERM(fac, *fac, *fac));
+ Real weight = dens * scale_fac;
+#ifdef WARPX_DIM_RZ
+ if (radially_weighted) {
+ weight *= 2.*MathConst::pi*xb;
+ } else {
+ // This is not correct since it might shift the particle
+ // out of the local grid
+ x = std::sqrt(xb*rmax);
+ weight *= dx[0];
+ }
#endif
- attribs[PIdx::w ] = weight;
- attribs[PIdx::ux] = u[0];
- attribs[PIdx::uy] = u[1];
- attribs[PIdx::uz] = u[2];
-
- // note - this will be slow on the GPU, need to revisit
- if (WarpX::do_boosted_frame_diagnostic && do_boosted_frame_diags)
- {
- auto& particle_tile = DefineAndReturnParticleTile(lev, grid_id, tile_id);
- particle_tile.push_back_real(particle_comps["xold"], x);
- particle_tile.push_back_real(particle_comps["yold"], y);
- particle_tile.push_back_real(particle_comps["zold"], z);
-
- particle_tile.push_back_real(particle_comps["uxold"], u[0]);
- particle_tile.push_back_real(particle_comps["uyold"], u[1]);
- particle_tile.push_back_real(particle_comps["uzold"], u[2]);
- }
+ pa[PIdx::w ][ip] = weight;
+ pa[PIdx::ux][ip] = u.x;
+ pa[PIdx::uy][ip] = u.y;
+ pa[PIdx::uz][ip] = u.z;
+
+ if (do_boosted) {
+ pb[0][ip] = x;
+ pb[1][ip] = y;
+ pb[2][ip] = z;
+ pb[3][ip] = u.x;
+ pb[4][ip] = u.y;
+ pb[5][ip] = u.z;
+ }
- ParticleType p;
- p.id() = ParticleType::NextID();
- p.cpu() = ParallelDescriptor::MyProc();
#if (AMREX_SPACEDIM == 3)
- p.pos(0) = x;
- p.pos(1) = y;
- p.pos(2) = z;
+ p.pos(0) = x;
+ p.pos(1) = y;
+ p.pos(2) = z;
#elif (AMREX_SPACEDIM == 2)
-#ifdef WARPX_RZ
- attribs[PIdx::theta] = theta;
+#ifdef WARPX_DIM_RZ
+ pa[PIdx::theta][ip] = theta;
#endif
- p.pos(0) = xb;
- p.pos(1) = z;
+ p.pos(0) = xb;
+ p.pos(1) = z;
#endif
-
- host_particles.push_back(p);
- for (int kk = 0; kk < PIdx::nattribs; ++kk)
- host_attribs[kk].push_back(attribs[kk]);
- }
- }
-
- auto& particle_tile = GetParticles(lev)[std::make_pair(grid_id,tile_id)];
- auto old_size = particle_tile.GetArrayOfStructs().size();
- auto new_size = old_size + host_particles.size();
- particle_tile.resize(new_size);
-
- Cuda::thrust_copy(host_particles.begin(),
- host_particles.end(),
- particle_tile.GetArrayOfStructs().begin() + old_size);
-
- for (int kk = 0; kk < PIdx::nattribs; ++kk) {
- Cuda::thrust_copy(host_attribs[kk].begin(),
- host_attribs[kk].end(),
- particle_tile.GetStructOfArrays().GetRealData(kk).begin() + old_size);
- }
-
- if (cost) {
- wt = (amrex::second() - wt) / tile_box.d_numPts();
- Array4<Real> const& costarr = cost->array(mfi);
- amrex::ParallelFor(tile_box,
- [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept
- {
- costarr(i,j,k) += wt;
- });
- }
- }
+ }, shared_mem_bytes);
+
+ if (cost) {
+ wt = (amrex::second() - wt) / tile_box.d_numPts();
+ Array4<Real> const& costarr = cost->array(mfi);
+ amrex::ParallelFor(tile_box,
+ [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept
+ {
+ costarr(i,j,k) += wt;
+ });
+ }
}
+
+ // The function that calls this is responsible for redistributing particles.
}
-#endif
#ifdef WARPX_DO_ELECTROSTATIC
void
@@ -1066,11 +848,14 @@ PhysicalParticleContainer::FieldGather (int lev,
MultiFab* cost = WarpX::getCosts(lev);
#ifdef _OPENMP
-#pragma omp parallel
+#pragma omp parallel
#endif
{
- Cuda::ManagedDeviceVector<Real> xp, yp, zp;
-
+#ifdef _OPENMP
+ int thread_num = omp_get_thread_num();
+#else
+ int thread_num = 0;
+#endif
for (WarpXParIter pti(*this, lev); pti.isValid(); ++pti)
{
Real wt = amrex::second();
@@ -1106,35 +891,15 @@ PhysicalParticleContainer::FieldGather (int lev,
//
// copy data from particle container to temp arrays
//
- pti.GetPosition(xp, yp, zp);
-
- const std::array<Real,3>& xyzmin = WarpX::LowerCorner(box, lev);
- const int* ixyzmin = box.loVect();
+ pti.GetPosition(m_xp[thread_num], m_yp[thread_num], m_zp[thread_num]);
//
// Field Gather
//
- const int ll4symtry = false;
- long lvect_fieldgathe = 64;
- warpx_geteb_energy_conserving(
- &np,
- xp.dataPtr(),
- yp.dataPtr(),
- zp.dataPtr(),
- Exp.dataPtr(),Eyp.dataPtr(),Ezp.dataPtr(),
- Bxp.dataPtr(),Byp.dataPtr(),Bzp.dataPtr(),
- ixyzmin,
- &xyzmin[0], &xyzmin[1], &xyzmin[2],
- &dx[0], &dx[1], &dx[2],
- &WarpX::nox, &WarpX::noy, &WarpX::noz,
- BL_TO_FORTRAN_ANYD(exfab),
- BL_TO_FORTRAN_ANYD(eyfab),
- BL_TO_FORTRAN_ANYD(ezfab),
- BL_TO_FORTRAN_ANYD(bxfab),
- BL_TO_FORTRAN_ANYD(byfab),
- BL_TO_FORTRAN_ANYD(bzfab),
- &ll4symtry, &WarpX::l_lower_order_in_v, &WarpX::do_nodal,
- &lvect_fieldgathe, &WarpX::field_gathering_algo);
+ int e_is_nodal = Ex.is_nodal() and Ey.is_nodal() and Ez.is_nodal();
+ FieldGather(pti, Exp, Eyp, Ezp, Bxp, Byp, Bzp,
+ &exfab, &eyfab, &ezfab, &bxfab, &byfab, &bzfab,
+ Ex.nGrow(), e_is_nodal, 0, np, thread_num, lev, lev);
if (cost) {
const Box& tbx = pti.tilebox();
@@ -1164,7 +929,7 @@ PhysicalParticleContainer::Evolve (int lev,
BL_PROFILE("PPC::Evolve()");
BL_PROFILE_VAR_NS("PPC::Evolve::Copy", blp_copy);
BL_PROFILE_VAR_NS("PICSAR::FieldGather", blp_pxr_fg);
- BL_PROFILE_VAR_NS("PICSAR::ParticlePush", blp_pxr_pp);
+ BL_PROFILE_VAR_NS("PPC::ParticlePush", blp_ppc_pp);
BL_PROFILE_VAR_NS("PPC::Evolve::partition", blp_partition);
const std::array<Real,3>& dx = WarpX::CellSize(lev);
@@ -1391,57 +1156,40 @@ PhysicalParticleContainer::Evolve (int lev,
pti.GetPosition(m_xp[thread_num], m_yp[thread_num], m_zp[thread_num]);
BL_PROFILE_VAR_STOP(blp_copy);
- if (rho) DepositCharge(pti, wp, rho, crho, 0, np_current, np, thread_num, lev);
+ if (rho) {
+ DepositCharge(pti, wp, rho, 0, 0, np_current, thread_num, lev, lev);
+ if (has_buffer){
+ DepositCharge(pti, wp, crho, 0, np_current, np-np_current, thread_num, lev, lev-1);
+ }
+ }
if (! do_not_push)
{
+ const long np_gather = (cEx) ? nfine_gather : np;
+
+ int e_is_nodal = Ex.is_nodal() and Ey.is_nodal() and Ez.is_nodal();
+
//
// Field Gather of Aux Data (i.e., the full solution)
//
- const int ll4symtry = false;
- long lvect_fieldgathe = 64;
-
- const std::array<Real,3>& xyzmin_grid = WarpX::LowerCorner(box, lev);
- const int* ixyzmin_grid = box.loVect();
-
- const long np_gather = (cEx) ? nfine_gather : np;
-
BL_PROFILE_VAR_START(blp_pxr_fg);
-
- warpx_geteb_energy_conserving(
- &np_gather,
- m_xp[thread_num].dataPtr(),
- m_yp[thread_num].dataPtr(),
- m_zp[thread_num].dataPtr(),
- Exp.dataPtr(),Eyp.dataPtr(),Ezp.dataPtr(),
- Bxp.dataPtr(),Byp.dataPtr(),Bzp.dataPtr(),
- ixyzmin_grid,
- &xyzmin_grid[0], &xyzmin_grid[1], &xyzmin_grid[2],
- &dx[0], &dx[1], &dx[2],
- &WarpX::nox, &WarpX::noy, &WarpX::noz,
- BL_TO_FORTRAN_ANYD(*exfab),
- BL_TO_FORTRAN_ANYD(*eyfab),
- BL_TO_FORTRAN_ANYD(*ezfab),
- BL_TO_FORTRAN_ANYD(*bxfab),
- BL_TO_FORTRAN_ANYD(*byfab),
- BL_TO_FORTRAN_ANYD(*bzfab),
- &ll4symtry, &WarpX::l_lower_order_in_v, &WarpX::do_nodal,
- &lvect_fieldgathe, &WarpX::field_gathering_algo);
+ FieldGather(pti, Exp, Eyp, Ezp, Bxp, Byp, Bzp,
+ exfab, eyfab, ezfab, bxfab, byfab, bzfab,
+ Ex.nGrow(), e_is_nodal, 0, np_gather, thread_num, lev, lev);
if (np_gather < np)
{
const IntVect& ref_ratio = WarpX::RefRatio(lev-1);
const Box& cbox = amrex::coarsen(box,ref_ratio);
- const std::array<Real,3>& cxyzmin_grid = WarpX::LowerCorner(cbox, lev-1);
- const int* cixyzmin_grid = cbox.loVect();
-
- const FArrayBox* cexfab = &(*cEx)[pti];
- const FArrayBox* ceyfab = &(*cEy)[pti];
- const FArrayBox* cezfab = &(*cEz)[pti];
- const FArrayBox* cbxfab = &(*cBx)[pti];
- const FArrayBox* cbyfab = &(*cBy)[pti];
- const FArrayBox* cbzfab = &(*cBz)[pti];
+ // Data on the grid
+ FArrayBox const* cexfab = &(*cEx)[pti];
+ FArrayBox const* ceyfab = &(*cEy)[pti];
+ FArrayBox const* cezfab = &(*cEz)[pti];
+ FArrayBox const* cbxfab = &(*cBx)[pti];
+ FArrayBox const* cbyfab = &(*cBy)[pti];
+ FArrayBox const* cbzfab = &(*cBz)[pti];
+
if (WarpX::use_fdtd_nci_corr)
{
#if (AMREX_SPACEDIM == 2)
@@ -1494,26 +1242,14 @@ PhysicalParticleContainer::Evolve (int lev,
#endif
}
- long ncrse = np - nfine_gather;
- warpx_geteb_energy_conserving(
- &ncrse,
- m_xp[thread_num].dataPtr()+nfine_gather,
- m_yp[thread_num].dataPtr()+nfine_gather,
- m_zp[thread_num].dataPtr()+nfine_gather,
- Exp.dataPtr()+nfine_gather, Eyp.dataPtr()+nfine_gather, Ezp.dataPtr()+nfine_gather,
- Bxp.dataPtr()+nfine_gather, Byp.dataPtr()+nfine_gather, Bzp.dataPtr()+nfine_gather,
- cixyzmin_grid,
- &cxyzmin_grid[0], &cxyzmin_grid[1], &cxyzmin_grid[2],
- &cdx[0], &cdx[1], &cdx[2],
- &WarpX::nox, &WarpX::noy, &WarpX::noz,
- BL_TO_FORTRAN_ANYD(*cexfab),
- BL_TO_FORTRAN_ANYD(*ceyfab),
- BL_TO_FORTRAN_ANYD(*cezfab),
- BL_TO_FORTRAN_ANYD(*cbxfab),
- BL_TO_FORTRAN_ANYD(*cbyfab),
- BL_TO_FORTRAN_ANYD(*cbzfab),
- &ll4symtry, &WarpX::l_lower_order_in_v, &WarpX::do_nodal,
- &lvect_fieldgathe, &WarpX::field_gathering_algo);
+ // Field gather for particles in gather buffers
+ e_is_nodal = cEx->is_nodal() and cEy->is_nodal() and cEz->is_nodal();
+ FieldGather(pti, Exp, Eyp, Ezp, Bxp, Byp, Bzp,
+ cexfab, ceyfab, cezfab,
+ cbxfab, cbyfab, cbzfab,
+ cEx->nGrow(), e_is_nodal,
+ nfine_gather, np-nfine_gather,
+ thread_num, lev, lev-1);
}
BL_PROFILE_VAR_STOP(blp_pxr_fg);
@@ -1521,10 +1257,10 @@ PhysicalParticleContainer::Evolve (int lev,
//
// Particle Push
//
- BL_PROFILE_VAR_START(blp_pxr_pp);
+ BL_PROFILE_VAR_START(blp_ppc_pp);
PushPX(pti, m_xp[thread_num], m_yp[thread_num], m_zp[thread_num],
m_giv[thread_num], dt);
- BL_PROFILE_VAR_STOP(blp_pxr_pp);
+ BL_PROFILE_VAR_STOP(blp_ppc_pp);
//
// Current Deposition
@@ -1561,7 +1297,12 @@ PhysicalParticleContainer::Evolve (int lev,
BL_PROFILE_VAR_STOP(blp_copy);
}
- if (rho) DepositCharge(pti, wp, rho, crho, 1, np_current, np, thread_num, lev);
+ if (rho) {
+ DepositCharge(pti, wp, rho, 1, 0, np_current, thread_num, lev, lev);
+ if (has_buffer){
+ DepositCharge(pti, wp, crho, 1, np_current, np-np_current, thread_num, lev, lev-1);
+ }
+ }
if (cost) {
const Box& tbx = pti.tilebox();
@@ -1742,36 +1483,52 @@ PhysicalParticleContainer::PushPX(WarpXParIter& pti,
Real dt)
{
+ // This wraps the momentum and position advance so that inheritors can modify the call.
+ auto& attribs = pti.GetAttribs();
+ // Extract pointers to the different particle quantities
+ Real* const AMREX_RESTRICT x = xp.dataPtr();
+ Real* const AMREX_RESTRICT y = yp.dataPtr();
+ Real* const AMREX_RESTRICT z = zp.dataPtr();
+ Real* const AMREX_RESTRICT gi = giv.dataPtr();
+ Real* const AMREX_RESTRICT ux = attribs[PIdx::ux].dataPtr();
+ Real* const AMREX_RESTRICT uy = attribs[PIdx::uy].dataPtr();
+ Real* const AMREX_RESTRICT uz = attribs[PIdx::uz].dataPtr();
+ const Real* const AMREX_RESTRICT Ex = attribs[PIdx::Ex].dataPtr();
+ const Real* const AMREX_RESTRICT Ey = attribs[PIdx::Ey].dataPtr();
+ const Real* const AMREX_RESTRICT Ez = attribs[PIdx::Ez].dataPtr();
+ const Real* const AMREX_RESTRICT Bx = attribs[PIdx::Bx].dataPtr();
+ const Real* const AMREX_RESTRICT By = attribs[PIdx::By].dataPtr();
+ const Real* const AMREX_RESTRICT Bz = attribs[PIdx::Bz].dataPtr();
+
if (WarpX::do_boosted_frame_diagnostic && do_boosted_frame_diags)
{
- copy_attribs(pti, xp.dataPtr(), yp.dataPtr(), zp.dataPtr());
+ copy_attribs(pti, x, y, z);
}
- // The following attributes should be included in CPP version of warpx_particle_pusher
- // This wraps the call to warpx_particle_pusher so that inheritors can modify the call.
- auto& attribs = pti.GetAttribs();
- auto& uxp = attribs[PIdx::ux];
- auto& uyp = attribs[PIdx::uy];
- auto& uzp = attribs[PIdx::uz];
- auto& Exp = attribs[PIdx::Ex];
- auto& Eyp = attribs[PIdx::Ey];
- auto& Ezp = attribs[PIdx::Ez];
- auto& Bxp = attribs[PIdx::Bx];
- auto& Byp = attribs[PIdx::By];
- auto& Bzp = attribs[PIdx::Bz];
- const long np = pti.numParticles();
-
- warpx_particle_pusher(&np,
- xp.dataPtr(),
- yp.dataPtr(),
- zp.dataPtr(),
- uxp.dataPtr(), uyp.dataPtr(), uzp.dataPtr(),
- giv.dataPtr(),
- Exp.dataPtr(), Eyp.dataPtr(), Ezp.dataPtr(),
- Bxp.dataPtr(), Byp.dataPtr(), Bzp.dataPtr(),
- &this->charge, &this->mass, &dt,
- &WarpX::particle_pusher_algo);
-
+ // Loop over the particles and update their momentum and position
+ const Real q = this->charge;
+ const Real m = this-> mass;
+ if (WarpX::particle_pusher_algo == ParticlePusherAlgo::Boris){
+ amrex::ParallelFor( pti.numParticles(),
+ [=] AMREX_GPU_DEVICE (long i) {
+ UpdateMomentumBoris( ux[i], uy[i], uz[i], gi[i],
+ Ex[i], Ey[i], Ez[i], Bx[i], By[i], Bz[i], q, m, dt);
+ UpdatePosition( x[i], y[i], z[i],
+ ux[i], uy[i], uz[i], dt );
+ }
+ );
+ } else if (WarpX::particle_pusher_algo == ParticlePusherAlgo::Vay) {
+ amrex::ParallelFor( pti.numParticles(),
+ [=] AMREX_GPU_DEVICE (long i) {
+ UpdateMomentumVay( ux[i], uy[i], uz[i], gi[i],
+ Ex[i], Ey[i], Ez[i], Bx[i], By[i], Bz[i], q, m, dt);
+ UpdatePosition( x[i], y[i], z[i],
+ ux[i], uy[i], uz[i], dt );
+ }
+ );
+ } else {
+ amrex::Abort("Unknown particle pusher");
+ };
}
void
@@ -1800,9 +1557,6 @@ PhysicalParticleContainer::PushP (int lev, Real dt,
auto& attribs = pti.GetAttribs();
- auto& uxp = attribs[PIdx::ux];
- auto& uyp = attribs[PIdx::uy];
- auto& uzp = attribs[PIdx::uz];
auto& Exp = attribs[PIdx::Ex];
auto& Eyp = attribs[PIdx::Ey];
auto& Ezp = attribs[PIdx::Ez];
@@ -1834,42 +1588,44 @@ PhysicalParticleContainer::PushP (int lev, Real dt,
//
pti.GetPosition(m_xp[thread_num], m_yp[thread_num], m_zp[thread_num]);
- const std::array<Real,3>& xyzmin_grid = WarpX::LowerCorner(box, lev);
- const int* ixyzmin_grid = box.loVect();
-
- const int ll4symtry = false;
- long lvect_fieldgathe = 64;
-
- warpx_geteb_energy_conserving(
- &np,
- m_xp[thread_num].dataPtr(),
- m_yp[thread_num].dataPtr(),
- m_zp[thread_num].dataPtr(),
- Exp.dataPtr(),Eyp.dataPtr(),Ezp.dataPtr(),
- Bxp.dataPtr(),Byp.dataPtr(),Bzp.dataPtr(),
- ixyzmin_grid,
- &xyzmin_grid[0], &xyzmin_grid[1], &xyzmin_grid[2],
- &dx[0], &dx[1], &dx[2],
- &WarpX::nox, &WarpX::noy, &WarpX::noz,
- BL_TO_FORTRAN_ANYD(exfab),
- BL_TO_FORTRAN_ANYD(eyfab),
- BL_TO_FORTRAN_ANYD(ezfab),
- BL_TO_FORTRAN_ANYD(bxfab),
- BL_TO_FORTRAN_ANYD(byfab),
- BL_TO_FORTRAN_ANYD(bzfab),
- &ll4symtry, &WarpX::l_lower_order_in_v, &WarpX::do_nodal,
- &lvect_fieldgathe, &WarpX::field_gathering_algo);
-
- warpx_particle_pusher_momenta(&np,
- m_xp[thread_num].dataPtr(),
- m_yp[thread_num].dataPtr(),
- m_zp[thread_num].dataPtr(),
- uxp.dataPtr(), uyp.dataPtr(), uzp.dataPtr(),
- m_giv[thread_num].dataPtr(),
- Exp.dataPtr(), Eyp.dataPtr(), Ezp.dataPtr(),
- Bxp.dataPtr(), Byp.dataPtr(), Bzp.dataPtr(),
- &this->charge, &this->mass, &dt,
- &WarpX::particle_pusher_algo);
+ int e_is_nodal = Ex.is_nodal() and Ey.is_nodal() and Ez.is_nodal();
+ FieldGather(pti, Exp, Eyp, Ezp, Bxp, Byp, Bzp,
+ &exfab, &eyfab, &ezfab, &bxfab, &byfab, &bzfab,
+ Ex.nGrow(), e_is_nodal, 0, np, thread_num, lev, lev);
+
+ // This wraps the momentum advance so that inheritors can modify the call.
+ // Extract pointers to the different particle quantities
+ Real* const AMREX_RESTRICT gi = m_giv[thread_num].dataPtr();
+ Real* const AMREX_RESTRICT ux = attribs[PIdx::ux].dataPtr();
+ Real* const AMREX_RESTRICT uy = attribs[PIdx::uy].dataPtr();
+ Real* const AMREX_RESTRICT uz = attribs[PIdx::uz].dataPtr();
+ const Real* const AMREX_RESTRICT Expp = Exp.dataPtr();
+ const Real* const AMREX_RESTRICT Eypp = Eyp.dataPtr();
+ const Real* const AMREX_RESTRICT Ezpp = Ezp.dataPtr();
+ const Real* const AMREX_RESTRICT Bxpp = Bxp.dataPtr();
+ const Real* const AMREX_RESTRICT Bypp = Byp.dataPtr();
+ const Real* const AMREX_RESTRICT Bzpp = Bzp.dataPtr();
+
+ // Loop over the particles and update their momentum
+ const Real q = this->charge;
+ const Real m = this-> mass;
+ if (WarpX::particle_pusher_algo == ParticlePusherAlgo::Boris){
+ amrex::ParallelFor( pti.numParticles(),
+ [=] AMREX_GPU_DEVICE (long i) {
+ UpdateMomentumBoris( ux[i], uy[i], uz[i], gi[i],
+ Expp[i], Eypp[i], Ezpp[i], Bxpp[i], Bypp[i], Bzpp[i], q, m, dt);
+ }
+ );
+ } else if (WarpX::particle_pusher_algo == ParticlePusherAlgo::Vay) {
+ amrex::ParallelFor( pti.numParticles(),
+ [=] AMREX_GPU_DEVICE (long i) {
+ UpdateMomentumVay( ux[i], uy[i], uz[i], gi[i],
+ Expp[i], Eypp[i], Ezpp[i], Bxpp[i], Bypp[i], Bzpp[i], q, m, dt);
+ }
+ );
+ } else {
+ amrex::Abort("Unknown particle pusher");
+ };
}
}
}
@@ -2034,74 +1790,6 @@ void PhysicalParticleContainer::GetParticleSlice(const int direction, const Real
}
}
-int PhysicalParticleContainer::GetRefineFac(const Real x, const Real y, const Real z)
-{
- if (finestLevel() == 0) return 1;
- if (not WarpX::refine_plasma) return 1;
-
- IntVect iv;
- const Geometry& geom = Geom(0);
-
- std::array<Real, 3> offset;
-
-#if ( AMREX_SPACEDIM == 3)
- offset[0] = geom.ProbLo(0);
- offset[1] = geom.ProbLo(1);
- offset[2] = geom.ProbLo(2);
-#elif ( AMREX_SPACEDIM == 2 )
- offset[0] = geom.ProbLo(0);
- offset[1] = 0.0;
- offset[2] = geom.ProbLo(1);
-#endif
-
- AMREX_D_TERM(iv[0]=static_cast<int>(floor((x-offset[0])*geom.InvCellSize(0)));,
- iv[1]=static_cast<int>(floor((y-offset[1])*geom.InvCellSize(1)));,
- iv[2]=static_cast<int>(floor((z-offset[2])*geom.InvCellSize(2))););
-
- iv += geom.Domain().smallEnd();
-
- const int dir = WarpX::moving_window_dir;
-
- IntVect iv2 = iv;
- iv2[dir] = 0;
-
- if ( (*m_refined_injection_mask)(iv2) != -1) return (*m_refined_injection_mask)(iv2);
-
- int ref_fac = 1;
- for (int lev = 0; lev < finestLevel(); ++lev)
- {
- const IntVect rr = m_gdb->refRatio(lev);
- const BoxArray& fine_ba = this->ParticleBoxArray(lev+1);
- const int num_boxes = fine_ba.size();
- Vector<Box> stretched_boxes;
- const int safety_factor = 4;
- for (int i = 0; i < num_boxes; ++i)
- {
- Box bx = fine_ba[i];
- bx.coarsen(ref_fac*rr[dir]);
- bx.setSmall(dir, std::numeric_limits<int>::min()/safety_factor);
- bx.setBig(dir, std::numeric_limits<int>::max()/safety_factor);
- stretched_boxes.push_back(bx);
- }
-
- BoxArray stretched_ba(stretched_boxes.dataPtr(), stretched_boxes.size());
-
- const int num_ghost = 0;
- if ( stretched_ba.intersects(Box(iv, iv), num_ghost) )
- {
- ref_fac *= rr[dir];
- }
- else
- {
- break;
- }
- }
-
- (*m_refined_injection_mask)(iv2) = ref_fac;
-
- return ref_fac;
-}
-
/* \brief Inject particles during the simulation
* \param injection_box: domain where particles should be injected.
*/
@@ -2112,3 +1800,134 @@ PhysicalParticleContainer::ContinuousInjection(const RealBox& injection_box)
const int lev=0;
AddPlasma(lev, injection_box);
}
+
+/* \brief Gather fields from FArrayBox exfab, eyfab, ezfab, bxfab, byfab,
+ * bzfab into arrays of fields on particles Exp, Eyp, Ezp, Bxp, Byp, Bzp.
+ * \param Exp-Bzp: fields on particles.
+ * \param exfab-bzfab: FAB of electric and magnetic fields for particles in pti
+ * \param ngE: number of guard cells for E
+ * \param e_is_nodal: 0 if E is staggered, 1 if E is nodal
+ * \param offset: index of first particle for which fields are gathered
+ * \param np_to_gather: number of particles onto which fields are gathered
+ * \param thread_num: if using OpenMP, thread number
+ * \param lev: level on which particles are located
+ * \param gather_lev: level from which particles gather fields: lev, or
+ * lev-1 for particles in gather buffers.
+ */
+void
+PhysicalParticleContainer::FieldGather (WarpXParIter& pti,
+ RealVector& Exp,
+ RealVector& Eyp,
+ RealVector& Ezp,
+ RealVector& Bxp,
+ RealVector& Byp,
+ RealVector& Bzp,
+ FArrayBox const * exfab,
+ FArrayBox const * eyfab,
+ FArrayBox const * ezfab,
+ FArrayBox const * bxfab,
+ FArrayBox const * byfab,
+ FArrayBox const * bzfab,
+ const int ngE, const int e_is_nodal,
+ const long offset,
+ const long np_to_gather,
+ int thread_num,
+ int lev,
+ int gather_lev)
+{
+ AMREX_ALWAYS_ASSERT_WITH_MESSAGE((gather_lev==(lev-1)) ||
+ (gather_lev==(lev )),
+ "Gather buffers only work for lev-1");
+
+ // If no particles, do not do anything
+ if (np_to_gather == 0) return;
+ // Get cell size on gather_lev
+ const std::array<Real,3>& dx = WarpX::CellSize(std::max(gather_lev,0));
+ // Set staggering shift depending on e_is_nodal
+ const Real stagger_shift = e_is_nodal ? 0.0 : 0.5;
+
+ // Get box from which field is gathered.
+ // If gathering from a level other than lev (i.e. lev-1), the box is coarsened.
+ Box box;
+ if (lev == gather_lev) {
+ box = pti.tilebox();
+ } else {
+ const IntVect& ref_ratio = WarpX::RefRatio(gather_lev);
+ box = amrex::coarsen(pti.tilebox(),ref_ratio);
+ }
+
+ // Add guard cells to the box.
+ box.grow(ngE);
+
+ const Array4<const Real>& ex_arr = exfab->array();
+ const Array4<const Real>& ey_arr = eyfab->array();
+ const Array4<const Real>& ez_arr = ezfab->array();
+ const Array4<const Real>& bx_arr = bxfab->array();
+ const Array4<const Real>& by_arr = byfab->array();
+ const Array4<const Real>& bz_arr = bzfab->array();
+
+ const Real * const AMREX_RESTRICT xp = m_xp[thread_num].dataPtr() + offset;
+ const Real * const AMREX_RESTRICT zp = m_zp[thread_num].dataPtr() + offset;
+ const Real * const AMREX_RESTRICT yp = m_yp[thread_num].dataPtr() + offset;
+
+ // Lower corner of tile box physical domain
+ const std::array<Real, 3>& xyzmin = WarpX::LowerCorner(box, gather_lev);
+
+ const Dim3 lo = lbound(box);
+
+ // Depending on WarpX::l_lower_order_in_v and WarpX::nox, call
+ // different versions of template function doGatherShapeN
+ if (WarpX::l_lower_order_in_v){
+ if (WarpX::nox == 1){
+ doGatherShapeN<1,1>(xp, yp, zp,
+ Exp.dataPtr() + offset, Eyp.dataPtr() + offset,
+ Ezp.dataPtr() + offset, Bxp.dataPtr() + offset,
+ Byp.dataPtr() + offset, Bzp.dataPtr() + offset,
+ ex_arr, ey_arr, ez_arr, bx_arr, by_arr, bz_arr,
+ np_to_gather, dx,
+ xyzmin, lo, stagger_shift);
+ } else if (WarpX::nox == 2){
+ doGatherShapeN<2,1>(xp, yp, zp,
+ Exp.dataPtr() + offset, Eyp.dataPtr() + offset,
+ Ezp.dataPtr() + offset, Bxp.dataPtr() + offset,
+ Byp.dataPtr() + offset, Bzp.dataPtr() + offset,
+ ex_arr, ey_arr, ez_arr, bx_arr, by_arr, bz_arr,
+ np_to_gather, dx,
+ xyzmin, lo, stagger_shift);
+ } else if (WarpX::nox == 3){
+ doGatherShapeN<3,1>(xp, yp, zp,
+ Exp.dataPtr() + offset, Eyp.dataPtr() + offset,
+ Ezp.dataPtr() + offset, Bxp.dataPtr() + offset,
+ Byp.dataPtr() + offset, Bzp.dataPtr() + offset,
+ ex_arr, ey_arr, ez_arr, bx_arr, by_arr, bz_arr,
+ np_to_gather, dx,
+ xyzmin, lo, stagger_shift);
+ }
+ } else {
+ if (WarpX::nox == 1){
+ doGatherShapeN<1,0>(xp, yp, zp,
+ Exp.dataPtr() + offset, Eyp.dataPtr() + offset,
+ Ezp.dataPtr() + offset, Bxp.dataPtr() + offset,
+ Byp.dataPtr() + offset, Bzp.dataPtr() + offset,
+ ex_arr, ey_arr, ez_arr, bx_arr, by_arr, bz_arr,
+ np_to_gather, dx,
+ xyzmin, lo, stagger_shift);
+ } else if (WarpX::nox == 2){
+ doGatherShapeN<2,0>(xp, yp, zp,
+ Exp.dataPtr() + offset, Eyp.dataPtr() + offset,
+ Ezp.dataPtr() + offset, Bxp.dataPtr() + offset,
+ Byp.dataPtr() + offset, Bzp.dataPtr() + offset,
+ ex_arr, ey_arr, ez_arr, bx_arr, by_arr, bz_arr,
+ np_to_gather, dx,
+ xyzmin, lo, stagger_shift);
+ } else if (WarpX::nox == 3){
+ doGatherShapeN<3,0>(xp, yp, zp,
+ Exp.dataPtr() + offset, Eyp.dataPtr() + offset,
+ Ezp.dataPtr() + offset, Bxp.dataPtr() + offset,
+ Byp.dataPtr() + offset, Bzp.dataPtr() + offset,
+ ex_arr, ey_arr, ez_arr, bx_arr, by_arr, bz_arr,
+ np_to_gather, dx,
+ xyzmin, lo, stagger_shift);
+ }
+ }
+}
diff --git a/Source/Particles/Pusher/GetAndSetPosition.H b/Source/Particles/Pusher/GetAndSetPosition.H
index 42c61343e..3c74baeb2 100644
--- a/Source/Particles/Pusher/GetAndSetPosition.H
+++ b/Source/Particles/Pusher/GetAndSetPosition.H
@@ -5,7 +5,7 @@
#include <WarpXParticleContainer.H>
#include <AMReX_REAL.H>
-#ifndef WARPX_RZ
+#ifndef WARPX_DIM_RZ
/* \brief Extract the particle's coordinates from the ParticleType struct `p`,
* and stores them in the variables `x`, `y`, `z`. */
@@ -42,7 +42,7 @@ void SetPosition(
#endif
}
-# else // if WARPX_RZ is True
+# elif defined WARPX_DIM_RZ
/* \brief Extract the particle's coordinates from `theta` and the attributes
* of the ParticleType struct `p` (which contains the radius),
@@ -71,6 +71,6 @@ void SetCylindricalPositionFromCartesian(
p.pos(1) = z;
}
-#endif // WARPX_RZ
+#endif // WARPX_DIM_RZ
#endif // WARPX_PARTICLES_PUSHER_GETANDSETPOSITION_H_
diff --git a/Source/Particles/Pusher/Make.package b/Source/Particles/Pusher/Make.package
index 8c8e77905..95a38fa2d 100644
--- a/Source/Particles/Pusher/Make.package
+++ b/Source/Particles/Pusher/Make.package
@@ -1,4 +1,6 @@
CEXE_headers += GetAndSetPosition.H
CEXE_headers += UpdatePosition.H
+CEXE_headers += UpdateMomentumBoris.H
+CEXE_headers += UpdateMomentumVay.H
INCLUDE_LOCATIONS += $(WARPX_HOME)/Source/Particles/Pusher
VPATH_LOCATIONS += $(WARPX_HOME)/Source/Particles/Pusher
diff --git a/Source/Particles/Pusher/UpdateMomentumBoris.H b/Source/Particles/Pusher/UpdateMomentumBoris.H
new file mode 100644
index 000000000..71e9a8ed1
--- /dev/null
+++ b/Source/Particles/Pusher/UpdateMomentumBoris.H
@@ -0,0 +1,47 @@
+#ifndef WARPX_PARTICLES_PUSHER_UPDATEMOMENTUM_BORIS_H_
+#define WARPX_PARTICLES_PUSHER_UPDATEMOMENTUM_BORIS_H_
+
+#include <AMReX_REAL.H>
+
+/* \brief Push the particle's momenta `ux`, `uy`, `uz` over one timestep with the Boris
+ * pusher, given the fields `Ex`-`Bz` at the particle, and recompute `gaminv` */
+AMREX_GPU_HOST_DEVICE AMREX_INLINE
+void UpdateMomentumBoris(
+ amrex::Real& ux, amrex::Real& uy, amrex::Real& uz, amrex::Real& gaminv,
+ const amrex::Real Ex, const amrex::Real Ey, const amrex::Real Ez,
+ const amrex::Real Bx, const amrex::Real By, const amrex::Real Bz,
+ const amrex::Real q, const amrex::Real m, const amrex::Real dt )
+{
+ const amrex::Real econst = 0.5*q*dt/m;
+
+ // First half-push for E
+ ux += econst*Ex;
+ uy += econst*Ey;
+ uz += econst*Ez;
+ // Compute temporary gamma factor
+ constexpr amrex::Real inv_c2 = 1./(PhysConst::c*PhysConst::c);
+ const amrex::Real inv_gamma = 1./std::sqrt(1. + (ux*ux + uy*uy + uz*uz)*inv_c2);
+ // Magnetic rotation
+ // - Compute temporary variables
+ const amrex::Real tx = econst*inv_gamma*Bx;
+ const amrex::Real ty = econst*inv_gamma*By;
+ const amrex::Real tz = econst*inv_gamma*Bz;
+ const amrex::Real tsqi = 2./(1. + tx*tx + ty*ty + tz*tz);
+ const amrex::Real sx = tx*tsqi;
+ const amrex::Real sy = ty*tsqi;
+ const amrex::Real sz = tz*tsqi;
+ const amrex::Real ux_p = ux + uy*tz - uz*ty;
+ const amrex::Real uy_p = uy + uz*tx - ux*tz;
+ const amrex::Real uz_p = uz + ux*ty - uy*tx;
+ // - Update momentum
+ ux += uy_p*sz - uz_p*sy;
+ uy += uz_p*sx - ux_p*sz;
+ uz += ux_p*sy - uy_p*sx;
+ // Second half-push for E
+ ux += econst*Ex;
+ uy += econst*Ey;
+ uz += econst*Ez;
+ gaminv = 1./std::sqrt(1. + (ux*ux + uy*uy + uz*uz)*inv_c2);
+}
+
+#endif // WARPX_PARTICLES_PUSHER_UPDATEMOMENTUM_BORIS_H_
diff --git a/Source/Particles/Pusher/UpdateMomentumVay.H b/Source/Particles/Pusher/UpdateMomentumVay.H
new file mode 100644
index 000000000..044297e22
--- /dev/null
+++ b/Source/Particles/Pusher/UpdateMomentumVay.H
@@ -0,0 +1,54 @@
+#ifndef WARPX_PARTICLES_PUSHER_UPDATEMOMENTUM_VAY_H_
+#define WARPX_PARTICLES_PUSHER_UPDATEMOMENTUM_VAY_H_
+
+#include <AMReX_FArrayBox.H>
+#include <WarpXConst.H>
+#include <AMReX_REAL.H>
+
+/* \brief Push the particle's momenta `ux`, `uy`, `uz` over one timestep with the Vay
+ * pusher, given the fields `Ex`-`Bz` at the particle, and recompute `gaminv` */
+AMREX_GPU_HOST_DEVICE AMREX_INLINE
+void UpdateMomentumVay(
+ amrex::Real& ux, amrex::Real& uy, amrex::Real& uz, amrex::Real& gaminv,
+ const amrex::Real Ex, const amrex::Real Ey, const amrex::Real Ez,
+ const amrex::Real Bx, const amrex::Real By, const amrex::Real Bz,
+ const amrex::Real q, const amrex::Real m, const amrex::Real dt )
+{
+ // Constants
+ const amrex::Real econst = q*dt/m;
+ const amrex::Real bconst = 0.5*q*dt/m;
+ constexpr amrex::Real invclight = 1./PhysConst::c;
+ constexpr amrex::Real invclightsq = 1./(PhysConst::c*PhysConst::c);
+ // Compute initial gamma
+ const amrex::Real inv_gamma = 1./std::sqrt(1. + (ux*ux + uy*uy + uz*uz)*invclightsq);
+ // Get tau
+ const amrex::Real taux = bconst*Bx;
+ const amrex::Real tauy = bconst*By;
+ const amrex::Real tauz = bconst*Bz;
+ const amrex::Real tausq = taux*taux+tauy*tauy+tauz*tauz;
+ // Get U', gamma'^2
+ const amrex::Real uxpr = ux + econst*Ex + (uy*tauz-uz*tauy)*inv_gamma;
+ const amrex::Real uypr = uy + econst*Ey + (uz*taux-ux*tauz)*inv_gamma;
+ const amrex::Real uzpr = uz + econst*Ez + (ux*tauy-uy*taux)*inv_gamma;
+ const amrex::Real gprsq = (1. + (uxpr*uxpr + uypr*uypr + uzpr*uzpr)*invclightsq);
+ // Get u*
+ const amrex::Real ust = (uxpr*taux + uypr*tauy + uzpr*tauz)*invclight;
+ // Get new gamma
+ const amrex::Real sigma = gprsq-tausq;
+ const amrex::Real gisq = 2./(sigma + std::sqrt(sigma*sigma + 4.*(tausq + ust*ust)) );
+ // Get t, s
+ const amrex::Real bg = bconst*std::sqrt(gisq);
+ const amrex::Real tx = bg*Bx;
+ const amrex::Real ty = bg*By;
+ const amrex::Real tz = bg*Bz;
+ const amrex::Real s = 1./(1.+tausq*gisq);
+ // Get t.u'
+ const amrex::Real tu = tx*uxpr + ty*uypr + tz*uzpr;
+ // Get new U
+ ux = s*(uxpr+tx*tu+uypr*tz-uzpr*ty);
+ uy = s*(uypr+ty*tu+uzpr*tx-uxpr*tz);
+ uz = s*(uzpr+tz*tu+uxpr*ty-uypr*tx);
+ gaminv = 1./std::sqrt(1. + (ux*ux + uy*uy + uz*uz)*invclightsq);
+}
+
+#endif // WARPX_PARTICLES_PUSHER_UPDATEMOMENTUM_VAY_H_
diff --git a/Source/Particles/Pusher/UpdatePosition.H b/Source/Particles/Pusher/UpdatePosition.H
index 0a4f579f4..a9df63a30 100644
--- a/Source/Particles/Pusher/UpdatePosition.H
+++ b/Source/Particles/Pusher/UpdatePosition.H
@@ -20,7 +20,7 @@ void UpdatePosition(
const amrex::Real inv_gamma = 1./std::sqrt(1. + (ux*ux + uy*uy + uz*uz)*inv_c2);
// Update positions over one time step
x += ux * inv_gamma * dt;
-#if (AMREX_SPACEDIM == 3) || (defined WARPX_RZ) // RZ pushes particles in 3D
+#if (AMREX_SPACEDIM == 3) || (defined WARPX_DIM_RZ) // RZ pushes particles in 3D
y += uy * inv_gamma * dt;
#endif
z += uz * inv_gamma * dt;
diff --git a/Source/Particles/RigidInjectedParticleContainer.H b/Source/Particles/RigidInjectedParticleContainer.H
index 0b27a2f2f..b920ece0a 100644
--- a/Source/Particles/RigidInjectedParticleContainer.H
+++ b/Source/Particles/RigidInjectedParticleContainer.H
@@ -43,7 +43,7 @@ public:
amrex::Real dt) override;
virtual void PushPX(WarpXParIter& pti,
- amrex::Cuda::ManagedDeviceVector<amrex::Real>& xp,
+ amrex::Cuda::ManagedDeviceVector<amrex::Real>& xp,
amrex::Cuda::ManagedDeviceVector<amrex::Real>& yp,
amrex::Cuda::ManagedDeviceVector<amrex::Real>& zp,
amrex::Cuda::ManagedDeviceVector<amrex::Real>& giv,
@@ -77,7 +77,6 @@ private:
// Temporary quantites
amrex::Real zinject_plane_lev;
amrex::Real zinject_plane_lev_previous;
- amrex::Vector<int> done_injecting_temp;
bool done_injecting_lev;
};
diff --git a/Source/Particles/RigidInjectedParticleContainer.cpp b/Source/Particles/RigidInjectedParticleContainer.cpp
index 9bd4cb4fc..36cb9d224 100644
--- a/Source/Particles/RigidInjectedParticleContainer.cpp
+++ b/Source/Particles/RigidInjectedParticleContainer.cpp
@@ -10,6 +10,9 @@
#include <WarpX_f.H>
#include <WarpX.H>
#include <WarpXConst.H>
+#include <WarpXAlgorithmSelection.H>
+#include <UpdateMomentumBoris.H>
+#include <UpdateMomentumVay.H>
using namespace amrex;
@@ -204,48 +207,58 @@ RigidInjectedParticleContainer::BoostandRemapParticles()
void
RigidInjectedParticleContainer::PushPX(WarpXParIter& pti,
- Cuda::ManagedDeviceVector<Real>& xp,
+ Cuda::ManagedDeviceVector<Real>& xp,
Cuda::ManagedDeviceVector<Real>& yp,
Cuda::ManagedDeviceVector<Real>& zp,
Cuda::ManagedDeviceVector<Real>& giv,
Real dt)
{
- if (WarpX::do_boosted_frame_diagnostic && do_boosted_frame_diags)
- {
- copy_attribs(pti, xp.dataPtr(), yp.dataPtr(), zp.dataPtr());
- }
-
- // This wraps the call to warpx_particle_pusher so that inheritors can modify the call.
+ // This wraps the momentum and position advance so that inheritors can modify the call.
auto& attribs = pti.GetAttribs();
auto& uxp = attribs[PIdx::ux];
auto& uyp = attribs[PIdx::uy];
auto& uzp = attribs[PIdx::uz];
- auto& Exp = attribs[PIdx::Ex];
- auto& Eyp = attribs[PIdx::Ey];
- auto& Ezp = attribs[PIdx::Ez];
- auto& Bxp = attribs[PIdx::Bx];
- auto& Byp = attribs[PIdx::By];
- auto& Bzp = attribs[PIdx::Bz];
- const long np = pti.numParticles();
// Save the position and momenta, making copies
Cuda::ManagedDeviceVector<Real> xp_save, yp_save, zp_save;
RealVector uxp_save, uyp_save, uzp_save;
+ Real* const AMREX_RESTRICT x = xp.dataPtr();
+ Real* const AMREX_RESTRICT y = yp.dataPtr();
+ Real* const AMREX_RESTRICT z = zp.dataPtr();
+ Real* const AMREX_RESTRICT gi = giv.dataPtr();
+ Real* const AMREX_RESTRICT ux = uxp.dataPtr();
+ Real* const AMREX_RESTRICT uy = uyp.dataPtr();
+ Real* const AMREX_RESTRICT uz = uzp.dataPtr();
+ Real* const AMREX_RESTRICT Exp = attribs[PIdx::Ex].dataPtr();
+ Real* const AMREX_RESTRICT Eyp = attribs[PIdx::Ey].dataPtr();
+ Real* const AMREX_RESTRICT Ezp = attribs[PIdx::Ez].dataPtr();
+ Real* const AMREX_RESTRICT Bxp = attribs[PIdx::Bx].dataPtr();
+ Real* const AMREX_RESTRICT Byp = attribs[PIdx::By].dataPtr();
+ Real* const AMREX_RESTRICT Bzp = attribs[PIdx::Bz].dataPtr();
+
if (!done_injecting_lev) {
- xp_save = xp;
- yp_save = yp;
- zp_save = zp;
- uxp_save = uxp;
- uyp_save = uyp;
- uzp_save = uzp;
+ if (!(WarpX::do_boosted_frame_diagnostic && do_boosted_frame_diags)) {
+ // If the old values are not already saved, create copies here.
+ xp_save = xp;
+ yp_save = yp;
+ zp_save = zp;
+ uxp_save = uxp;
+ uyp_save = uyp;
+ uzp_save = uzp;
+ }
+
// Scale the fields of particles about to cross the injection plane.
// This only approximates what should be happening. The particles
// should by advanced a fraction of a time step instead.
// Scaling the fields is much easier and may be good enough.
- for (int i=0 ; i < zp.size() ; i++) {
- const Real dtscale = dt - (zinject_plane_lev_previous - zp[i])/(vzbeam_ave_boosted + WarpX::beta_boost*PhysConst::c);
+ const Real v_boost = WarpX::beta_boost*PhysConst::c;
+ const Real z_plane_previous = zinject_plane_lev_previous;
+ const Real vz_ave_boosted = vzbeam_ave_boosted;
+ amrex::ParallelFor( pti.numParticles(),
+ [=] AMREX_GPU_DEVICE (long i) {
+ const Real dtscale = dt - (z_plane_previous - z[i])/(vz_ave_boosted + v_boost);
if (0. < dtscale && dtscale < dt) {
Exp[i] *= dtscale;
Eyp[i] *= dtscale;
@@ -255,46 +268,60 @@ RigidInjectedParticleContainer::PushPX(WarpXParIter& pti,
Bzp[i] *= dtscale;
}
}
+ );
}
- warpx_particle_pusher(&np,
- xp.dataPtr(),
- yp.dataPtr(),
- zp.dataPtr(),
- uxp.dataPtr(), uyp.dataPtr(), uzp.dataPtr(),
- giv.dataPtr(),
- Exp.dataPtr(), Eyp.dataPtr(), Ezp.dataPtr(),
- Bxp.dataPtr(), Byp.dataPtr(), Bzp.dataPtr(),
- &this->charge, &this->mass, &dt,
- &WarpX::particle_pusher_algo);
+ PhysicalParticleContainer::PushPX(pti, xp, yp, zp, giv, dt);
if (!done_injecting_lev) {
-#ifdef _OPENMP
- const int tid = omp_get_thread_num();
-#else
- const int tid = 0;
-#endif
+
+ Real* AMREX_RESTRICT x_save;
+ Real* AMREX_RESTRICT y_save;
+ Real* AMREX_RESTRICT z_save;
+ Real* AMREX_RESTRICT ux_save;
+ Real* AMREX_RESTRICT uy_save;
+ Real* AMREX_RESTRICT uz_save;
+ if (!(WarpX::do_boosted_frame_diagnostic && do_boosted_frame_diags)) {
+ x_save = xp_save.dataPtr();
+ y_save = yp_save.dataPtr();
+ z_save = zp_save.dataPtr();
+ ux_save = uxp_save.dataPtr();
+ uy_save = uyp_save.dataPtr();
+ uz_save = uzp_save.dataPtr();
+ } else {
+ x_save = pti.GetAttribs(particle_comps["xold"]).dataPtr();
+ y_save = pti.GetAttribs(particle_comps["yold"]).dataPtr();
+ z_save = pti.GetAttribs(particle_comps["zold"]).dataPtr();
+ ux_save = pti.GetAttribs(particle_comps["uxold"]).dataPtr();
+ uy_save = pti.GetAttribs(particle_comps["uyold"]).dataPtr();
+ uz_save = pti.GetAttribs(particle_comps["uzold"]).dataPtr();
+ }
+
// Undo the push for particles not injected yet.
// The zp are advanced a fixed amount.
- for (int i=0 ; i < zp.size() ; i++) {
- if (zp[i] <= zinject_plane_lev) {
- uxp[i] = uxp_save[i];
- uyp[i] = uyp_save[i];
- uzp[i] = uzp_save[i];
- giv[i] = 1./std::sqrt(1. + (uxp[i]*uxp[i] + uyp[i]*uyp[i] + uzp[i]*uzp[i])/(PhysConst::c*PhysConst::c));
- xp[i] = xp_save[i];
- yp[i] = yp_save[i];
- if (rigid_advance) {
- zp[i] = zp_save[i] + dt*vzbeam_ave_boosted;
+ const Real z_plane_lev = zinject_plane_lev;
+ const Real vz_ave_boosted = vzbeam_ave_boosted;
+ const bool rigid = rigid_advance;
+ const Real inv_csq = 1./(PhysConst::c*PhysConst::c);
+ amrex::ParallelFor( pti.numParticles(),
+ [=] AMREX_GPU_DEVICE (long i) {
+ if (z[i] <= z_plane_lev) {
+ ux[i] = ux_save[i];
+ uy[i] = uy_save[i];
+ uz[i] = uz_save[i];
+ gi[i] = 1./std::sqrt(1. + (ux[i]*ux[i] + uy[i]*uy[i] + uz[i]*uz[i])*inv_csq);
+ x[i] = x_save[i];
+ y[i] = y_save[i];
+ if (rigid) {
+ z[i] = z_save[i] + dt*vz_ave_boosted;
}
else {
- zp[i] = zp_save[i] + dt*uzp[i]*giv[i];
+ z[i] = z_save[i] + dt*uz[i]*gi[i];
}
- done_injecting_temp[tid] = 0;
}
}
+ );
}
-
}
void
@@ -314,28 +341,26 @@ RigidInjectedParticleContainer::Evolve (int lev,
zinject_plane_levels[lev] -= dt*WarpX::beta_boost*PhysConst::c;
zinject_plane_lev = zinject_plane_levels[lev];
- // Setup check of whether more particles need to be injected
-#ifdef _OPENMP
- const int nthreads = omp_get_max_threads();
-#else
- const int nthreads = 1;
-#endif
- done_injecting_temp.assign(nthreads, 1); // We do not use bool because vector<bool> is special.
+ // Set the done injecting flag when the inject plane moves out of the
+ // simulation domain.
+ // It is much easier to do this check, rather than checking if all of the
+ // particles have crossed the inject plane.
+ const Real* plo = Geom(lev).ProbLo();
+ const Real* phi = Geom(lev).ProbHi();
+ const int zdir = AMREX_SPACEDIM-1;
+ done_injecting[lev] = ((zinject_plane_levels[lev] < plo[zdir] && WarpX::moving_window_v + WarpX::beta_boost*PhysConst::c >= 0.) ||
+ (zinject_plane_levels[lev] > phi[zdir] && WarpX::moving_window_v + WarpX::beta_boost*PhysConst::c <= 0.));
done_injecting_lev = done_injecting[lev];
PhysicalParticleContainer::Evolve (lev,
- Ex, Ey, Ez,
- Bx, By, Bz,
- jx, jy, jz,
+ Ex, Ey, Ez,
+ Bx, By, Bz,
+ jx, jy, jz,
cjx, cjy, cjz,
rho, crho,
cEx, cEy, cEz,
cBx, cBy, cBz,
t, dt);
-
- // Check if all done_injecting_temp are still true.
- done_injecting[lev] = std::all_of(done_injecting_temp.begin(), done_injecting_temp.end(),
- [](int i) -> bool { return i; });
}
void
@@ -343,6 +368,8 @@ RigidInjectedParticleContainer::PushP (int lev, Real dt,
const MultiFab& Ex, const MultiFab& Ey, const MultiFab& Ez,
const MultiFab& Bx, const MultiFab& By, const MultiFab& Bz)
{
+ BL_PROFILE("RigidInjectedParticleContainer::PushP");
+
if (do_not_push) return;
const std::array<Real,3>& dx = WarpX::CellSize(lev);
@@ -351,8 +378,11 @@ RigidInjectedParticleContainer::PushP (int lev, Real dt,
#pragma omp parallel
#endif
{
- Cuda::ManagedDeviceVector<Real> xp, yp, zp, giv;
-
+#ifdef _OPENMP
+ int thread_num = omp_get_thread_num();
+#else
+ int thread_num = 0;
+#endif
for (WarpXParIter pti(*this, lev); pti.isValid(); ++pti)
{
const Box& box = pti.validbox();
@@ -386,65 +416,74 @@ RigidInjectedParticleContainer::PushP (int lev, Real dt,
Byp.assign(np,WarpX::B_external[1]);
Bzp.assign(np,WarpX::B_external[2]);
- giv.resize(np);
+ m_giv[thread_num].resize(np);
//
// copy data from particle container to temp arrays
//
- pti.GetPosition(xp, yp, zp);
+ pti.GetPosition(m_xp[thread_num], m_yp[thread_num], m_zp[thread_num]);
- const std::array<Real,3>& xyzmin_grid = WarpX::LowerCorner(box, lev);
- const int* ixyzmin_grid = box.loVect();
-
- const int ll4symtry = false;
- const int l_lower_order_in_v = true;
- long lvect_fieldgathe = 64;
- warpx_geteb_energy_conserving(
- &np,
- xp.dataPtr(),
- yp.dataPtr(),
- zp.dataPtr(),
- Exp.dataPtr(),Eyp.dataPtr(),Ezp.dataPtr(),
- Bxp.dataPtr(),Byp.dataPtr(),Bzp.dataPtr(),
- ixyzmin_grid,
- &xyzmin_grid[0], &xyzmin_grid[1], &xyzmin_grid[2],
- &dx[0], &dx[1], &dx[2],
- &WarpX::nox, &WarpX::noy, &WarpX::noz,
- BL_TO_FORTRAN_ANYD(exfab),
- BL_TO_FORTRAN_ANYD(eyfab),
- BL_TO_FORTRAN_ANYD(ezfab),
- BL_TO_FORTRAN_ANYD(bxfab),
- BL_TO_FORTRAN_ANYD(byfab),
- BL_TO_FORTRAN_ANYD(bzfab),
- &ll4symtry, &l_lower_order_in_v, &WarpX::do_nodal,
- &lvect_fieldgathe, &WarpX::field_gathering_algo);
+ int e_is_nodal = Ex.is_nodal() and Ey.is_nodal() and Ez.is_nodal();
+ FieldGather(pti, Exp, Eyp, Ezp, Bxp, Byp, Bzp,
+ &exfab, &eyfab, &ezfab, &bxfab, &byfab, &bzfab,
+ Ex.nGrow(), e_is_nodal, 0, np, thread_num, lev, lev);
// Save the position and momenta, making copies
auto uxp_save = uxp;
auto uyp_save = uyp;
auto uzp_save = uzp;
- warpx_particle_pusher_momenta(&np,
- xp.dataPtr(),
- yp.dataPtr(),
- zp.dataPtr(),
- uxp.dataPtr(), uyp.dataPtr(), uzp.dataPtr(),
- giv.dataPtr(),
- Exp.dataPtr(), Eyp.dataPtr(), Ezp.dataPtr(),
- Bxp.dataPtr(), Byp.dataPtr(), Bzp.dataPtr(),
- &this->charge, &this->mass, &dt,
- &WarpX::particle_pusher_algo);
+ // This wraps the momentum advance so that inheritors can modify the call.
+ // Extract pointers to the different particle quantities
+ const Real* const AMREX_RESTRICT zp = m_zp[thread_num].dataPtr();
+ Real* const AMREX_RESTRICT gi = m_giv[thread_num].dataPtr();
+ Real* const AMREX_RESTRICT uxpp = uxp.dataPtr();
+ Real* const AMREX_RESTRICT uypp = uyp.dataPtr();
+ Real* const AMREX_RESTRICT uzpp = uzp.dataPtr();
+ const Real* const AMREX_RESTRICT Expp = Exp.dataPtr();
+ const Real* const AMREX_RESTRICT Eypp = Eyp.dataPtr();
+ const Real* const AMREX_RESTRICT Ezpp = Ezp.dataPtr();
+ const Real* const AMREX_RESTRICT Bxpp = Bxp.dataPtr();
+ const Real* const AMREX_RESTRICT Bypp = Byp.dataPtr();
+ const Real* const AMREX_RESTRICT Bzpp = Bzp.dataPtr();
+
+ // Loop over the particles and update their momentum
+ const Real q = this->charge;
+ const Real m = this->mass;
+ if (WarpX::particle_pusher_algo == ParticlePusherAlgo::Boris){
+ amrex::ParallelFor( pti.numParticles(),
+ [=] AMREX_GPU_DEVICE (long i) {
+ UpdateMomentumBoris( uxpp[i], uypp[i], uzpp[i], gi[i],
+ Expp[i], Eypp[i], Ezpp[i], Bxpp[i], Bypp[i], Bzpp[i], q, m, dt);
+ }
+ );
+ } else if (WarpX::particle_pusher_algo == ParticlePusherAlgo::Vay) {
+ amrex::ParallelFor( pti.numParticles(),
+ [=] AMREX_GPU_DEVICE (long i) {
+ UpdateMomentumVay( uxpp[i], uypp[i], uzpp[i], gi[i],
+ Expp[i], Eypp[i], Ezpp[i], Bxpp[i], Bypp[i], Bzpp[i], q, m, dt);
+ }
+ );
+ } else {
+ amrex::Abort("Unknown particle pusher");
+ };
// Undo the push for particles not injected yet.
// It is assumed that PushP will only be called on the first and last steps
// and that no particles will cross zinject_plane.
- for (int i=0 ; i < zp.size() ; i++) {
- if (zp[i] <= zinject_plane_levels[lev]) {
- uxp[i] = uxp_save[i];
- uyp[i] = uyp_save[i];
- uzp[i] = uzp_save[i];
+ const Real* const AMREX_RESTRICT ux_save = uxp_save.dataPtr();
+ const Real* const AMREX_RESTRICT uy_save = uyp_save.dataPtr();
+ const Real* const AMREX_RESTRICT uz_save = uzp_save.dataPtr();
+ const Real zz = zinject_plane_levels[lev];
+ amrex::ParallelFor( pti.numParticles(),
+ [=] AMREX_GPU_DEVICE (long i) {
+ if (zp[i] <= zz) {
+ uxpp[i] = ux_save[i];
+ uypp[i] = uy_save[i];
+ uzpp[i] = uz_save[i];
}
}
+ );
}
}
diff --git a/Source/Particles/ShapeFactors.H b/Source/Particles/ShapeFactors.H
new file mode 100644
index 000000000..9d185714a
--- /dev/null
+++ b/Source/Particles/ShapeFactors.H
@@ -0,0 +1,117 @@
+#ifndef SHAPEFACTORS_H_
+#define SHAPEFACTORS_H_
+
+// Compute shape factor and return index of leftmost cell where
+// particle writes.
+// This generic version is what gets instantiated for any depos_order that
+// has no specialization: it leaves sx untouched and returns 0.
+// Specialized templates are defined below for orders 0 to 3.
+//   sx   : output array receiving the shape-factor weights
+//   xmid : particle position (presumably in units of the cell size,
+//          relative to the lower corner of the grid -- confirm against
+//          the callers)
+template <int depos_order>
+AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
+int compute_shape_factor(amrex::Real* const /*sx*/, amrex::Real /*xmid*/)
+{
+    // Parameters are intentionally unnamed: nothing is computed here.
+    return 0;
+}
+
+// Shape factor of order 0: the whole weight goes to a single cell.
+template <>
+AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
+int compute_shape_factor <0> (amrex::Real* const sx, amrex::Real xmid){
+    // A single cell receives the full weight.
+    sx[0] = 1.0;
+    // Index of that cell: add one half, then truncate toward zero.
+    return static_cast<int>(xmid + 0.5);
+}
+
+// Shape factor of order 1: linear weights shared between two cells.
+template <>
+AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
+int compute_shape_factor <1> (amrex::Real* const sx, amrex::Real xmid){
+    // Cell index obtained by truncation, and offset of the particle from it.
+    const int cell = static_cast<int>(xmid);
+    const amrex::Real frac = xmid - cell;
+    // The further cell gets the offset, the nearer one the remainder.
+    sx[1] = frac;
+    sx[0] = 1.0 - frac;
+    return cell;
+}
+
+// Shape factor of order 2: quadratic weights spread over three cells.
+template <>
+AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
+int compute_shape_factor <2> (amrex::Real* const sx, amrex::Real xmid){
+    // Central cell (add one half, then truncate) and signed offset from it.
+    const int center = static_cast<int>(xmid + 0.5);
+    const amrex::Real offset = xmid - center;
+    // `auto` rather than amrex::Real: these keep the type of the literal
+    // expression, so no extra rounding is introduced for the temporaries.
+    const auto left_arm  = 0.5 - offset;
+    const auto right_arm = 0.5 + offset;
+    sx[0] = 0.5*left_arm*left_arm;
+    sx[1] = 0.75 - offset*offset;
+    sx[2] = 0.5*right_arm*right_arm;
+    // The leftmost cell written is the one just left of the central cell.
+    return center - 1;
+}
+
+// Compute shape factor for order 3.
+// Fills sx[0..3] with four cubic weights evaluated at the particle's
+// offset from cell j, and returns j-1, the index of the first of the four
+// cells those weights apply to.
+// NOTE(review): (int) truncates toward zero rather than flooring, so this
+// presumably relies on xmid being non-negative -- confirm against callers.
+template <>
+AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
+int compute_shape_factor <3> (amrex::Real* const sx, amrex::Real xmid){
+ // cell index by truncation, and offset of the particle from that cell
+ const int j = (int) xmid;
+ const amrex::Real xint = xmid-j;
+ // sx[0] and sx[3] are the outer cells, sx[1] and sx[2] the inner ones.
+ // sx[2] is sx[1] with xint replaced by (1-xint); sx[3] mirrors sx[0]
+ // in the same way.
+ sx[0] = 1.0/6.0*(1.0-xint)*(1.0-xint)*(1.0-xint);
+ sx[1] = 2.0/3.0-xint*xint*(1-xint/2.0);
+ sx[2] = 2.0/3.0-(1-xint)*(1-xint)*(1.0-0.5*(1-xint));
+ sx[3] = 1.0/6.0*xint*xint*xint;
+ // index of the leftmost cell where particle deposits
+ return j-1;
+}
+
+// Compute shifted shape factor and return index of leftmost cell where
+// particle writes, for Esirkepov algorithm.
+// Specialized templates are defined below for orders 1, 2 and 3.
+// Only a declaration is given here, so this header provides no definition
+// for orders other than the specialized ones.
+//   sx    : output array; each specialization writes depos_order+1
+//           consecutive weights starting at sx[1 + i_shift], where i_shift
+//           equals the returned index minus i_new. The other entries of sx
+//           are not touched.
+//   x_old : particle position used to evaluate the weights
+//           (presumably the old position in cell units -- TODO confirm)
+//   i_new : reference cell index the weights are shifted against
+//           (presumably the leftmost cell found for the new position --
+//           TODO confirm against the Esirkepov deposition code)
+template <int depos_order>
+AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
+int compute_shifted_shape_factor (amrex::Real* const sx,
+ const amrex::Real x_old,
+ const int i_new);
+
+// Shifted shape factor of order 1: two linear weights, stored in sx at a
+// position that depends on how far the particle's cell is from i_new.
+template <>
+AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
+int compute_shifted_shape_factor <1> (amrex::Real* const sx,
+                                      const amrex::Real x_old,
+                                      const int i_new){
+    // Cell index obtained by truncation, and offset of the particle from it.
+    const int cell = static_cast<int>(x_old);
+    const amrex::Real frac = x_old - cell;
+    // Number of cells separating this particle's cell from i_new.
+    const int shift = cell - i_new;
+    sx[shift + 1] = 1.0 - frac;
+    sx[shift + 2] = frac;
+    return cell;
+}
+
+// Shifted shape factor of order 2: three quadratic weights, stored in sx
+// at a position that depends on how far the particle's cells are from i_new.
+template <>
+AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
+int compute_shifted_shape_factor <2> (amrex::Real* const sx,
+                                      const amrex::Real x_old,
+                                      const int i_new){
+    // Central cell (add one half, then truncate) and signed offset from it.
+    const int center = static_cast<int>(x_old + 0.5);
+    const amrex::Real offset = x_old - center;
+    // Shift of the leftmost written cell (center - 1) relative to i_new.
+    const int shift = center - (i_new + 1);
+    // `auto` rather than amrex::Real: these keep the type of the literal
+    // expression, so no extra rounding is introduced for the temporaries.
+    const auto left_arm  = 0.5 - offset;
+    const auto right_arm = 0.5 + offset;
+    sx[shift + 1] = 0.5*left_arm*left_arm;
+    sx[shift + 2] = 0.75 - offset*offset;
+    sx[shift + 3] = 0.5*right_arm*right_arm;
+    // The leftmost cell written is the one just left of the central cell.
+    return center - 1;
+}
+
+// Compute shape factor for order 3.
+// Shifted variant: the four cubic weights are written to
+// sx[1+i_shift .. 4+i_shift] instead of sx[0..3]; other entries of sx are
+// left untouched. Returns i-1, the index of the first of the four cells.
+// NOTE(review): (int) truncates toward zero rather than flooring, so this
+// presumably relies on x_old being non-negative -- confirm against callers.
+template <>
+AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
+int compute_shifted_shape_factor <3> (amrex::Real* const sx,
+ const amrex::Real x_old,
+ const int i_new){
+ // cell index by truncation
+ const int i = (int) x_old;
+ // equals (i-1) - i_new, i.e. the returned index minus i_new
+ const int i_shift = i - (i_new + 1);
+ // offset of the particle from cell i
+ const amrex::Real xint = x_old - i;
+ // the last two weights mirror the first two with xint -> (1-xint)
+ sx[1+i_shift] = 1.0/6.0*(1.0-xint)*(1.0-xint)*(1.0-xint);
+ sx[2+i_shift] = 2.0/3.0-xint*xint*(1-xint/2.0);
+ sx[3+i_shift] = 2.0/3.0-(1-xint)*(1-xint)*(1.0-0.5*(1-xint));
+ sx[4+i_shift] = 1.0/6.0*xint*xint*xint;
+ // index of the leftmost cell where particle deposits
+ return i-1;
+}
+
+#endif // SHAPEFACTORS_H_
diff --git a/Source/Particles/WarpXParticleContainer.H b/Source/Particles/WarpXParticleContainer.H
index 662b2e1b8..ac5b47ada 100644
--- a/Source/Particles/WarpXParticleContainer.H
+++ b/Source/Particles/WarpXParticleContainer.H
@@ -13,7 +13,7 @@ struct PIdx
enum { // Particle Attributes stored in amrex::ParticleContainer's struct of array
w = 0, // weight
ux, uy, uz, Ex, Ey, Ez, Bx, By, Bz,
-#ifdef WARPX_RZ
+#ifdef WARPX_DIM_RZ
theta, // RZ needs all three position components
#endif
nattribs
@@ -104,8 +104,9 @@ public:
const amrex::Vector<std::unique_ptr<amrex::FabArray<amrex::BaseFab<int> > > >& masks) {}
virtual void FieldGather (int lev,
- const amrex::MultiFab& Ex, const amrex::MultiFab& Ey, const amrex::MultiFab& Ez,
- const amrex::MultiFab& Bx, const amrex::MultiFab& By, const amrex::MultiFab& Bz) {}
+ const amrex::MultiFab& Ex, const amrex::MultiFab& Ey,
+ const amrex::MultiFab& Ez, const amrex::MultiFab& Bx,
+ const amrex::MultiFab& By, const amrex::MultiFab& Bz) {}
#ifdef WARPX_DO_ELECTROSTATIC
virtual void EvolveES (const amrex::Vector<std::array<std::unique_ptr<amrex::MultiFab>, 3> >& E,
@@ -166,13 +167,13 @@ public:
virtual void DepositCharge(WarpXParIter& pti,
RealVector& wp,
- amrex::MultiFab* rhomf,
- amrex::MultiFab* crhomf,
+ amrex::MultiFab* rho,
int icomp,
- const long np_current,
- const long np,
+ const long offset,
+ const long np_to_depose,
int thread_num,
- int lev );
+ int lev,
+ int depos_lev);
virtual void DepositCurrent(WarpXParIter& pti,
RealVector& wp,
diff --git a/Source/Particles/WarpXParticleContainer.cpp b/Source/Particles/WarpXParticleContainer.cpp
index a20f0035e..befa5cfed 100644
--- a/Source/Particles/WarpXParticleContainer.cpp
+++ b/Source/Particles/WarpXParticleContainer.cpp
@@ -12,6 +12,7 @@
#include <GetAndSetPosition.H>
#include <UpdatePosition.H>
#include <CurrentDeposition.H>
+#include <ChargeDeposition.H>
using namespace amrex;
@@ -27,7 +28,7 @@ void
WarpXParIter::GetPosition (Cuda::ManagedDeviceVector<Real>& x, Cuda::ManagedDeviceVector<Real>& y, Cuda::ManagedDeviceVector<Real>& z) const
{
amrex::ParIter<0,0,PIdx::nattribs>::GetPosition(x, z);
-#ifdef WARPX_RZ
+#ifdef WARPX_DIM_RZ
const auto& attribs = GetAttribs();
const auto& theta = attribs[PIdx::theta];
y.resize(x.size());
@@ -44,10 +45,10 @@ WarpXParIter::GetPosition (Cuda::ManagedDeviceVector<Real>& x, Cuda::ManagedDevi
void
WarpXParIter::SetPosition (const Cuda::ManagedDeviceVector<Real>& x, const Cuda::ManagedDeviceVector<Real>& y, const Cuda::ManagedDeviceVector<Real>& z)
{
-#ifdef WARPX_RZ
+#ifdef WARPX_DIM_RZ
auto& attribs = GetAttribs();
auto& theta = attribs[PIdx::theta];
- Cuda::DeviceVector<Real> r(x.size());
+ Cuda::ManagedDeviceVector<Real> r(x.size());
for (unsigned int i=0 ; i < x.size() ; i++) {
theta[i] = std::atan2(y[i], x[i]);
r[i] = std::sqrt(x[i]*x[i] + y[i]*y[i]);
@@ -80,7 +81,7 @@ WarpXParticleContainer::WarpXParticleContainer (AmrCore* amr_core, int ispecies)
particle_comps["Bx"] = PIdx::Bx;
particle_comps["By"] = PIdx::By;
particle_comps["Bz"] = PIdx::Bz;
-#ifdef WARPX_RZ
+#ifdef WARPX_DIM_RZ
particle_comps["theta"] = PIdx::theta;
#endif
@@ -163,7 +164,7 @@ WarpXParticleContainer::AddOneParticle (ParticleTileType& particle_tile,
p.pos(1) = y;
p.pos(2) = z;
#elif (AMREX_SPACEDIM == 2)
-#ifdef WARPX_RZ
+#ifdef WARPX_DIM_RZ
attribs[PIdx::theta] = std::atan2(y, x);
x = std::sqrt(x*x + y*y);
#endif
@@ -209,7 +210,7 @@ WarpXParticleContainer::AddNParticles (int lev,
std::size_t np = iend-ibegin;
-#ifdef WARPX_RZ
+#ifdef WARPX_DIM_RZ
Vector<Real> theta(np);
#endif
@@ -228,7 +229,7 @@ WarpXParticleContainer::AddNParticles (int lev,
p.pos(1) = y[i];
p.pos(2) = z[i];
#elif (AMREX_SPACEDIM == 2)
-#ifdef WARPX_RZ
+#ifdef WARPX_DIM_RZ
theta[i-ibegin] = std::atan2(y[i], x[i]);
p.pos(0) = std::sqrt(x[i]*x[i] + y[i]*y[i]);
#else
@@ -265,7 +266,7 @@ WarpXParticleContainer::AddNParticles (int lev,
for (int comp = PIdx::uz+1; comp < PIdx::nattribs; ++comp)
{
-#ifdef WARPX_RZ
+#ifdef WARPX_DIM_RZ
if (comp == PIdx::theta) {
particle_tile.push_back_real(comp, theta.front(), theta.back());
}
@@ -394,14 +395,6 @@ WarpXParticleContainer::DepositCurrentFortran(WarpXParIter& pti,
&WarpX::nox,&WarpX::noy,&WarpX::noz, &j_is_nodal,
&lvect,&WarpX::current_deposition_algo);
-#ifdef WARPX_RZ
- // Rescale current in r-z mode
- warpx_current_deposition_rz_volume_scaling(
- jx_ptr, &ngJ, jxntot.getVect(),
- jy_ptr, &ngJ, jyntot.getVect(),
- jz_ptr, &ngJ, jzntot.getVect(),
- &xyzmin[0], &dx[0]);
-#endif
BL_PROFILE_VAR_STOP(blp_pxr_cd);
#ifndef AMREX_USE_GPU
@@ -503,7 +496,8 @@ WarpXParticleContainer::DepositCurrent(WarpXParIter& pti,
Real* AMREX_RESTRICT yp = m_yp[thread_num].dataPtr() + offset;
// Lower corner of tile box physical domain
- const std::array<Real, 3>& xyzmin = WarpX::LowerCorner(tilebox, depos_lev);;
+ // Note that this includes guard cells since it is after tilebox.ngrow
+ const std::array<Real, 3>& xyzmin = WarpX::LowerCorner(tilebox, depos_lev);
// xyzmin is built on pti.tilebox(), so it does
// not include staggering, so the stagger_shift has to be done by hand.
// Alternatively, we could define xyzminx from tbx (and the same for 3
@@ -513,36 +507,36 @@ WarpXParticleContainer::DepositCurrent(WarpXParIter& pti,
if (WarpX::current_deposition_algo == CurrentDepositionAlgo::Esirkepov) {
if (WarpX::nox == 1){
- doEsirkepovDepositionShapeN<1>(xp, yp, zp, wp.dataPtr(), uxp.dataPtr(),
- uyp.dataPtr(), uzp.dataPtr(), jx_arr, jy_arr,
+ doEsirkepovDepositionShapeN<1>(xp, yp, zp, wp.dataPtr() + offset, uxp.dataPtr() + offset,
+ uyp.dataPtr() + offset, uzp.dataPtr() + offset, jx_arr, jy_arr,
jz_arr, np_to_depose, dt, dx,
xyzmin, lo, q);
} else if (WarpX::nox == 2){
- doEsirkepovDepositionShapeN<2>(xp, yp, zp, wp.dataPtr(), uxp.dataPtr(),
- uyp.dataPtr(), uzp.dataPtr(), jx_arr, jy_arr,
+ doEsirkepovDepositionShapeN<2>(xp, yp, zp, wp.dataPtr() + offset, uxp.dataPtr() + offset,
+ uyp.dataPtr() + offset, uzp.dataPtr() + offset, jx_arr, jy_arr,
jz_arr, np_to_depose, dt, dx,
xyzmin, lo, q);
} else if (WarpX::nox == 3){
- doEsirkepovDepositionShapeN<3>(xp, yp, zp, wp.dataPtr(), uxp.dataPtr(),
- uyp.dataPtr(), uzp.dataPtr(), jx_arr, jy_arr,
+ doEsirkepovDepositionShapeN<3>(xp, yp, zp, wp.dataPtr() + offset, uxp.dataPtr() + offset,
+ uyp.dataPtr() + offset, uzp.dataPtr() + offset, jx_arr, jy_arr,
jz_arr, np_to_depose, dt, dx,
xyzmin, lo, q);
}
} else {
if (WarpX::nox == 1){
- doDepositionShapeN<1>(xp, yp, zp, wp.dataPtr(), uxp.dataPtr(),
- uyp.dataPtr(), uzp.dataPtr(), jx_arr, jy_arr,
- jz_arr, offset, np_to_depose, dt, dx,
+ doDepositionShapeN<1>(xp, yp, zp, wp.dataPtr() + offset, uxp.dataPtr() + offset,
+ uyp.dataPtr() + offset, uzp.dataPtr() + offset, jx_arr, jy_arr,
+ jz_arr, np_to_depose, dt, dx,
xyzmin, lo, stagger_shift, q);
} else if (WarpX::nox == 2){
- doDepositionShapeN<2>(xp, yp, zp, wp.dataPtr(), uxp.dataPtr(),
- uyp.dataPtr(), uzp.dataPtr(), jx_arr, jy_arr,
- jz_arr, offset, np_to_depose, dt, dx,
+ doDepositionShapeN<2>(xp, yp, zp, wp.dataPtr() + offset, uxp.dataPtr() + offset,
+ uyp.dataPtr() + offset, uzp.dataPtr() + offset, jx_arr, jy_arr,
+ jz_arr, np_to_depose, dt, dx,
xyzmin, lo, stagger_shift, q);
} else if (WarpX::nox == 3){
- doDepositionShapeN<3>(xp, yp, zp, wp.dataPtr(), uxp.dataPtr(),
- uyp.dataPtr(), uzp.dataPtr(), jx_arr, jy_arr,
- jz_arr, offset, np_to_depose, dt, dx,
+ doDepositionShapeN<3>(xp, yp, zp, wp.dataPtr() + offset, uxp.dataPtr() + offset,
+ uyp.dataPtr() + offset, uzp.dataPtr() + offset, jx_arr, jy_arr,
+ jz_arr, np_to_depose, dt, dx,
xyzmin, lo, stagger_shift, q);
}
}
@@ -559,140 +553,87 @@ WarpXParticleContainer::DepositCurrent(WarpXParIter& pti,
}
void
-WarpXParticleContainer::DepositCharge ( WarpXParIter& pti, RealVector& wp,
- MultiFab* rhomf, MultiFab* crhomf, int icomp,
- const long np_current,
- const long np, int thread_num, int lev )
+WarpXParticleContainer::DepositCharge (WarpXParIter& pti, RealVector& wp,
+ MultiFab* rho, int icomp,
+ const long offset, const long np_to_depose,
+ int thread_num, int lev, int depos_lev)
{
+ AMREX_ALWAYS_ASSERT_WITH_MESSAGE((depos_lev==(lev-1)) ||
+ (depos_lev==(lev )),
+ "Deposition buffers only work for lev-1");
- BL_PROFILE_VAR_NS("PICSAR::ChargeDeposition", blp_pxr_chd);
- BL_PROFILE_VAR_NS("PPC::Evolve::Accumulate", blp_accumulate);
-
- const std::array<Real,3>& xyzmin_tile = WarpX::LowerCorner(pti.tilebox(), lev);
- const long lvect = 8;
+ // If no particles, do not do anything
+ if (np_to_depose == 0) return;
- long ngRho = rhomf->nGrow();
- Real* data_ptr;
- Box tile_box = convert(pti.tilebox(), IntVect::TheUnitVector());
+ const long ngRho = rho->nGrow();
+ const std::array<Real,3>& dx = WarpX::CellSize(std::max(depos_lev,0));
+ const Real q = this->charge;
- const std::array<Real,3>& dx = WarpX::CellSize(lev);
- const std::array<Real,3>& cdx = WarpX::CellSize(std::max(lev-1,0));
+ BL_PROFILE_VAR_NS("PPC::ChargeDeposition", blp_ppc_chd);
+ BL_PROFILE_VAR_NS("PPC::Evolve::Accumulate", blp_accumulate);
- // Deposit charge for particles that are not in the current buffers
- if (np_current > 0)
- {
- const std::array<Real, 3>& xyzmin = xyzmin_tile;
+ // Get tile box where charge is deposited.
+ // The tile box is different when depositing in the buffers (depos_lev<lev)
+ // or when depositing inside the level (depos_lev=lev)
+ Box tilebox;
+ if (lev == depos_lev) {
+ tilebox = pti.tilebox();
+ } else {
+ const IntVect& ref_ratio = WarpX::RefRatio(depos_lev);
+ tilebox = amrex::coarsen(pti.tilebox(),ref_ratio);
+ }
+
+ tilebox.grow(ngRho);
#ifdef AMREX_USE_GPU
- data_ptr = (*rhomf)[pti].dataPtr(icomp);
- auto rholen = (*rhomf)[pti].length();
+ // No tiling on GPU: rho_arr points to the full rho array.
+ MultiFab rhoi(*rho, amrex::make_alias, icomp, 1);
+ Array4<Real> const& rho_arr = rhoi.array(pti);
#else
- tile_box.grow(ngRho);
- local_rho[thread_num].resize(tile_box);
+ // Tiling is on: rho_arr points to local_rho[thread_num]
+ const Box tb = amrex::convert(tilebox, IntVect::TheUnitVector());
- data_ptr = local_rho[thread_num].dataPtr();
- auto rholen = local_rho[thread_num].length();
+ local_rho[thread_num].resize(tb);
- local_rho[thread_num].setVal(0.0);
-#endif
+ // local_rho[thread_num] is set to zero
+ local_rho[thread_num].setVal(0.0);
-#if (AMREX_SPACEDIM == 3)
- const long nx = rholen[0]-1-2*ngRho;
- const long ny = rholen[1]-1-2*ngRho;
- const long nz = rholen[2]-1-2*ngRho;
-#else
- const long nx = rholen[0]-1-2*ngRho;
- const long ny = 0;
- const long nz = rholen[1]-1-2*ngRho;
+ Array4<Real> const& rho_arr = local_rho[thread_num].array();
#endif
- BL_PROFILE_VAR_START(blp_pxr_chd);
- warpx_charge_deposition(data_ptr, &np_current,
- m_xp[thread_num].dataPtr(),
- m_yp[thread_num].dataPtr(),
- m_zp[thread_num].dataPtr(),
- wp.dataPtr(),
- &this->charge,
- &xyzmin[0], &xyzmin[1], &xyzmin[2],
- &dx[0], &dx[1], &dx[2], &nx, &ny, &nz,
- &ngRho, &ngRho, &ngRho,
- &WarpX::nox,&WarpX::noy,&WarpX::noz,
- &lvect, &WarpX::charge_deposition_algo);
-#ifdef WARPX_RZ
- warpx_charge_deposition_rz_volume_scaling(
- data_ptr, &ngRho, rholen.getVect(),
- &xyzmin[0], &dx[0]);
-#endif
- BL_PROFILE_VAR_STOP(blp_pxr_chd);
-
-#ifndef AMREX_USE_GPU
- BL_PROFILE_VAR_START(blp_accumulate);
-
- (*rhomf)[pti].atomicAdd(local_rho[thread_num], tile_box, tile_box, 0, icomp, 1);
-
- BL_PROFILE_VAR_STOP(blp_accumulate);
-#endif
- }
-
- // Deposit charge for particles that are in the current buffers
- if (np_current < np)
- {
- const IntVect& ref_ratio = WarpX::RefRatio(lev-1);
- const Box& ctilebox = amrex::coarsen(pti.tilebox(), ref_ratio);
- const std::array<Real,3>& cxyzmin_tile = WarpX::LowerCorner(ctilebox, lev-1);
-
-#ifdef AMREX_USE_GPU
- data_ptr = (*crhomf)[pti].dataPtr(icomp);
- auto rholen = (*crhomf)[pti].length();
-#else
- tile_box = amrex::convert(ctilebox, IntVect::TheUnitVector());
- tile_box.grow(ngRho);
- local_rho[thread_num].resize(tile_box);
-
- data_ptr = local_rho[thread_num].dataPtr();
- auto rholen = local_rho[thread_num].length();
+ // GPU, no tiling: deposit directly in rho
+ // CPU, tiling: deposit into local_rho
- local_rho[thread_num].setVal(0.0);
-#endif
+ Real* AMREX_RESTRICT xp = m_xp[thread_num].dataPtr() + offset;
+ Real* AMREX_RESTRICT zp = m_zp[thread_num].dataPtr() + offset;
+ Real* AMREX_RESTRICT yp = m_yp[thread_num].dataPtr() + offset;
-#if (AMREX_SPACEDIM == 3)
- const long nx = rholen[0]-1-2*ngRho;
- const long ny = rholen[1]-1-2*ngRho;
- const long nz = rholen[2]-1-2*ngRho;
-#else
- const long nx = rholen[0]-1-2*ngRho;
- const long ny = 0;
- const long nz = rholen[1]-1-2*ngRho;
-#endif
+ // Lower corner of tile box physical domain
+ // Note that this includes guard cells since it is after tilebox.grow
+ const std::array<Real, 3>& xyzmin = WarpX::LowerCorner(tilebox, depos_lev);
+ // Indices of the lower bound
+ const Dim3 lo = lbound(tilebox);
- long ncrse = np - np_current;
- BL_PROFILE_VAR_START(blp_pxr_chd);
- warpx_charge_deposition(data_ptr, &ncrse,
- m_xp[thread_num].dataPtr() + np_current,
- m_yp[thread_num].dataPtr() + np_current,
- m_zp[thread_num].dataPtr() + np_current,
- wp.dataPtr() + np_current,
- &this->charge,
- &cxyzmin_tile[0], &cxyzmin_tile[1], &cxyzmin_tile[2],
- &cdx[0], &cdx[1], &cdx[2], &nx, &ny, &nz,
- &ngRho, &ngRho, &ngRho,
- &WarpX::nox,&WarpX::noy,&WarpX::noz,
- &lvect, &WarpX::charge_deposition_algo);
-#ifdef WARPX_RZ
- warpx_charge_deposition_rz_volume_scaling(
- data_ptr, &ngRho, rholen.getVect(),
- &cxyzmin_tile[0], &cdx[0]);
-#endif
- BL_PROFILE_VAR_STOP(blp_pxr_chd);
+ BL_PROFILE_VAR_START(blp_ppc_chd);
+ if (WarpX::nox == 1){
+ doChargeDepositionShapeN<1>(xp, yp, zp, wp.dataPtr()+offset, rho_arr,
+ np_to_depose, dx, xyzmin, lo, q);
+ } else if (WarpX::nox == 2){
+ doChargeDepositionShapeN<2>(xp, yp, zp, wp.dataPtr()+offset, rho_arr,
+ np_to_depose, dx, xyzmin, lo, q);
+ } else if (WarpX::nox == 3){
+ doChargeDepositionShapeN<3>(xp, yp, zp, wp.dataPtr()+offset, rho_arr,
+ np_to_depose, dx, xyzmin, lo, q);
+ }
+ BL_PROFILE_VAR_STOP(blp_ppc_chd);
#ifndef AMREX_USE_GPU
- BL_PROFILE_VAR_START(blp_accumulate);
+ BL_PROFILE_VAR_START(blp_accumulate);
- (*crhomf)[pti].atomicAdd(local_rho[thread_num], tile_box, tile_box, 0, icomp, 1);
+ (*rho)[pti].atomicAdd(local_rho[thread_num], tb, tb, 0, icomp, 1);
- BL_PROFILE_VAR_STOP(blp_accumulate);
+ BL_PROFILE_VAR_STOP(blp_accumulate);
#endif
- }
-};
+}
void
WarpXParticleContainer::DepositCharge (Vector<std::unique_ptr<MultiFab> >& rho, bool local)
@@ -769,8 +710,6 @@ WarpXParticleContainer::GetChargeDensity (int lev, bool local)
BoxArray nba = ba;
nba.surroundingNodes();
- const std::array<Real,3>& dx = WarpX::CellSize(lev);
-
const int ng = WarpX::nox;
auto rho = std::unique_ptr<MultiFab>(new MultiFab(nba,dm,1,ng));
@@ -780,75 +719,28 @@ WarpXParticleContainer::GetChargeDensity (int lev, bool local)
#pragma omp parallel
{
#endif
- Cuda::ManagedDeviceVector<Real> xp, yp, zp;
#ifdef _OPENMP
- FArrayBox rho_loc;
+ int thread_num = omp_get_thread_num();
+#else
+ int thread_num = 0;
#endif
for (WarpXParIter pti(*this, lev); pti.isValid(); ++pti)
{
+ const long np = pti.numParticles();
auto& wp = pti.GetAttribs(PIdx::w);
- const long np = pti.numParticles();
-
- pti.GetPosition(xp, yp, zp);
+ pti.GetPosition(m_xp[thread_num], m_yp[thread_num], m_zp[thread_num]);
- // Data on the grid
- Real* data_ptr;
- FArrayBox& rhofab = (*rho)[pti];
+ DepositCharge(pti, wp, rho.get(), 0, 0, np, thread_num, lev, lev);
+ }
#ifdef _OPENMP
- const std::array<Real,3>& xyzmin_tile = WarpX::LowerCorner(pti.tilebox(), lev);
- Box tile_box = convert(pti.tilebox(), IntVect::TheUnitVector());
- const std::array<Real, 3>& xyzmin = xyzmin_tile;
- tile_box.grow(ng);
- rho_loc.resize(tile_box);
- rho_loc = 0.0;
- data_ptr = rho_loc.dataPtr();
- auto rholen = rho_loc.length();
-#else
- const Box& box = pti.validbox();
- const std::array<Real,3>& xyzmin_grid = WarpX::LowerCorner(box, lev);
- const std::array<Real, 3>& xyzmin = xyzmin_grid;
- data_ptr = rhofab.dataPtr();
- auto rholen = rhofab.length();
-#endif
-
-#if (AMREX_SPACEDIM == 3)
- const long nx = rholen[0]-1-2*ng;
- const long ny = rholen[1]-1-2*ng;
- const long nz = rholen[2]-1-2*ng;
-#else
- const long nx = rholen[0]-1-2*ng;
- const long ny = 0;
- const long nz = rholen[1]-1-2*ng;
-#endif
-
- long nxg = ng;
- long nyg = ng;
- long nzg = ng;
- long lvect = 8;
-
- warpx_charge_deposition(data_ptr,
- &np,
- xp.dataPtr(),
- yp.dataPtr(),
- zp.dataPtr(), wp.dataPtr(),
- &this->charge, &xyzmin[0], &xyzmin[1], &xyzmin[2],
- &dx[0], &dx[1], &dx[2], &nx, &ny, &nz,
- &nxg, &nyg, &nzg, &WarpX::nox,&WarpX::noy,&WarpX::noz,
- &lvect, &WarpX::charge_deposition_algo);
-#ifdef WARPX_RZ
- long ngRho = WarpX::nox;
- warpx_charge_deposition_rz_volume_scaling(
- data_ptr, &ngRho, rholen.getVect(),
- &xyzmin[0], &dx[0]);
+ }
#endif
-#ifdef _OPENMP
- rhofab.atomicAdd(rho_loc);
- }
+#ifdef WARPX_DIM_RZ
+ WarpX::GetInstance().ApplyInverseVolumeScalingToChargeDensity(rho.get(), lev);
#endif
- }
if (!local) rho->SumBoundary(gm.periodicity());
@@ -1022,7 +914,7 @@ WarpXParticleContainer::PushX (int lev, Real dt)
Real* AMREX_RESTRICT ux = attribs[PIdx::ux].dataPtr();
Real* AMREX_RESTRICT uy = attribs[PIdx::uy].dataPtr();
Real* AMREX_RESTRICT uz = attribs[PIdx::uz].dataPtr();
-#ifdef WARPX_RZ
+#ifdef WARPX_DIM_RZ
Real* AMREX_RESTRICT theta = attribs[PIdx::theta].dataPtr();
#endif
// Loop over the particles and update their position
@@ -1030,12 +922,12 @@ WarpXParticleContainer::PushX (int lev, Real dt)
[=] AMREX_GPU_DEVICE (long i) {
ParticleType& p = pstructs[i]; // Particle object that gets updated
Real x, y, z; // Temporary variables
-#ifndef WARPX_RZ
+#ifndef WARPX_DIM_RZ
GetPosition( x, y, z, p ); // Initialize x, y, z
UpdatePosition( x, y, z, ux[i], uy[i], uz[i], dt);
SetPosition( p, x, y, z ); // Update the object p
#else
- // For WARPX_RZ, the particles are still pushed in 3D Cartesian
+ // For WARPX_DIM_RZ, the particles are still pushed in 3D Cartesian
GetCartesianPositionFromCylindrical( x, y, z, p, theta[i] );
UpdatePosition( x, y, z, ux[i], uy[i], uz[i], dt);
SetCylindricalPositionFromCartesian( p, theta[i], x, y, z );
diff --git a/Source/Utils/WarpXAlgorithmSelection.H b/Source/Utils/WarpXAlgorithmSelection.H
index 3fb23698a..6a32513b7 100644
--- a/Source/Utils/WarpXAlgorithmSelection.H
+++ b/Source/Utils/WarpXAlgorithmSelection.H
@@ -34,11 +34,9 @@ struct CurrentDepositionAlgo {
};
struct ChargeDepositionAlgo {
- // These numbers corresponds to the algorithm code in WarpX's
- // `warpx_charge_deposition` function
+ // Only the Standard algorithm is implemented
enum {
- Vectorized = 0,
- Standard = 1
+ Standard = 0
};
};
diff --git a/Source/Utils/WarpXAlgorithmSelection.cpp b/Source/Utils/WarpXAlgorithmSelection.cpp
index 2c8038ccd..842085a36 100644
--- a/Source/Utils/WarpXAlgorithmSelection.cpp
+++ b/Source/Utils/WarpXAlgorithmSelection.cpp
@@ -8,7 +8,7 @@
const std::map<std::string, int> maxwell_solver_algo_to_int = {
{"yee", MaxwellSolverAlgo::Yee },
-#ifndef WARPX_RZ // Not available in RZ
+#ifndef WARPX_DIM_RZ // Not available in RZ
{"ckc", MaxwellSolverAlgo::CKC },
#endif
{"default", MaxwellSolverAlgo::Yee }
@@ -31,12 +31,7 @@ const std::map<std::string, int> current_deposition_algo_to_int = {
const std::map<std::string, int> charge_deposition_algo_to_int = {
{"standard", ChargeDepositionAlgo::Standard },
-#if (!defined AMREX_USE_GPU)&&(AMREX_SPACEDIM == 3) // Only available on CPU and 3D
- {"vectorized", ChargeDepositionAlgo::Vectorized },
- {"default", ChargeDepositionAlgo::Vectorized }
-#else
{"default", ChargeDepositionAlgo::Standard }
-#endif
};
const std::map<std::string, int> gathering_algo_to_int = {
diff --git a/Source/WarpX.H b/Source/WarpX.H
index a25eef9e4..927cc1f32 100644
--- a/Source/WarpX.H
+++ b/Source/WarpX.H
@@ -152,12 +152,12 @@ public:
BilinearFilter bilinear_filter;
amrex::Vector< std::unique_ptr<NCIGodfreyFilter> > nci_godfrey_filter_exeybz;
amrex::Vector< std::unique_ptr<NCIGodfreyFilter> > nci_godfrey_filter_bxbyez;
-
+
static int num_mirrors;
amrex::Vector<amrex::Real> mirror_z;
amrex::Vector<amrex::Real> mirror_z_width;
amrex::Vector<int> mirror_z_npoints;
-
+
void applyMirrors(amrex::Real time);
void ComputeDt ();
@@ -178,6 +178,16 @@ public:
void EvolveE (int lev, PatchType patch_type, amrex::Real dt);
void EvolveF (int lev, PatchType patch_type, amrex::Real dt, DtType dt_type);
+#ifdef WARPX_DIM_RZ
+ void ApplyInverseVolumeScalingToCurrentDensity(amrex::MultiFab* Jx,
+ amrex::MultiFab* Jy,
+ amrex::MultiFab* Jz,
+ int lev);
+
+ void ApplyInverseVolumeScalingToChargeDensity(amrex::MultiFab* Rho,
+ int lev);
+#endif
+
void DampPML ();
void DampPML (int lev);
void DampPML (int lev, PatchType patch_type);
@@ -247,6 +257,7 @@ public:
static int do_moving_window;
static int moving_window_dir;
+ static amrex::Real moving_window_v;
// slice generation //
void InitializeSliceMultiFabs ();
@@ -489,17 +500,18 @@ private:
int do_pml = 1;
int pml_ncell = 10;
int pml_delta = 10;
+ amrex::IntVect do_pml_Lo = amrex::IntVect::TheUnitVector();
+ amrex::IntVect do_pml_Hi = amrex::IntVect::TheUnitVector();
amrex::Vector<std::unique_ptr<PML> > pml;
amrex::Real moving_window_x = std::numeric_limits<amrex::Real>::max();
- amrex::Real moving_window_v = std::numeric_limits<amrex::Real>::max();
amrex::Real current_injection_position = 0;
// Plasma injection parameters
int warpx_do_continuous_injection = 0;
int num_injected_species = -1;
amrex::Vector<int> injected_plasma_species;
-
+
int do_electrostatic = 0;
int n_buffer = 4;
amrex::Real const_dt = 0.5e-11;
diff --git a/Source/WarpX.cpp b/Source/WarpX.cpp
index 1f5ade13a..1b653fd7f 100644
--- a/Source/WarpX.cpp
+++ b/Source/WarpX.cpp
@@ -30,6 +30,7 @@ Vector<Real> WarpX::B_external(3, 0.0);
int WarpX::do_moving_window = 0;
int WarpX::moving_window_dir = -1;
+Real WarpX::moving_window_v = std::numeric_limits<amrex::Real>::max();
Real WarpX::gamma_boost = 1.;
Real WarpX::beta_boost = 0.;
@@ -334,7 +335,19 @@ WarpX::ReadParameters ()
"The boosted frame diagnostic currently only works if the boost is in the z direction.");
pp.get("num_snapshots_lab", num_snapshots_lab);
- pp.get("dt_snapshots_lab", dt_snapshots_lab);
+
+ // Read either dz_snapshots_lab or dt_snapshots_lab
+ bool snapshot_interval_is_specified = 0;
+ Real dz_snapshots_lab = 0;
+ snapshot_interval_is_specified += pp.query("dt_snapshots_lab", dt_snapshots_lab);
+ if ( pp.query("dz_snapshots_lab", dz_snapshots_lab) ){
+ dt_snapshots_lab = dz_snapshots_lab/PhysConst::c;
+ snapshot_interval_is_specified = 1;
+ }
+ AMREX_ALWAYS_ASSERT_WITH_MESSAGE(
+ snapshot_interval_is_specified,
+ "When using back-transformed diagnostics, user should specify either dz_snapshots_lab or dt_snapshots_lab.");
+
pp.get("gamma_boost", gamma_boost);
pp.query("do_boosted_frame_fields", do_boosted_frame_fields);
@@ -383,6 +396,22 @@ WarpX::ReadParameters ()
pp.query("pml_ncell", pml_ncell);
pp.query("pml_delta", pml_delta);
+ Vector<int> parse_do_pml_Lo(AMREX_SPACEDIM,1);
+ pp.queryarr("do_pml_Lo", parse_do_pml_Lo);
+ do_pml_Lo[0] = parse_do_pml_Lo[0];
+ do_pml_Lo[1] = parse_do_pml_Lo[1];
+#if (AMREX_SPACEDIM == 3)
+ do_pml_Lo[2] = parse_do_pml_Lo[2];
+#endif
+ Vector<int> parse_do_pml_Hi(AMREX_SPACEDIM,1);
+ pp.queryarr("do_pml_Hi", parse_do_pml_Hi);
+ do_pml_Hi[0] = parse_do_pml_Hi[0];
+ do_pml_Hi[1] = parse_do_pml_Hi[1];
+#if (AMREX_SPACEDIM == 3)
+ do_pml_Hi[2] = parse_do_pml_Hi[2];
+#endif
+
+
pp.query("dump_openpmd", dump_openpmd);
pp.query("dump_plotfiles", dump_plotfiles);
pp.query("plot_raw_fields", plot_raw_fields);
@@ -393,7 +422,7 @@ WarpX::ReadParameters ()
if (not user_fields_to_plot){
// If not specified, set default values
fields_to_plot = {"Ex", "Ey", "Ez", "Bx", "By",
- "Bz", "jx", "jy", "jz",
+ "Bz", "jx", "jy", "jz",
"part_per_cell"};
}
// set plot_rho to true if the user requests it, so that
@@ -411,9 +440,9 @@ WarpX::ReadParameters ()
// If user requests to plot proc_number for a serial run,
// delete proc_number from fields_to_plot
if (ParallelDescriptor::NProcs() == 1){
- fields_to_plot.erase(std::remove(fields_to_plot.begin(),
- fields_to_plot.end(),
- "proc_number"),
+ fields_to_plot.erase(std::remove(fields_to_plot.begin(),
+ fields_to_plot.end(),
+ "proc_number"),
fields_to_plot.end());
}
@@ -497,11 +526,9 @@ WarpX::ReadParameters ()
{
ParmParse pp("algo");
// If not in RZ mode, read use_picsar_deposition
- // In RZ mode, use_picsar_deposition is on, as the C++ version
+ // In RZ mode, use_picsar_deposition is on, as the C++ version
// of the deposition does not support RZ
-#ifndef WARPX_RZ
pp.query("use_picsar_deposition", use_picsar_deposition);
-#endif
current_deposition_algo = GetAlgorithmInteger(pp, "current_deposition");
charge_deposition_algo = GetAlgorithmInteger(pp, "charge_deposition");
field_gathering_algo = GetAlgorithmInteger(pp, "field_gathering");
@@ -876,6 +903,21 @@ WarpX::AllocLevelMFs (int lev, const BoxArray& ba, const DistributionMapping& dm
rho_cp[lev].reset(new MultiFab(amrex::convert(cba,IntVect::TheUnitVector()),dm,2,ngRho));
rho_cp_owner_masks[lev] = std::move(rho_cp[lev]->OwnerMask(cperiod));
}
+ if (fft_hybrid_mpi_decomposition == false){
+ // Allocate and initialize the spectral solver
+ std::array<Real,3> cdx = CellSize(lev-1);
+ #if (AMREX_SPACEDIM == 3)
+ RealVect cdx_vect(cdx[0], cdx[1], cdx[2]);
+ #elif (AMREX_SPACEDIM == 2)
+ RealVect cdx_vect(cdx[0], cdx[2]);
+ #endif
+ // Get the cell-centered box, with guard cells
+ BoxArray realspace_ba = cba; // Copy box
+ realspace_ba.enclosedCells().grow(ngE); // cell-centered + guard cells
+ // Define spectral solver
+ spectral_solver_cp[lev].reset( new SpectralSolver( realspace_ba, dm,
+ nox_fft, noy_fft, noz_fft, do_nodal, cdx_vect, dt[lev] ) );
+ }
#endif
}
@@ -907,7 +949,7 @@ WarpX::AllocLevelMFs (int lev, const BoxArray& ba, const DistributionMapping& dm
current_buf[lev][0].reset( new MultiFab(amrex::convert(cba,jx_nodal_flag),dm,1,ngJ));
current_buf[lev][1].reset( new MultiFab(amrex::convert(cba,jy_nodal_flag),dm,1,ngJ));
current_buf[lev][2].reset( new MultiFab(amrex::convert(cba,jz_nodal_flag),dm,1,ngJ));
- if (do_dive_cleaning || plot_rho) {
+ if (rho_cp[lev]) {
charge_buf[lev].reset( new MultiFab(amrex::convert(cba,IntVect::TheUnitVector()),dm,2,ngRho));
}
current_buffer_masks[lev].reset( new iMultiFab(ba, dm, 1, 1) );
@@ -995,7 +1037,7 @@ WarpX::ComputeDivB (MultiFab& divB, int dcomp,
{
Real dxinv = 1./dx[0], dyinv = 1./dx[1], dzinv = 1./dx[2];
-#ifdef WARPX_RZ
+#ifdef WARPX_DIM_RZ
const Real rmin = GetInstance().Geom(0).ProbLo(0);
#endif
@@ -1014,7 +1056,7 @@ WarpX::ComputeDivB (MultiFab& divB, int dcomp,
[=] AMREX_GPU_DEVICE(int i, int j, int k) noexcept
{
warpx_computedivb(i, j, k, dcomp, divBfab, Bxfab, Byfab, Bzfab, dxinv, dyinv, dzinv
-#ifdef WARPX_RZ
+#ifdef WARPX_DIM_RZ
,rmin
#endif
);
@@ -1029,7 +1071,7 @@ WarpX::ComputeDivB (MultiFab& divB, int dcomp,
{
Real dxinv = 1./dx[0], dyinv = 1./dx[1], dzinv = 1./dx[2];
-#ifdef WARPX_RZ
+#ifdef WARPX_DIM_RZ
const Real rmin = GetInstance().Geom(0).ProbLo(0);
#endif
@@ -1048,7 +1090,7 @@ WarpX::ComputeDivB (MultiFab& divB, int dcomp,
[=] AMREX_GPU_DEVICE(int i, int j, int k) noexcept
{
warpx_computedivb(i, j, k, dcomp, divBfab, Bxfab, Byfab, Bzfab, dxinv, dyinv, dzinv
-#ifdef WARPX_RZ
+#ifdef WARPX_DIM_RZ
,rmin
#endif
);
@@ -1063,7 +1105,7 @@ WarpX::ComputeDivE (MultiFab& divE, int dcomp,
{
Real dxinv = 1./dx[0], dyinv = 1./dx[1], dzinv = 1./dx[2];
-#ifdef WARPX_RZ
+#ifdef WARPX_DIM_RZ
const Real rmin = GetInstance().Geom(0).ProbLo(0);
#endif
@@ -1082,7 +1124,7 @@ WarpX::ComputeDivE (MultiFab& divE, int dcomp,
[=] AMREX_GPU_DEVICE(int i, int j, int k) noexcept
{
warpx_computedive(i, j, k, dcomp, divEfab, Exfab, Eyfab, Ezfab, dxinv, dyinv, dzinv
-#ifdef WARPX_RZ
+#ifdef WARPX_DIM_RZ
,rmin
#endif
);
@@ -1097,7 +1139,7 @@ WarpX::ComputeDivE (MultiFab& divE, int dcomp,
{
Real dxinv = 1./dx[0], dyinv = 1./dx[1], dzinv = 1./dx[2];
-#ifdef WARPX_RZ
+#ifdef WARPX_DIM_RZ
const Real rmin = GetInstance().Geom(0).ProbLo(0);
#endif
@@ -1116,7 +1158,7 @@ WarpX::ComputeDivE (MultiFab& divE, int dcomp,
[=] AMREX_GPU_DEVICE(int i, int j, int k) noexcept
{
warpx_computedive(i, j, k, dcomp, divEfab, Exfab, Eyfab, Ezfab, dxinv, dyinv, dzinv
-#ifdef WARPX_RZ
+#ifdef WARPX_DIM_RZ
,rmin
#endif
);