64 files changed, 3765 insertions, 2658 deletions
diff --git a/Source/.DS_Store b/Source/.DS_Store
deleted file mode 100644
index 01640e062..000000000
--- a/Source/.DS_Store
+++ /dev/null
diff --git a/Source/BoundaryConditions/PML.H b/Source/BoundaryConditions/PML.H
index 0cf367284..b34cbe88b 100644
--- a/Source/BoundaryConditions/PML.H
+++ b/Source/BoundaryConditions/PML.H
@@ -6,6 +6,10 @@
 #include <AMReX_MultiFab.H>
 #include <AMReX_Geometry.H>
 
+#ifdef WARPX_USE_PSATD
+#include <SpectralSolver.H>
+#endif
+
 #if (AMREX_SPACEDIM == 3)
 
 #define WRPX_PML_TO_FORTRAN(x)                              \
@@ -93,7 +97,12 @@ class PML
 public:
     PML (const amrex::BoxArray& ba, const amrex::DistributionMapping& dm,
          const amrex::Geometry* geom, const amrex::Geometry* cgeom,
-         int ncell, int delta, int ref_ratio, int do_dive_cleaning, int do_moving_window);
+         int ncell, int delta, int ref_ratio,
+#ifdef WARPX_USE_PSATD
+         amrex::Real dt, int nox_fft, int noy_fft, int noz_fft, bool do_nodal,
+#endif
+         int do_dive_cleaning, int do_moving_window,
+         const amrex::IntVect do_pml_Lo, const amrex::IntVect do_pml_Hi);
 
     void ComputePMLFactors (amrex::Real dt);
 
@@ -111,6 +120,10 @@ public:
     const MultiSigmaBox& GetMultiSigmaBox_cp () const
         { return *sigba_cp; }
 
+#ifdef WARPX_USE_PSATD
+    void PushPSATD ();
+#endif
+
     void ExchangeB (const std::array<amrex::MultiFab*,3>& B_fp,
                     const std::array<amrex::MultiFab*,3>& B_cp);
     void ExchangeE (const std::array<amrex::MultiFab*,3>& E_fp,
@@ -154,10 +167,23 @@ private:
     std::unique_ptr<MultiSigmaBox> sigba_fp;
     std::unique_ptr<MultiSigmaBox> sigba_cp;
 
+#ifdef WARPX_USE_PSATD
+    std::unique_ptr<SpectralSolver> spectral_solver_fp;
+    std::unique_ptr<SpectralSolver> spectral_solver_cp;
+#endif
+
     static amrex::BoxArray MakeBoxArray (const amrex::Geometry& geom,
-                                         const amrex::BoxArray& grid_ba, int ncell);
+                                         const amrex::BoxArray& grid_ba, int ncell,
+                                         const amrex::IntVect do_pml_Lo,
+                                         const amrex::IntVect do_pml_Hi);
 
     static void Exchange (amrex::MultiFab& pml, amrex::MultiFab& reg, const amrex::Geometry& geom);
 };
 
+#ifdef WARPX_USE_PSATD
+void PushPMLPSATDSinglePatch( SpectralSolver& solver,
+    std::array<std::unique_ptr<amrex::MultiFab>,3>& pml_E,
+    std::array<std::unique_ptr<amrex::MultiFab>,3>& pml_B );
+#endif
+
 #endif
diff --git a/Source/BoundaryConditions/PML.cpp b/Source/BoundaryConditions/PML.cpp
index f780f335c..21d348482 100644
--- a/Source/BoundaryConditions/PML.cpp
+++ b/Source/BoundaryConditions/PML.cpp
@@ -258,14 +258,7 @@ SigmaBox::ComputePMLFactorsB (const Real* dx, Real dt)
     {
         for (int i = 0, N = sigma_star[idim].size(); i < N; ++i)
         {
-            if (sigma_star[idim][i] == 0.0)
-            {
-                sigma_star_fac[idim][i] = 1.0;
-            }
-            else
-            {
-                sigma_star_fac[idim][i] = std::exp(-sigma_star[idim][i]*dt);
-            }
+            sigma_star_fac[idim][i] = std::exp(-sigma_star[idim][i]*dt);
         }
     }
 }
@@ -277,14 +270,7 @@ SigmaBox::ComputePMLFactorsE (const Real* dx, Real dt)
     {
         for (int i = 0, N = sigma[idim].size(); i < N; ++i)
         {
-            if (sigma[idim][i] == 0.0)
-            {
-                sigma_fac[idim][i] = 1.0;
-            }
-            else
-            {
-                sigma_fac[idim][i] = std::exp(-sigma[idim][i]*dt);
-            }
+            sigma_fac[idim][i] = std::exp(-sigma[idim][i]*dt);
         }
     }
 }
@@ -329,11 +315,16 @@ MultiSigmaBox::ComputePMLFactorsE (const Real* dx, Real dt)
 
 PML::PML (const BoxArray& grid_ba, const DistributionMapping& grid_dm,
           const Geometry* geom, const Geometry* cgeom,
-          int ncell, int delta, int ref_ratio, int do_dive_cleaning, int do_moving_window)
+          int ncell, int delta, int ref_ratio,
+#ifdef WARPX_USE_PSATD
+          Real dt, int nox_fft, int noy_fft, int noz_fft, bool do_nodal,
+#endif
+          int do_dive_cleaning, int do_moving_window,
+          const amrex::IntVect do_pml_Lo, const amrex::IntVect do_pml_Hi)
     : m_geom(geom),
       m_cgeom(cgeom)
 {
-    const BoxArray& ba = MakeBoxArray(*geom, grid_ba, ncell);
+    const BoxArray& ba = MakeBoxArray(*geom, grid_ba, ncell, do_pml_Lo, do_pml_Hi);
     if (ba.size() == 0) {
         m_ok = false;
         return;
@@ -343,10 +334,30 @@ PML::PML (const BoxArray& grid_ba, const DistributionMapping& grid_dm,
 
     DistributionMapping dm{ba};
 
-    int nge = 2;
-    int ngb = 2;
-    int ngf = (do_moving_window) ? 2 : 0;
-    if (WarpX::maxwell_fdtd_solver_id == 1) ngf = std::max( ngf, 1 );
+    // Define the number of guard cells in each direction, for E, B, and F
+    IntVect nge = IntVect(AMREX_D_DECL(2, 2, 2));
+    IntVect ngb = IntVect(AMREX_D_DECL(2, 2, 2));
+    int ngf_int = (do_moving_window) ? 2 : 0;
+    if (WarpX::maxwell_fdtd_solver_id == 1) ngf_int = std::max( ngf_int, 1 );
+    IntVect ngf = IntVect(AMREX_D_DECL(ngf_int, ngf_int, ngf_int));
+#ifdef WARPX_USE_PSATD
+    // Increase the number of guard cells, in order to fit the extent
+    // of the stencil for the spectral solver
+    IntVect ngFFT;
+    if (do_nodal) {
+        ngFFT = IntVect(AMREX_D_DECL(nox_fft, noy_fft, noz_fft));
+    } else {
+        ngFFT = IntVect(AMREX_D_DECL(nox_fft/2, noy_fft/2, noz_fft/2));
+    }
+    // Set the number of guard cells to the maximum of each field
+    // (all fields should have the same number of guard cells)
+    ngFFT = ngFFT.max(nge);
+    ngFFT = ngFFT.max(ngb);
+    ngFFT = ngFFT.max(ngf);
+    nge = ngFFT;
+    ngb = ngFFT;
+    ngf = ngFFT;
+ #endif
 
     pml_E_fp[0].reset(new MultiFab(amrex::convert(ba,WarpX::Ex_nodal_flag), dm, 3, nge));
     pml_E_fp[1].reset(new MultiFab(amrex::convert(ba,WarpX::Ey_nodal_flag), dm, 3, nge));
@@ -370,15 +381,26 @@ PML::PML (const BoxArray& grid_ba, const DistributionMapping& grid_dm,
 
     sigba_fp.reset(new MultiSigmaBox(ba, dm, grid_ba, geom->CellSize(), ncell, delta));
 
+#ifdef WARPX_USE_PSATD
+    const bool in_pml = true; // Tells spectral solver to use split-PML equations
+    const RealVect dx{AMREX_D_DECL(geom->CellSize(0), geom->CellSize(1), geom->CellSize(2))};
+    // Get the cell-centered box, with guard cells
+    BoxArray realspace_ba = ba;  // Copy box
+    realspace_ba.enclosedCells().grow(nge); // cell-centered + guard cells
+    spectral_solver_fp.reset( new SpectralSolver( realspace_ba, dm,
+        nox_fft, noy_fft, noz_fft, do_nodal, dx, dt, in_pml ) );
+#endif
+
     if (cgeom)
     {
-
-        nge = 1;
-        ngb = 1;
+#ifndef WARPX_USE_PSATD
+        nge = IntVect(AMREX_D_DECL(1, 1, 1));
+        ngb = IntVect(AMREX_D_DECL(1, 1, 1));
+#endif
 
         BoxArray grid_cba = grid_ba;
         grid_cba.coarsen(ref_ratio);
-        const BoxArray& cba = MakeBoxArray(*cgeom, grid_cba, ncell);
+        const BoxArray& cba = MakeBoxArray(*cgeom, grid_cba, ncell, do_pml_Lo, do_pml_Hi);
 
         DistributionMapping cdm{cba};
 
@@ -403,17 +425,32 @@ PML::PML (const BoxArray& grid_ba, const DistributionMapping& grid_dm,
         }
 
         sigba_cp.reset(new MultiSigmaBox(cba, cdm, grid_cba, cgeom->CellSize(), ncell, delta));
-    }
 
+#ifdef WARPX_USE_PSATD
+        const bool in_pml = true; // Tells spectral solver to use split-PML equations
+        const RealVect cdx{AMREX_D_DECL(cgeom->CellSize(0), cgeom->CellSize(1), cgeom->CellSize(2))};
+        // Get the cell-centered box, with guard cells
+        BoxArray realspace_cba = cba;  // Copy box
+        realspace_cba.enclosedCells().grow(nge); // cell-centered + guard cells
+        spectral_solver_cp.reset( new SpectralSolver( realspace_cba, cdm,
+            nox_fft, noy_fft, noz_fft, do_nodal, cdx, dt, in_pml ) );
+#endif
+    }
 }
 
 BoxArray
-PML::MakeBoxArray (const amrex::Geometry& geom, const amrex::BoxArray& grid_ba, int ncell)
+PML::MakeBoxArray (const amrex::Geometry& geom, const amrex::BoxArray& grid_ba, int ncell,
+                   const amrex::IntVect do_pml_Lo, const amrex::IntVect do_pml_Hi)
 {
     Box domain = geom.Domain();
     for (int idim = 0; idim < AMREX_SPACEDIM; ++idim) {
         if ( ! geom.isPeriodic(idim) ) {
-            domain.grow(idim, ncell);
+            if (do_pml_Lo[idim]){
+                domain.growLo(idim, ncell);
+            }
+            if (do_pml_Hi[idim]){
+                domain.growHi(idim, ncell);
+            }
         }
     }
 
@@ -753,3 +790,57 @@ PML::Restart (const std::string& dir)
         VisMF::Read(*pml_B_cp[2], dir+"_Bz_cp");
     }
 }
+
+#ifdef WARPX_USE_PSATD
+/* \brief Advance the PML fields over one time step with the spectral solver */
+void
+PML::PushPSATD () {
+    // No solver is built when the constructor finds no PML box: nothing to push
+    if (!spectral_solver_fp) return;
+    PushPMLPSATDSinglePatch( *spectral_solver_fp, pml_E_fp, pml_B_fp );
+    // The coarse-patch solver is only built when a coarse geometry was given
+    if (spectral_solver_cp) PushPMLPSATDSinglePatch( *spectral_solver_cp, pml_E_cp, pml_B_cp );
+}
+
+void
+PushPMLPSATDSinglePatch (
+    SpectralSolver& solver,
+    std::array<std::unique_ptr<amrex::MultiFab>,3>& pml_E,
+    std::array<std::unique_ptr<amrex::MultiFab>,3>& pml_B ) {
+    // Push the split PML fields of one patch: FFT, spectral update, inverse FFT
+    using Idx = SpectralPMLIndex;
+
+    // Perform forward Fourier transform of each split component
+    // Note: the correspondence between the spectral PML index
+    // (Exy, Ezx, etc.) and the component (0 or 1) of the
+    // MultiFabs (e.g. pml_E) is dictated by the
+    // function that damps the PML
+    solver.ForwardTransform(*pml_E[0], Idx::Exy, 0);
+    solver.ForwardTransform(*pml_E[0], Idx::Exz, 1);
+    solver.ForwardTransform(*pml_E[1], Idx::Eyz, 0);
+    solver.ForwardTransform(*pml_E[1], Idx::Eyx, 1);
+    solver.ForwardTransform(*pml_E[2], Idx::Ezx, 0);
+    solver.ForwardTransform(*pml_E[2], Idx::Ezy, 1);
+    solver.ForwardTransform(*pml_B[0], Idx::Bxy, 0);
+    solver.ForwardTransform(*pml_B[0], Idx::Bxz, 1);
+    solver.ForwardTransform(*pml_B[1], Idx::Byz, 0);
+    solver.ForwardTransform(*pml_B[1], Idx::Byx, 1);
+    solver.ForwardTransform(*pml_B[2], Idx::Bzx, 0);
+    solver.ForwardTransform(*pml_B[2], Idx::Bzy, 1);
+    // Advance fields in spectral space
+    solver.pushSpectralFields();
+    // Perform backward Fourier transform, back into the same components
+    solver.BackwardTransform(*pml_E[0], Idx::Exy, 0);
+    solver.BackwardTransform(*pml_E[0], Idx::Exz, 1);
+    solver.BackwardTransform(*pml_E[1], Idx::Eyz, 0);
+    solver.BackwardTransform(*pml_E[1], Idx::Eyx, 1);
+    solver.BackwardTransform(*pml_E[2], Idx::Ezx, 0);
+    solver.BackwardTransform(*pml_E[2], Idx::Ezy, 1);
+    solver.BackwardTransform(*pml_B[0], Idx::Bxy, 0);
+    solver.BackwardTransform(*pml_B[0], Idx::Bxz, 1);
+    solver.BackwardTransform(*pml_B[1], Idx::Byz, 0);
+    solver.BackwardTransform(*pml_B[1], Idx::Byx, 1);
+    solver.BackwardTransform(*pml_B[2], Idx::Bzx, 0);
+    solver.BackwardTransform(*pml_B[2], Idx::Bzy, 1);
+}
+#endif
diff --git a/Source/Diagnostics/ParticleIO.cpp b/Source/Diagnostics/ParticleIO.cpp
index f2a543ed5..f159e5302 100644
--- a/Source/Diagnostics/ParticleIO.cpp
+++ b/Source/Diagnostics/ParticleIO.cpp
@@ -98,7 +98,7 @@ MultiParticleContainer::WritePlotFile (const std::string& dir) const
             real_names.push_back("By");
             real_names.push_back("Bz");
             
-#ifdef WARPX_RZ
+#ifdef WARPX_DIM_RZ
             real_names.push_back("theta");
 #endif
             
diff --git a/Source/Evolve/WarpXEvolveEM.cpp b/Source/Evolve/WarpXEvolveEM.cpp
index 32a4747db..16b5905d1 100644
--- a/Source/Evolve/WarpXEvolveEM.cpp
+++ b/Source/Evolve/WarpXEvolveEM.cpp
@@ -299,6 +299,7 @@ WarpX::OneStep_nosub (Real cur_time)
     // (And update guard cells immediately afterwards)
 #ifdef WARPX_USE_PSATD
     PushPSATD(dt[0]);
+    if (do_pml) DampPML();
     FillBoundaryE();
     FillBoundaryB();
 #else
@@ -481,6 +482,19 @@ WarpX::PushParticlesandDepose (int lev, Real cur_time)
                  Efield_cax[lev][0].get(), Efield_cax[lev][1].get(), Efield_cax[lev][2].get(),
                  Bfield_cax[lev][0].get(), Bfield_cax[lev][1].get(), Bfield_cax[lev][2].get(),
                  cur_time, dt[lev]);
+#ifdef WARPX_DIM_RZ
+    // This is called after all particles have deposited their current and charge.
+    ApplyInverseVolumeScalingToCurrentDensity(current_fp[lev][0].get(), current_fp[lev][1].get(), current_fp[lev][2].get(), lev);
+    if (current_buf[lev][0].get()) {
+        ApplyInverseVolumeScalingToCurrentDensity(current_buf[lev][0].get(), current_buf[lev][1].get(), current_buf[lev][2].get(), lev-1);
+    }
+    if (rho_fp[lev].get()) {
+        ApplyInverseVolumeScalingToChargeDensity(rho_fp[lev].get(), lev);
+        if (charge_buf[lev].get()) {
+            ApplyInverseVolumeScalingToChargeDensity(charge_buf[lev].get(), lev-1);
+        }
+    }
+#endif
 }
 
 void
@@ -491,7 +505,7 @@ WarpX::ComputeDt ()
 
     if (maxwell_fdtd_solver_id == 0) {
         // CFL time step Yee solver
-#ifdef WARPX_RZ
+#ifdef WARPX_DIM_RZ
         // Derived semi-analytically by R. Lehe
         deltat  = cfl * 1./( std::sqrt((1+0.2105)/(dx[0]*dx[0]) + 1./(dx[1]*dx[1])) * PhysConst::c );
 #else
@@ -536,10 +550,7 @@ WarpX::computeMaxStepBoostAccelerator(amrex::Geometry a_geom){
         WarpX::moving_window_dir == AMREX_SPACEDIM-1,
         "Can use zmax_plasma_to_compute_max_step only if " +
         "moving window along z. TODO: all directions.");
-    AMREX_ALWAYS_ASSERT_WITH_MESSAGE(
-        maxLevel() == 0,
-        "Can use zmax_plasma_to_compute_max_step only if " +
-        "max level = 0.");
+
     AMREX_ALWAYS_ASSERT_WITH_MESSAGE(
         (WarpX::boost_direction[0]-0)*(WarpX::boost_direction[0]-0) +
         (WarpX::boost_direction[1]-0)*(WarpX::boost_direction[1]-0) +
@@ -560,7 +571,12 @@ WarpX::computeMaxStepBoostAccelerator(amrex::Geometry a_geom){
     const Real interaction_time_boost = (len_plasma_boost-zmin_domain_boost)/
         (moving_window_v-v_plasma_boost);
     // Divide by dt, and update value of max_step.
-    const int computed_max_step = interaction_time_boost/dt[0];
+    int computed_max_step;
+    if (do_subcycling){
+        computed_max_step = interaction_time_boost/dt[0];
+    } else {
+        computed_max_step = interaction_time_boost/dt[maxLevel()];
+    }
     max_step = computed_max_step;
     Print()<<"max_step computed in computeMaxStepBoostAccelerator: "
            <<computed_max_step<<std::endl;
diff --git a/Source/FieldSolver/SpectralSolver/SpectralAlgorithms/Make.package b/Source/FieldSolver/SpectralSolver/SpectralAlgorithms/Make.package
index c62c21f44..ee8376865 100644
--- a/Source/FieldSolver/SpectralSolver/SpectralAlgorithms/Make.package
+++ b/Source/FieldSolver/SpectralSolver/SpectralAlgorithms/Make.package
@@ -1,6 +1,8 @@
 CEXE_headers += SpectralBaseAlgorithm.H
 CEXE_headers += PsatdAlgorithm.H
 CEXE_sources += PsatdAlgorithm.cpp
+CEXE_headers += PMLPsatdAlgorithm.H
+CEXE_sources += PMLPsatdAlgorithm.cpp
 
 INCLUDE_LOCATIONS += $(WARPX_HOME)/Source/FieldSolver/SpectralSolver/SpectralAlgorithms
 VPATH_LOCATIONS   += $(WARPX_HOME)/Source/FieldSolver/SpectralSolver/SpectralAlgorithms
diff --git a/Source/FieldSolver/SpectralSolver/SpectralAlgorithms/PMLPsatdAlgorithm.H b/Source/FieldSolver/SpectralSolver/SpectralAlgorithms/PMLPsatdAlgorithm.H
new file mode 100644
index 000000000..a2511b6b7
--- /dev/null
+++ b/Source/FieldSolver/SpectralSolver/SpectralAlgorithms/PMLPsatdAlgorithm.H
@@ -0,0 +1,34 @@
+#ifndef WARPX_PML_PSATD_ALGORITHM_H_
+#define WARPX_PML_PSATD_ALGORITHM_H_
+
+#include <SpectralBaseAlgorithm.H>
+
+/* \brief Class that updates the split PML fields in spectral space
+ * and stores the coefficients of the corresponding update equation.
+ */
+class PMLPsatdAlgorithm : public SpectralBaseAlgorithm
+{
+    public:
+        // Allocate the coefficient arrays and fill them for the time step dt
+        PMLPsatdAlgorithm(const SpectralKSpace& spectral_kspace,
+                          const amrex::DistributionMapping& dm,
+                          const int norder_x, const int norder_y,
+                          const int norder_z, const bool nodal,
+                          const amrex::Real dt);
+        // Fill C_coef and S_ck_coef over k space, for the time step dt
+        void InitializeSpectralCoefficients(
+            const SpectralKSpace& spectral_kspace,
+            const amrex::DistributionMapping& dm,
+            const amrex::Real dt);
+
+        // Redefine functions from base class
+        virtual void pushSpectralFields(SpectralFieldData& f) const override final;
+        virtual int getRequiredNumberOfFields() const override final {
+            return SpectralPMLIndex::n_fields;
+        }
+    private:
+        // cos(c*k*dt) and sin(c*k*dt)/(c*k), stored at each point of k space
+        SpectralCoefficients C_coef, S_ck_coef;
+};
+
+#endif // WARPX_PML_PSATD_ALGORITHM_H_
diff --git a/Source/FieldSolver/SpectralSolver/SpectralAlgorithms/PMLPsatdAlgorithm.cpp b/Source/FieldSolver/SpectralSolver/SpectralAlgorithms/PMLPsatdAlgorithm.cpp
new file mode 100644
index 000000000..d76259d4c
--- /dev/null
+++ b/Source/FieldSolver/SpectralSolver/SpectralAlgorithms/PMLPsatdAlgorithm.cpp
@@ -0,0 +1,146 @@
+#include <PMLPsatdAlgorithm.H>
+#include <WarpXConst.H>
+#include <cmath>
+
+using namespace amrex;
+
+/* \brief Allocate and initialize the coefficients of the PML update equation */
+PMLPsatdAlgorithm::PMLPsatdAlgorithm(
+                         const SpectralKSpace& spectral_kspace,
+                         const DistributionMapping& dm,
+                         const int norder_x, const int norder_y,
+                         const int norder_z, const bool nodal, const Real dt)
+     // Initialize members of base class
+     : SpectralBaseAlgorithm( spectral_kspace, dm,
+                              norder_x, norder_y, norder_z, nodal )
+{
+    const BoxArray& ba = spectral_kspace.spectralspace_ba;
+
+    // Allocate the arrays of coefficients, on the boxes of spectral space
+    C_coef = SpectralCoefficients(ba, dm, 1, 0);
+    S_ck_coef = SpectralCoefficients(ba, dm, 1, 0);
+    // Fill the arrays with the values that correspond to the time step dt
+    InitializeSpectralCoefficients(spectral_kspace, dm, dt);
+}
+
+/* Advance the split PML components of E and B in spectral space
+ * (stored in `f`) over one time step */
+void
+PMLPsatdAlgorithm::pushSpectralFields(SpectralFieldData& f) const{
+
+    // Loop over boxes
+    for (MFIter mfi(f.fields); mfi.isValid(); ++mfi){
+
+        const Box& bx = f.fields[mfi].box();
+
+        // Extract arrays for the fields to be updated
+        Array4<Complex> fields = f.fields[mfi].array();
+        // Extract arrays for the coefficients
+        Array4<const Real> C_arr = C_coef[mfi].array();
+        Array4<const Real> S_ck_arr = S_ck_coef[mfi].array();
+        // Extract pointers for the k vectors
+        const Real* modified_kx_arr = modified_kx_vec[mfi].dataPtr();
+#if (AMREX_SPACEDIM==3)
+        const Real* modified_ky_arr = modified_ky_vec[mfi].dataPtr();
+#endif
+        const Real* modified_kz_arr = modified_kz_vec[mfi].dataPtr();
+
+        // Loop over indices within one box
+        ParallelFor(bx,
+        [=] AMREX_GPU_DEVICE(int i, int j, int k) noexcept
+        {
+            // Record the old total fields (sum of the two split components)
+            using Idx = SpectralPMLIndex;
+            const Complex Ex_old = fields(i,j,k,Idx::Exy)
+                                 + fields(i,j,k,Idx::Exz);
+            const Complex Ey_old = fields(i,j,k,Idx::Eyx)
+                                 + fields(i,j,k,Idx::Eyz);
+            const Complex Ez_old = fields(i,j,k,Idx::Ezx)
+                                 + fields(i,j,k,Idx::Ezy);
+            const Complex Bx_old = fields(i,j,k,Idx::Bxy)
+                                 + fields(i,j,k,Idx::Bxz);
+            const Complex By_old = fields(i,j,k,Idx::Byx)
+                                 + fields(i,j,k,Idx::Byz);
+            const Complex Bz_old = fields(i,j,k,Idx::Bzx)
+                                 + fields(i,j,k,Idx::Bzy);
+            // k vector values, and coefficients
+            const Real kx = modified_kx_arr[i];
+#if (AMREX_SPACEDIM==3)
+            const Real ky = modified_ky_arr[j];
+            const Real kz = modified_kz_arr[k];
+#else
+            constexpr Real ky = 0;
+            const Real kz = modified_kz_arr[j];
+#endif
+            constexpr Real c2 = PhysConst::c*PhysConst::c;
+            const Complex I = Complex{0,1};
+            const Real C = C_arr(i,j,k);
+            const Real S_ck = S_ck_arr(i,j,k);
+
+            // Update E (each split component is driven by the old total B)
+            fields(i,j,k,Idx::Exy) = C*fields(i,j,k,Idx::Exy) + S_ck*c2*I*ky*Bz_old;
+            fields(i,j,k,Idx::Exz) = C*fields(i,j,k,Idx::Exz) - S_ck*c2*I*kz*By_old;
+            fields(i,j,k,Idx::Eyz) = C*fields(i,j,k,Idx::Eyz) + S_ck*c2*I*kz*Bx_old;
+            fields(i,j,k,Idx::Eyx) = C*fields(i,j,k,Idx::Eyx) - S_ck*c2*I*kx*Bz_old;
+            fields(i,j,k,Idx::Ezx) = C*fields(i,j,k,Idx::Ezx) + S_ck*c2*I*kx*By_old;
+            fields(i,j,k,Idx::Ezy) = C*fields(i,j,k,Idx::Ezy) - S_ck*c2*I*ky*Bx_old;
+            // Update B (each split component is driven by the old total E)
+            fields(i,j,k,Idx::Bxy) = C*fields(i,j,k,Idx::Bxy) - S_ck*I*ky*Ez_old;
+            fields(i,j,k,Idx::Bxz) = C*fields(i,j,k,Idx::Bxz) + S_ck*I*kz*Ey_old;
+            fields(i,j,k,Idx::Byz) = C*fields(i,j,k,Idx::Byz) - S_ck*I*kz*Ex_old;
+            fields(i,j,k,Idx::Byx) = C*fields(i,j,k,Idx::Byx) + S_ck*I*kx*Ez_old;
+            fields(i,j,k,Idx::Bzx) = C*fields(i,j,k,Idx::Bzx) - S_ck*I*kx*Ey_old;
+            fields(i,j,k,Idx::Bzy) = C*fields(i,j,k,Idx::Bzy) + S_ck*I*ky*Ex_old;
+        });
+    }
+}
+
+/* \brief Fill C_coef and S_ck_coef with cos(c*k*dt) and sin(c*k*dt)/(c*k),
+ * where k is the norm of the modified k vector at each point of k space */
+void PMLPsatdAlgorithm::InitializeSpectralCoefficients (
+    const SpectralKSpace& spectral_kspace,
+    const amrex::DistributionMapping& dm,
+    const amrex::Real dt)
+{
+    const BoxArray& ba = spectral_kspace.spectralspace_ba;
+    // Loop over the boxes owned by the local MPI proc
+    for (MFIter mfi(ba, dm); mfi.isValid(); ++mfi){
+
+        const Box& bx = ba[mfi];
+
+        // Extract pointers for the k vectors
+        const Real* modified_kx = modified_kx_vec[mfi].dataPtr();
+#if (AMREX_SPACEDIM==3)
+        const Real* modified_ky = modified_ky_vec[mfi].dataPtr();
+#endif
+        const Real* modified_kz = modified_kz_vec[mfi].dataPtr();
+        // Extract arrays for the coefficients
+        Array4<Real> C = C_coef[mfi].array();
+        Array4<Real> S_ck = S_ck_coef[mfi].array();
+
+        // Loop over indices within one box
+        ParallelFor(bx,
+        [=] AMREX_GPU_DEVICE(int i, int j, int k) noexcept
+        {
+            // Calculate norm of vector (plain products instead of std::pow)
+            const Real k_norm = std::sqrt(
+                modified_kx[i]*modified_kx[i] +
+#if (AMREX_SPACEDIM==3)
+                modified_ky[j]*modified_ky[j] +
+                modified_kz[k]*modified_kz[k]);
+#else
+                modified_kz[j]*modified_kz[j]);
+#endif
+
+            // Calculate coefficients
+            constexpr Real c = PhysConst::c;
+            if (k_norm != 0){
+                C(i,j,k) = std::cos(c*k_norm*dt);
+                S_ck(i,j,k) = std::sin(c*k_norm*dt)/(c*k_norm);
+            } else { // Handle k_norm = 0, by using the analytical limit
+                C(i,j,k) = 1.;
+                S_ck(i,j,k) = dt;
+            }
+        });
+    }
+}
diff --git a/Source/FieldSolver/SpectralSolver/SpectralAlgorithms/PsatdAlgorithm.H b/Source/FieldSolver/SpectralSolver/SpectralAlgorithms/PsatdAlgorithm.H
index 12718e38b..825d04dc2 100644
--- a/Source/FieldSolver/SpectralSolver/SpectralAlgorithms/PsatdAlgorithm.H
+++ b/Source/FieldSolver/SpectralSolver/SpectralAlgorithms/PsatdAlgorithm.H
@@ -13,14 +13,18 @@ class PsatdAlgorithm : public SpectralBaseAlgorithm
         PsatdAlgorithm(const SpectralKSpace& spectral_kspace,
                          const amrex::DistributionMapping& dm,
                          const int norder_x, const int norder_y,
-                         const int norder_z, const bool nodal, const amrex::Real dt);
-
+                         const int norder_z, const bool nodal,
+                         const amrex::Real dt);
+        // Redefine functions from base class
+        virtual void pushSpectralFields(SpectralFieldData& f) const override final;
+        virtual int getRequiredNumberOfFields() const override final {
+            return SpectralFieldIndex::n_fields;
+        }
+        
         void InitializeSpectralCoefficients(const SpectralKSpace& spectral_kspace,
-                                    const amrex::DistributionMapping& dm, 
+                                    const amrex::DistributionMapping& dm,
                                     const amrex::Real dt);
 
-        void pushSpectralFields(SpectralFieldData& f) const override final;
-
     private:
         SpectralCoefficients C_coef, S_ck_coef, X1_coef, X2_coef, X3_coef;
 };
diff --git a/Source/FieldSolver/SpectralSolver/SpectralAlgorithms/SpectralBaseAlgorithm.H b/Source/FieldSolver/SpectralSolver/SpectralAlgorithms/SpectralBaseAlgorithm.H
index 602eb2473..5d5e376c1 100644
--- a/Source/FieldSolver/SpectralSolver/SpectralAlgorithms/SpectralBaseAlgorithm.H
+++ b/Source/FieldSolver/SpectralSolver/SpectralAlgorithms/SpectralBaseAlgorithm.H
@@ -14,9 +14,9 @@
 class SpectralBaseAlgorithm
 {
     public:
-        // Member function that updates the fields in spectral space ;
-        // meant to be overridden in subclasses
+        // Pure virtual member functions ; must be overridden in subclasses
         virtual void pushSpectralFields(SpectralFieldData& f) const = 0;
+        virtual int getRequiredNumberOfFields() const = 0;
         // The destructor should also be a virtual function, so that
         // a pointer to subclass of `SpectraBaseAlgorithm` actually
         // calls the subclass's destructor.
diff --git a/Source/FieldSolver/SpectralSolver/SpectralFieldData.H b/Source/FieldSolver/SpectralSolver/SpectralFieldData.H
index 7954414b8..6a2446981 100644
--- a/Source/FieldSolver/SpectralSolver/SpectralFieldData.H
+++ b/Source/FieldSolver/SpectralSolver/SpectralFieldData.H
@@ -8,18 +8,24 @@
 // Declare type for spectral fields
 using SpectralField = amrex::FabArray< amrex::BaseFab <Complex> >;
 
-/* Index for the fields that will be stored in spectral space */
+/* Index for the regular fields, when stored in spectral space */
 struct SpectralFieldIndex {
   enum { Ex=0, Ey, Ez, Bx, By, Bz, Jx, Jy, Jz, rho_old, rho_new, n_fields };
   // n_fields is automatically the total number of fields
 };
 
+/* Index for the split PML field components, when stored in spectral space */
+struct SpectralPMLIndex {
+  enum { Exy=0, Exz, Eyx, Eyz, Ezx, Ezy,
+           Bxy, Bxz, Byx, Byz, Bzx, Bzy, n_fields };
+  // n_fields comes last, so it equals the number of fields listed before it
+};
+
 /* \brief Class that stores the fields in spectral space, and performs the
  *  Fourier transforms between real space and spectral space
  */
 class SpectralFieldData
 {
-    friend class PsatdAlgorithm;
 
     // Define the FFTplans type, which holds one fft plan per box
     // (plans are only initialized for the boxes that are owned by
@@ -32,8 +38,9 @@ class SpectralFieldData
 
     public:
         SpectralFieldData( const amrex::BoxArray& realspace_ba,
-                      const SpectralKSpace& k_space,
-                      const amrex::DistributionMapping& dm );
+                           const SpectralKSpace& k_space,
+                           const amrex::DistributionMapping& dm,
+                           const int n_field_required );
         SpectralFieldData() = default; // Default constructor
         SpectralFieldData& operator=(SpectralFieldData&& field_data) = default;
         ~SpectralFieldData();
@@ -41,10 +48,10 @@ class SpectralFieldData
                                const int field_index, const int i_comp);
         void BackwardTransform( amrex::MultiFab& mf,
                                const int field_index, const int i_comp);
-
-    private:
         // `fields` stores fields in spectral space, as multicomponent FabArray
         SpectralField fields;
+
+    private:
         // tmpRealField and tmpSpectralField store fields
         // right before/after the Fourier transform
         SpectralField tmpSpectralField; // contains Complexs
diff --git a/Source/FieldSolver/SpectralSolver/SpectralFieldData.cpp b/Source/FieldSolver/SpectralSolver/SpectralFieldData.cpp
index 948baf0a6..8f0853484 100644
--- a/Source/FieldSolver/SpectralSolver/SpectralFieldData.cpp
+++ b/Source/FieldSolver/SpectralSolver/SpectralFieldData.cpp
@@ -5,14 +5,14 @@ using namespace amrex;
 /* \brief Initialize fields in spectral space, and FFT plans */
 SpectralFieldData::SpectralFieldData( const BoxArray& realspace_ba,
                             const SpectralKSpace& k_space,
-                            const DistributionMapping& dm )
+                            const DistributionMapping& dm,
+                            const int n_field_required )
 {
     const BoxArray& spectralspace_ba = k_space.spectralspace_ba;
 
     // Allocate the arrays that contain the fields in spectral space
     // (one component per field)
-    fields = SpectralField(spectralspace_ba, dm,
-                            SpectralFieldIndex::n_fields, 0);
+    fields = SpectralField(spectralspace_ba, dm, n_field_required, 0);
 
     // Allocate temporary arrays - in real space and spectral space
     // These arrays will store the data just before/after the FFT
diff --git a/Source/FieldSolver/SpectralSolver/SpectralSolver.H b/Source/FieldSolver/SpectralSolver/SpectralSolver.H
index d4019a9a3..c570b017b 100644
--- a/Source/FieldSolver/SpectralSolver/SpectralSolver.H
+++ b/Source/FieldSolver/SpectralSolver/SpectralSolver.H
@@ -23,7 +23,8 @@ class SpectralSolver
                         const amrex::DistributionMapping& dm,
                         const int norder_x, const int norder_y,
                         const int norder_z, const bool nodal,
-                        const amrex::RealVect dx, const amrex::Real dt );
+                        const amrex::RealVect dx, const amrex::Real dt,
+                        const bool pml=false );
 
         /* \brief Transform the component `i_comp` of MultiFab `mf`
          *  to spectral space, and store the corresponding result internally
diff --git a/Source/FieldSolver/SpectralSolver/SpectralSolver.cpp b/Source/FieldSolver/SpectralSolver/SpectralSolver.cpp
index c21c3cfb1..4b9def013 100644
--- a/Source/FieldSolver/SpectralSolver/SpectralSolver.cpp
+++ b/Source/FieldSolver/SpectralSolver/SpectralSolver.cpp
@@ -1,19 +1,29 @@
 #include <SpectralKSpace.H>
 #include <SpectralSolver.H>
 #include <PsatdAlgorithm.H>
+#include <PMLPsatdAlgorithm.H>
 
 /* \brief Initialize the spectral Maxwell solver
  *
  * This function selects the spectral algorithm to be used, allocates the
  * corresponding coefficients for the discretized field update equation,
  * and prepares the structures that store the fields in spectral space.
+ *
+ * \param norder_x Order of accuracy of the spatial derivatives along x
+ * \param norder_y Order of accuracy of the spatial derivatives along y
+ * \param norder_z Order of accuracy of the spatial derivatives along z
+ * \param nodal    Whether the solver is applied to a nodal or staggered grid
+ * \param dx       Cell size along each dimension
+ * \param dt       Time step
+ * \param pml      Whether the boxes in which the solver is applied are PML boxes
  */
 SpectralSolver::SpectralSolver(
                 const amrex::BoxArray& realspace_ba,
                 const amrex::DistributionMapping& dm,
                 const int norder_x, const int norder_y,
                 const int norder_z, const bool nodal,
-                const amrex::RealVect dx, const amrex::Real dt ) {
+                const amrex::RealVect dx, const amrex::Real dt,
+                const bool pml ) {
 
     // Initialize all structures using the same distribution mapping dm
 
@@ -24,12 +34,16 @@ SpectralSolver::SpectralSolver(
 
     // - Select the algorithm depending on the input parameters
     //   Initialize the corresponding coefficients over k space
-    // TODO: Add more algorithms + selection depending on input parameters
-    //       For the moment, this only uses the standard PsatdAlgorithm
-    algorithm = std::unique_ptr<PsatdAlgorithm>( new PsatdAlgorithm(
+    if (pml) {
+        algorithm = std::unique_ptr<PMLPsatdAlgorithm>( new PMLPsatdAlgorithm(
+            k_space, dm, norder_x, norder_y, norder_z, nodal, dt ) );
+    } else {
+        algorithm = std::unique_ptr<PsatdAlgorithm>( new PsatdAlgorithm(
             k_space, dm, norder_x, norder_y, norder_z, nodal, dt ) );
+    }
 
     // - Initialize arrays for fields in spectral space + FFT plans
-    field_data = SpectralFieldData( realspace_ba, k_space, dm );
+    field_data = SpectralFieldData( realspace_ba, k_space, dm,
+            algorithm->getRequiredNumberOfFields() );
 
 };
diff --git a/Source/FieldSolver/WarpXPushFieldsEM.cpp b/Source/FieldSolver/WarpXPushFieldsEM.cpp
index 4fce4717b..1df05bc0f 100644
--- a/Source/FieldSolver/WarpXPushFieldsEM.cpp
+++ b/Source/FieldSolver/WarpXPushFieldsEM.cpp
@@ -18,6 +18,40 @@
 using namespace amrex;
 
 #ifdef WARPX_USE_PSATD
+namespace {
+    void
+    PushPSATDSinglePatch (
+        SpectralSolver& solver,
+        std::array<std::unique_ptr<amrex::MultiFab>,3>& Efield,
+        std::array<std::unique_ptr<amrex::MultiFab>,3>& Bfield,
+        std::array<std::unique_ptr<amrex::MultiFab>,3>& current,
+        std::unique_ptr<amrex::MultiFab>& rho ) {
+
+        using Idx = SpectralFieldIndex;
+
+        // Perform forward Fourier transform
+        solver.ForwardTransform(*Efield[0], Idx::Ex);
+        solver.ForwardTransform(*Efield[1], Idx::Ey);
+        solver.ForwardTransform(*Efield[2], Idx::Ez);
+        solver.ForwardTransform(*Bfield[0], Idx::Bx);
+        solver.ForwardTransform(*Bfield[1], Idx::By);
+        solver.ForwardTransform(*Bfield[2], Idx::Bz);
+        solver.ForwardTransform(*current[0], Idx::Jx);
+        solver.ForwardTransform(*current[1], Idx::Jy);
+        solver.ForwardTransform(*current[2], Idx::Jz);
+        solver.ForwardTransform(*rho, Idx::rho_old, 0);
+        solver.ForwardTransform(*rho, Idx::rho_new, 1);
+        // Advance fields in spectral space
+        solver.pushSpectralFields();
+        // Perform backward Fourier Transform
+        solver.BackwardTransform(*Efield[0], Idx::Ex);
+        solver.BackwardTransform(*Efield[1], Idx::Ey);
+        solver.BackwardTransform(*Efield[2], Idx::Ez);
+        solver.BackwardTransform(*Bfield[0], Idx::Bx);
+        solver.BackwardTransform(*Bfield[1], Idx::By);
+        solver.BackwardTransform(*Bfield[2], Idx::Bz);
+    }
+}
 
 void
 WarpX::PushPSATD (amrex::Real a_dt)
@@ -31,38 +65,25 @@ WarpX::PushPSATD (amrex::Real a_dt)
         } else {
             PushPSATD_localFFT(lev, a_dt);
         }
+
+        // Evolve the fields in the PML boxes
+        if (do_pml && pml[lev]->ok()) {
+            pml[lev]->PushPSATD();
+        }
     }
 }
 
-void WarpX::PushPSATD_localFFT (int lev, amrex::Real /* dt */)
+void
+WarpX::PushPSATD_localFFT (int lev, amrex::Real /* dt */)
 {
-    auto& solver = *spectral_solver_fp[lev];
-
-    // Perform forward Fourier transform
-    solver.ForwardTransform(*Efield_fp[lev][0], SpectralFieldIndex::Ex);
-    solver.ForwardTransform(*Efield_fp[lev][1], SpectralFieldIndex::Ey);
-    solver.ForwardTransform(*Efield_fp[lev][2], SpectralFieldIndex::Ez);
-    solver.ForwardTransform(*Bfield_fp[lev][0], SpectralFieldIndex::Bx);
-    solver.ForwardTransform(*Bfield_fp[lev][1], SpectralFieldIndex::By);
-    solver.ForwardTransform(*Bfield_fp[lev][2], SpectralFieldIndex::Bz);
-    solver.ForwardTransform(*current_fp[lev][0], SpectralFieldIndex::Jx);
-    solver.ForwardTransform(*current_fp[lev][1], SpectralFieldIndex::Jy);
-    solver.ForwardTransform(*current_fp[lev][2], SpectralFieldIndex::Jz);
-    solver.ForwardTransform(*rho_fp[lev], SpectralFieldIndex::rho_old, 0);
-    solver.ForwardTransform(*rho_fp[lev], SpectralFieldIndex::rho_new, 1);
-
-    // Advance fields in spectral space
-    solver.pushSpectralFields();
-
-    // Perform backward Fourier Transform
-    solver.BackwardTransform(*Efield_fp[lev][0], SpectralFieldIndex::Ex);
-    solver.BackwardTransform(*Efield_fp[lev][1], SpectralFieldIndex::Ey);
-    solver.BackwardTransform(*Efield_fp[lev][2], SpectralFieldIndex::Ez);
-    solver.BackwardTransform(*Bfield_fp[lev][0], SpectralFieldIndex::Bx);
-    solver.BackwardTransform(*Bfield_fp[lev][1], SpectralFieldIndex::By);
-    solver.BackwardTransform(*Bfield_fp[lev][2], SpectralFieldIndex::Bz);
+    // Update the fields on the fine and coarse patch
+    PushPSATDSinglePatch( *spectral_solver_fp[lev],
+        Efield_fp[lev], Bfield_fp[lev], current_fp[lev], rho_fp[lev] );
+    if (spectral_solver_cp[lev]) {
+        PushPSATDSinglePatch( *spectral_solver_cp[lev],
+             Efield_cp[lev], Bfield_cp[lev], current_cp[lev], rho_cp[lev] );
+    }
 }
-
 #endif
 
 void
@@ -560,3 +581,143 @@ WarpX::EvolveF (int lev, PatchType patch_type, Real a_dt, DtType a_dt_type)
     }
 }
 
+#ifdef WARPX_DIM_RZ
+// This scales the current by the inverse volume and wraps around the deposition at negative radius.
+// It is faster to apply this on the grid than to do it particle by particle.
+// It is put here since there isn't another nice place for it.
+void
+WarpX::ApplyInverseVolumeScalingToCurrentDensity (MultiFab* Jx, MultiFab* Jy, MultiFab* Jz, int lev)
+{
+    const long ngJ = Jx->nGrow();
+    const std::array<Real,3>& dx = WarpX::CellSize(lev);
+    const Real dr = dx[0];
+
+    Box tilebox;
+
+    for ( MFIter mfi(*Jx, TilingIfNotGPU()); mfi.isValid(); ++mfi )
+    {
+
+        Array4<Real> const& Jr_arr = Jx->array(mfi);
+        Array4<Real> const& Jt_arr = Jy->array(mfi);
+        Array4<Real> const& Jz_arr = Jz->array(mfi);
+
+        tilebox = mfi.tilebox();
+        Box tbr = convert(tilebox, WarpX::jx_nodal_flag);
+        Box tbt = convert(tilebox, WarpX::jy_nodal_flag);
+        Box tbz = convert(tilebox, WarpX::jz_nodal_flag);
+
+        // Lower corner of the tile box in physical coordinates.
+        // Note that this uses the tile box exactly as returned by the
+        // MFIter (it is never grown here), so guard cells are excluded.
+        const std::array<Real, 3>& xyzmin = WarpX::LowerCorner(tilebox, lev);
+        const Dim3 lo = lbound(tilebox);
+        const Real rmin = xyzmin[0];
+        const int irmin = lo.x;
+
+        // Rescale current in r-z mode since the inverse volume factor was not
+        // included in the current deposition.
+        amrex::ParallelFor(tbr,
+        [=] AMREX_GPU_DEVICE (int i, int j, int k)
+        {
+            // Wrap the current density deposited in the guard cells around
+            // to the cells above the axis.
+            // Note that Jr(i==0) is at 1/2 dr.
+            if (rmin == 0. && 0 <= i && i < ngJ) {
+                Jr_arr(i,j,0) -= Jr_arr(-1-i,j,0);
+            }
+            // Apply the inverse volume scaling
+            // Since Jr is not cell centered in r, no need for distinction
+            // between on axis and off-axis factors
+            const amrex::Real r = std::abs(rmin + (i - irmin + 0.5)*dr);
+            Jr_arr(i,j,0) /= (2.*MathConst::pi*r);
+        });
+        amrex::ParallelFor(tbt,
+        [=] AMREX_GPU_DEVICE (int i, int j, int k)
+        {
+            // Wrap the current density deposited in the guard cells around
+            // to the cells above the axis.
+            // Jt is located on the boundary
+            if (rmin == 0. && 0 < i && i <= ngJ) {
+                Jt_arr(i,j,0) += Jt_arr(-i,j,0);
+            }
+
+            // Apply the inverse volume scaling
+            // Jt is forced to zero on axis.
+            const amrex::Real r = std::abs(rmin + (i - irmin)*dr);
+            if (r == 0.) {
+                Jt_arr(i,j,0) = 0.;
+            } else {
+                Jt_arr(i,j,0) /= (2.*MathConst::pi*r);
+            }
+        });
+        amrex::ParallelFor(tbz,
+        [=] AMREX_GPU_DEVICE (int i, int j, int k)
+        {
+            // Wrap the current density deposited in the guard cells around
+            // to the cells above the axis.
+            // Jz is located on the boundary
+            if (rmin == 0. && 0 < i && i <= ngJ) {
+                Jz_arr(i,j,0) += Jz_arr(-i,j,0);
+            }
+
+            // Apply the inverse volume scaling
+            const amrex::Real r = std::abs(rmin + (i - irmin)*dr);
+            if (r == 0.) {
+                // Verboncoeur JCP 164, 421-427 (2001) : corrected volume on axis
+                Jz_arr(i,j,0) /= (MathConst::pi*dr/3.);
+            } else {
+                Jz_arr(i,j,0) /= (2.*MathConst::pi*r);
+            }
+        });
+    }
+}
+
+void
+WarpX::ApplyInverseVolumeScalingToChargeDensity (MultiFab* Rho, int lev)
+{
+    const long ngRho = Rho->nGrow();
+    const std::array<Real,3>& dx = WarpX::CellSize(lev);
+    const Real dr = dx[0];
+
+    Box tilebox;
+
+    for ( MFIter mfi(*Rho, TilingIfNotGPU()); mfi.isValid(); ++mfi )
+    {
+
+        Array4<Real> const& Rho_arr = Rho->array(mfi);
+
+        tilebox = mfi.tilebox();
+        Box tb = convert(tilebox, IntVect::TheUnitVector());
+
+        // Lower corner of the tile box in physical coordinates.
+        // Note that this uses the tile box exactly as returned by the
+        // MFIter (it is never grown here), so guard cells are excluded.
+        const std::array<Real, 3>& xyzmin = WarpX::LowerCorner(tilebox, lev);
+        const Dim3 lo = lbound(tilebox);
+        const Real rmin = xyzmin[0];
+        const int irmin = lo.x;
+
+        // Rescale charge in r-z mode since the inverse volume factor was not
+        // included in the charge deposition.
+        amrex::ParallelFor(tb, Rho->nComp(),
+        [=] AMREX_GPU_DEVICE (int i, int j, int k, int icomp)
+        {
+            // Wrap the charge density deposited in the guard cells around
+            // to the cells above the axis.
+            // Rho is located on the boundary
+            if (rmin == 0. && 0 < i && i <= ngRho) {
+                Rho_arr(i,j,0,icomp) += Rho_arr(-i,j,0,icomp);
+            }
+
+            // Apply the inverse volume scaling
+            const amrex::Real r = std::abs(rmin + (i - irmin)*dr);
+            if (r == 0.) {
+                // Verboncoeur JCP 164, 421-427 (2001) : corrected volume on axis
+                Rho_arr(i,j,0,icomp) /= (MathConst::pi*dr/3.);
+            } else {
+                Rho_arr(i,j,0,icomp) /= (2.*MathConst::pi*r);
+            }
+        });
+    }
+}
+#endif
diff --git a/Source/FortranInterface/WarpX_f.H b/Source/FortranInterface/WarpX_f.H
index 0440148eb..aac23f781 100644
--- a/Source/FortranInterface/WarpX_f.H
+++ b/Source/FortranInterface/WarpX_f.H
@@ -62,7 +62,7 @@
 #define WRPX_PUSH_LEAPFROG               warpx_push_leapfrog_2d
 #define WRPX_PUSH_LEAPFROG_POSITIONS     warpx_push_leapfrog_positions_2d
 
-#ifdef WARPX_RZ
+#ifdef WARPX_DIM_RZ
 #define WRPX_COMPUTE_DIVE                warpx_compute_dive_rz
 #else
 #define WRPX_COMPUTE_DIVE                warpx_compute_dive_2d
@@ -75,22 +75,6 @@ extern "C"
 {
 #endif
 
-	// Charge deposition
-	void warpx_charge_deposition(amrex::Real* rho,
-            const long* np,	const amrex::Real* xp, const amrex::Real* yp, const amrex::Real* zp,	const amrex::Real* w,
-			const amrex::Real* q, const amrex::Real* xmin, const amrex::Real* ymin, const amrex::Real* zmin,
-			const amrex::Real* dx, const amrex::Real* dy, const amrex::Real* dz,
-			const long* nx, const long* ny, const long* nz,
-			const long* nxguard, const long* nyguard, const long* nzguard,
-			const long* nox, const long* noy,const long* noz,
-			const long* lvect, const long* charge_depo_algo);
-
-        // Charge deposition finalize for RZ
-        void warpx_charge_deposition_rz_volume_scaling(
-			amrex::Real* rho, const long* rho_ng, const int* rho_ntot,
-			const amrex::Real* rmin,
-			const amrex::Real* dr);
-
 	// Current deposition
 	void warpx_current_deposition(
 			amrex::Real* jx, const long* jx_ng, const int* jx_ntot,
@@ -106,34 +90,6 @@ extern "C"
 			const long* nox, const long* noy,const long* noz,
                         const int* l_nodal, const long* lvect, const long* current_depo_algo);
 
-        // Current deposition finalize for RZ
-        void warpx_current_deposition_rz_volume_scaling(
-			amrex::Real* jx, const long* jx_ng, const int* jx_ntot,
-			amrex::Real* jy, const long* jy_ng, const int* jy_ntot,
-			amrex::Real* jz, const long* jz_ng, const int* jz_ntot,
-			const amrex::Real* rmin,
-			const amrex::Real* dr);
-
-	// Field gathering
-
-	void warpx_geteb_energy_conserving(const long* np,
-			const amrex::Real* xp, const amrex::Real* yp, const amrex::Real* zp,
-			amrex::Real* exp, amrex::Real* eyp, amrex::Real* ezp,
-                        amrex::Real* bxp, amrex::Real* byp, amrex::Real* bzp,
-                        const int* ixyzmin,
-                        const amrex::Real* xmin, const amrex::Real* ymin, const amrex::Real* zmin,
-                        const amrex::Real* dx, const amrex::Real* dy, const amrex::Real* dz,
-			const long* nox, const long* noy, const long* noz,
-			const amrex::Real* exg, const int* exg_lo, const int* exg_hi,
-			const amrex::Real* eyg, const int* eyg_lo, const int* eyg_hi,
-			const amrex::Real* ezg, const int* ezg_lo, const int* ezg_hi,
-			const amrex::Real* bxg, const int* bxg_lo, const int* bxg_hi,
-			const amrex::Real* byg, const int* byg_lo, const int* byg_hi,
-			const amrex::Real* bzg, const int* bzg_lo, const int* bzg_hi,
-			const int* ll4symtry, const int* l_lower_order_in_v,
-                        const int* l_nodal, const long* lvect,
-			const long* field_gathe_algo);
-
 	// Particle pusher (velocity and position)
 
 	void warpx_particle_pusher(const long* np,
@@ -342,7 +298,7 @@ extern "C"
                              const BL_FORT_FAB_ARG_ANYD(ey),
                              const BL_FORT_FAB_ARG_ANYD(ez),
                              const amrex::Real* dx
-#ifdef WARPX_RZ
+#ifdef WARPX_DIM_RZ
                              ,const amrex::Real* rmin
 #endif
                              );
diff --git a/Source/FortranInterface/WarpX_picsar.F90 b/Source/FortranInterface/WarpX_picsar.F90
index dc47245dd..34084d753 100644
--- a/Source/FortranInterface/WarpX_picsar.F90
+++ b/Source/FortranInterface/WarpX_picsar.F90
@@ -1,20 +1,15 @@
 #if (AMREX_SPACEDIM == 3)
 
-#define WRPX_PXR_GETEB_ENERGY_CONSERVING  geteb3d_energy_conserving_generic
 #define WRPX_PXR_CURRENT_DEPOSITION       depose_jxjyjz_generic
 
 #elif (AMREX_SPACEDIM == 2)
 
-#ifdef WARPX_RZ
+#ifdef WARPX_DIM_RZ
 
-#define WRPX_PXR_GETEB_ENERGY_CONSERVING  geteb2drz_energy_conserving_generic
 #define WRPX_PXR_CURRENT_DEPOSITION       depose_jrjtjz_generic_rz
-#define WRPX_PXR_RZ_VOLUME_SCALING_RHO    apply_rz_volume_scaling_rho
-#define WRPX_PXR_RZ_VOLUME_SCALING_J      apply_rz_volume_scaling_j
 
 #else
 
-#define WRPX_PXR_GETEB_ENERGY_CONSERVING  geteb2dxz_energy_conserving_generic
 #define WRPX_PXR_CURRENT_DEPOSITION       depose_jxjyjz_generic_2d
 
 #endif
@@ -56,227 +51,6 @@ contains
   ! _________________________________________________________________
   !>
   !> @brief
-  !> Main subroutine for the field gathering process
-  !>
-  !> @param[in] np number of particles
-  !> @param[in] xp,yp,zp particle position arrays
-  !> @param[in] ex,ey,ez particle electric fields in each direction
-  !> @param[in] bx,by,bz particle magnetic fields in each direction
-  !> @param[in] ixyzmin tile grid minimum index
-  !> @param[in] xmin,ymin,zmin tile grid minimum position
-  !> @param[in] dx,dy,dz space discretization steps
-  !> @param[in] xyzmin grid minimum position
-  !> @param[in] dxyz space discretization steps
-  !> @param[in] nox,noy,noz interpolation order
-  !> @param[in] exg,eyg,ezg electric field grid arrays
-  !> @param[in] bxg,byg,bzg electric field grid arrays
-  !> @param[in] lvect vector length
-  !>
-  subroutine warpx_geteb_energy_conserving(np,xp,yp,zp, &
-       ex,ey,ez,bx,by,bz,ixyzmin,xmin,ymin,zmin,dx,dy,dz,nox,noy,noz, &
-       exg,exg_lo,exg_hi,eyg,eyg_lo,eyg_hi,ezg,ezg_lo,ezg_hi, &
-       bxg,bxg_lo,bxg_hi,byg,byg_lo,byg_hi,bzg,bzg_lo,bzg_hi, &
-       ll4symtry,l_lower_order_in_v, l_nodal,&
-       lvect,field_gathe_algo) &
-       bind(C, name="warpx_geteb_energy_conserving")
-
-    integer, intent(in) :: exg_lo(AMREX_SPACEDIM), eyg_lo(AMREX_SPACEDIM), ezg_lo(AMREX_SPACEDIM), &
-                           bxg_lo(AMREX_SPACEDIM), byg_lo(AMREX_SPACEDIM), bzg_lo(AMREX_SPACEDIM)
-    integer, intent(in) :: exg_hi(AMREX_SPACEDIM), eyg_hi(AMREX_SPACEDIM), ezg_hi(AMREX_SPACEDIM), &
-                           bxg_hi(AMREX_SPACEDIM), byg_hi(AMREX_SPACEDIM), bzg_hi(AMREX_SPACEDIM)
-    integer, intent(in) :: ixyzmin(AMREX_SPACEDIM)
-    real(amrex_real), intent(in) :: xmin,ymin,zmin,dx,dy,dz
-    integer(c_long), intent(in) :: field_gathe_algo
-    integer(c_long), intent(in) :: np,nox,noy,noz
-    integer(c_int), intent(in)  :: ll4symtry,l_lower_order_in_v, l_nodal
-    integer(c_long),intent(in)   :: lvect
-    real(amrex_real), intent(in), dimension(np) :: xp,yp,zp
-    real(amrex_real), intent(out), dimension(np) :: ex,ey,ez,bx,by,bz
-    real(amrex_real),intent(in):: exg(*), eyg(*), ezg(*), bxg(*), byg(*), bzg(*)
-    logical(pxr_logical) :: pxr_ll4symtry, pxr_l_lower_order_in_v, pxr_l_nodal
-
-    ! Compute the number of valid cells and guard cells
-    integer(c_long) :: exg_nvalid(AMREX_SPACEDIM), eyg_nvalid(AMREX_SPACEDIM), ezg_nvalid(AMREX_SPACEDIM),    &
-                       bxg_nvalid(AMREX_SPACEDIM), byg_nvalid(AMREX_SPACEDIM), bzg_nvalid(AMREX_SPACEDIM),    &
-                       exg_nguards(AMREX_SPACEDIM), eyg_nguards(AMREX_SPACEDIM), ezg_nguards(AMREX_SPACEDIM), &
-                       bxg_nguards(AMREX_SPACEDIM), byg_nguards(AMREX_SPACEDIM), bzg_nguards(AMREX_SPACEDIM)
-
-    pxr_ll4symtry = ll4symtry .eq. 1
-    pxr_l_lower_order_in_v = l_lower_order_in_v .eq. 1
-    pxr_l_nodal = l_nodal .eq. 1
-
-    exg_nguards = ixyzmin - exg_lo
-    eyg_nguards = ixyzmin - eyg_lo
-    ezg_nguards = ixyzmin - ezg_lo
-    bxg_nguards = ixyzmin - bxg_lo
-    byg_nguards = ixyzmin - byg_lo
-    bzg_nguards = ixyzmin - bzg_lo
-    exg_nvalid = exg_lo + exg_hi - 2_c_long*ixyzmin + 1_c_long
-    eyg_nvalid = eyg_lo + eyg_hi - 2_c_long*ixyzmin + 1_c_long
-    ezg_nvalid = ezg_lo + ezg_hi - 2_c_long*ixyzmin + 1_c_long
-    bxg_nvalid = bxg_lo + bxg_hi - 2_c_long*ixyzmin + 1_c_long
-    byg_nvalid = byg_lo + byg_hi - 2_c_long*ixyzmin + 1_c_long
-    bzg_nvalid = bzg_lo + bzg_hi - 2_c_long*ixyzmin + 1_c_long
-
-    CALL WRPX_PXR_GETEB_ENERGY_CONSERVING(np,xp,yp,zp, &
-         ex,ey,ez,bx,by,bz,xmin,ymin,zmin,dx,dy,dz,nox,noy,noz, &
-         exg,exg_nguards,exg_nvalid,&
-         eyg,eyg_nguards,eyg_nvalid,&
-         ezg,ezg_nguards,ezg_nvalid,&
-         bxg,bxg_nguards,bxg_nvalid,&
-         byg,byg_nguards,byg_nvalid,&
-         bzg,bzg_nguards,bzg_nvalid,&
-	 pxr_ll4symtry, pxr_l_lower_order_in_v, pxr_l_nodal, &
-	 lvect, field_gathe_algo )
-
-  end subroutine warpx_geteb_energy_conserving
-
-! _________________________________________________________________
-!>
-!> @brief
-!> Main subroutine for the charge deposition
-!>
-!> @details
-!> This subroutines enable to controle the interpolation order
-!> via the parameters nox,noy,noz and the type of algorithm via
-!> the parameter charge_depo_algo
-!
-!> @param[inout] rho charge array
-!> @param[in] np number of particles
-!> @param[in] xp,yp,zp particle position arrays
-!> @param[in] w particle weight arrays
-!> @param[in] q particle species charge
-!> @param[in] xmin,ymin,zmin tile grid minimum position
-!> @param[in] dx,dy,dz space discretization steps
-!> @param[in] nx,ny,nz number of cells
-!> @param[in] nxguard,nyguard,nzguard number of guard cells
-!> @param[in] nox,noy,noz interpolation order
-!> @param[in] lvect vector length
-!> @param[in] charge_depo_algo algorithm choice for the charge deposition
-!>
-subroutine warpx_charge_deposition(rho,np,xp,yp,zp,w,q,xmin,ymin,zmin,dx,dy,dz,nx,ny,nz,&
-   nxguard,nyguard,nzguard,nox,noy,noz,lvect,charge_depo_algo) &
-  bind(C, name="warpx_charge_deposition")
-
-  integer(c_long), intent(IN)                   :: np
-  integer(c_long), intent(IN)                   :: nx,ny,nz
-  integer(c_long), intent(IN)                   :: nxguard,nyguard,nzguard
-  integer(c_long), intent(IN)                   :: nox,noy,noz
-  real(amrex_real), intent(IN OUT)              :: rho(*)
-  real(amrex_real), intent(IN)                  :: q
-  real(amrex_real), intent(IN)                  :: dx,dy,dz
-  real(amrex_real), intent(IN)                  :: xmin,ymin,zmin
-  real(amrex_real), intent(IN),  dimension(np)  :: xp,yp,zp,w
-  integer(c_long), intent(IN)                   :: lvect
-  integer(c_long), intent(IN)                   :: charge_depo_algo
-
-
-  ! Dimension 3
-#if (AMREX_SPACEDIM==3)
-
-  SELECT CASE(charge_depo_algo)
-
-  ! Scalar classical charge deposition subroutines
-  CASE(1)
-    IF ((nox.eq.1).and.(noy.eq.1).and.(noz.eq.1)) THEN
-
-      CALL depose_rho_scalar_1_1_1(rho,np,xp,yp,zp,w,q,xmin,ymin,zmin,dx,dy,dz,nx,ny,nz,&
-    nxguard,nyguard,nzguard,lvect)
-
-    ELSE IF ((nox.eq.2).and.(noy.eq.2).and.(noz.eq.2)) THEN
-
-      CALL depose_rho_scalar_2_2_2(rho,np,xp,yp,zp,w,q,xmin,ymin,zmin,dx,dy,dz,nx,ny,nz,&
-    nxguard,nyguard,nzguard,lvect)
-
-    ELSE IF ((nox.eq.3).and.(noy.eq.3).and.(noz.eq.3)) THEN
-
-      CALL depose_rho_scalar_3_3_3(rho,np,xp,yp,zp,w,q,xmin,ymin,zmin,dx,dy,dz,nx,ny,nz,&
-    nxguard,nyguard,nzguard,lvect)
-
-    ELSE
-      CALL pxr_depose_rho_n(rho,np,xp,yp,zp,w,q,xmin,ymin,zmin,dx,dy,dz,nx,ny,nz,&
-                  nxguard,nyguard,nzguard,nox,noy,noz, &
-                  .TRUE._c_long,.FALSE._c_long)
-    ENDIF
-
-  ! Optimized subroutines
-  CASE DEFAULT
-
-    IF ((nox.eq.1).and.(noy.eq.1).and.(noz.eq.1)) THEN
-      CALL depose_rho_vecHVv2_1_1_1(rho,np,xp,yp,zp,w,q,xmin,ymin,zmin,dx,dy,dz,nx,ny,nz,&
-               nxguard,nyguard,nzguard,lvect)
-
-    ELSE IF ((nox.eq.2).and.(noy.eq.2).and.(noz.eq.2)) THEN
-      CALL depose_rho_vecHVv2_2_2_2(rho,np,xp,yp,zp,w,q,xmin,ymin,zmin,dx,dy,dz,nx,ny,nz,&
-                 nxguard,nyguard,nzguard,lvect)
-
-    ELSE
-      CALL pxr_depose_rho_n(rho,np,xp,yp,zp,w,q,xmin,ymin,zmin,dx,dy,dz,nx,ny,nz,&
-                  nxguard,nyguard,nzguard,nox,noy,noz, &
-                  .TRUE._c_long,.FALSE._c_long)
-    ENDIF
-  END SELECT
-
-  ! Dimension 2
-#elif (AMREX_SPACEDIM==2)
-
-#ifdef WARPX_RZ
-  logical(pxr_logical) :: l_2drz = .TRUE._c_long
-#else
-  logical(pxr_logical) :: l_2drz = .FALSE._c_long
-#endif
-
-  CALL pxr_depose_rho_n_2dxz(rho,np,xp,yp,zp,w,q,xmin,zmin,dx,dz,nx,nz,&
-       nxguard,nzguard,nox,noz, &
-       .TRUE._c_long, .FALSE._c_long, l_2drz, 0_c_long)
-
-#endif
-
- end subroutine warpx_charge_deposition
-
-  ! _________________________________________________________________
-  !>
-  !> @brief
-  !> Applies the inverse volume scaling for RZ charge deposition
-  !>
-  !> @details
-  !> The scaling is done for both single mode (FDTD) and
-  !> multi mode (spectral) (todo)
-  !
-  !> @param[inout] rho charge array
-  !> @param[in] rmin tile grid minimum radius
-  !> @param[in] dr radial space discretization steps
-  !> @param[in] nx,ny,nz number of cells
-  !> @param[in] nxguard,nyguard,nzguard number of guard cells
-  !>
-  subroutine warpx_charge_deposition_rz_volume_scaling(rho,rho_ng,rho_ntot,rmin,dr) &
-    bind(C, name="warpx_charge_deposition_rz_volume_scaling")
-
-    integer, intent(in) :: rho_ntot(AMREX_SPACEDIM)
-    integer(c_long), intent(in) :: rho_ng
-    real(amrex_real), intent(IN OUT):: rho(*)
-    real(amrex_real), intent(IN) :: rmin, dr
-
-#ifdef WARPX_RZ
-    integer(c_long) :: type_rz_depose = 1
-#endif
-
-    ! Compute the number of valid cells and guard cells
-    integer(c_long) :: rho_nvalid(AMREX_SPACEDIM), rho_nguards(AMREX_SPACEDIM)
-    rho_nvalid = rho_ntot - 2*rho_ng
-    rho_nguards = rho_ng
-
-#ifdef WARPX_RZ
-    CALL WRPX_PXR_RZ_VOLUME_SCALING_RHO(   &
-                 rho,rho_nguards,rho_nvalid, &
-                 rmin,dr,type_rz_depose)
-#endif
-
-  end subroutine warpx_charge_deposition_rz_volume_scaling
-
-  ! _________________________________________________________________
-  !>
-  !> @brief
   !> Main subroutine for the current deposition
   !>
   !> @details
@@ -355,165 +129,4 @@ subroutine warpx_charge_deposition(rho,np,xp,yp,zp,w,q,xmin,ymin,zmin,dx,dy,dz,n
 
   end subroutine warpx_current_deposition
 
-  ! _________________________________________________________________
-  !>
-  !> @brief
-  !> Applies the inverse volume scaling for RZ current deposition
-  !>
-  !> @details
-  !> The scaling is done for single mode only
-  !
-  !> @param[inout] jx,jy,jz current arrays
-  !> @param[in] jx_ntot,jy_ntot,jz_ntot vectors with total number of
-  !>            cells (including guard cells) along each axis for each current
-  !> @param[in] jx_ng,jy_ng,jz_ng vectors with number of guard cells along each
-  !>            axis for each current
-  !> @param[in] rmin tile grid minimum radius
-  !> @param[in] dr radial space discretization steps
-  !>
-  subroutine warpx_current_deposition_rz_volume_scaling( &
-    jx,jx_ng,jx_ntot,jy,jy_ng,jy_ntot,jz,jz_ng,jz_ntot, &
-    rmin,dr) &
-    bind(C, name="warpx_current_deposition_rz_volume_scaling")
-
-    integer, intent(in) :: jx_ntot(AMREX_SPACEDIM), jy_ntot(AMREX_SPACEDIM), jz_ntot(AMREX_SPACEDIM)
-    integer(c_long), intent(in) :: jx_ng, jy_ng, jz_ng
-    real(amrex_real), intent(IN OUT):: jx(*), jy(*), jz(*)
-    real(amrex_real), intent(IN) :: rmin, dr
-
-#ifdef WARPX_RZ
-    integer(c_long) :: type_rz_depose = 1
-#endif
-    ! Compute the number of valid cells and guard cells
-    integer(c_long) :: jx_nvalid(AMREX_SPACEDIM), jy_nvalid(AMREX_SPACEDIM), jz_nvalid(AMREX_SPACEDIM), &
-                       jx_nguards(AMREX_SPACEDIM), jy_nguards(AMREX_SPACEDIM), jz_nguards(AMREX_SPACEDIM)
-    jx_nvalid = jx_ntot - 2*jx_ng
-    jy_nvalid = jy_ntot - 2*jy_ng
-    jz_nvalid = jz_ntot - 2*jz_ng
-    jx_nguards = jx_ng
-    jy_nguards = jy_ng
-    jz_nguards = jz_ng
-
-#ifdef WARPX_RZ
-    CALL WRPX_PXR_RZ_VOLUME_SCALING_J(   &
-                 jx,jx_nguards,jx_nvalid, &
-                 jy,jy_nguards,jy_nvalid, &
-                 jz,jz_nguards,jz_nvalid, &
-                 rmin,dr,type_rz_depose)
-#endif
-
-  end subroutine warpx_current_deposition_rz_volume_scaling
-
-  ! _________________________________________________________________
-  !>
-  !> @brief
-  !> Main subroutine for the particle pusher (velocity and position)
-  !>
-  !> @param[in] np number of super-particles
-  !> @param[in] xp,yp,zp particle position arrays
-  !> @param[in] uxp,uyp,uzp normalized momentum in each direction
-  !> @param[in] gaminv particle Lorentz factors
-  !> @param[in] ex,ey,ez particle electric fields in each direction
-  !> @param[in] bx,by,bz particle magnetic fields in each direction
-  !> @param[in] q charge
-  !> @param[in] m masse
-  !> @param[in] dt time step
-  !> @param[in] particle_pusher_algo Particle pusher algorithm
-  subroutine warpx_particle_pusher(np,xp,yp,zp,uxp,uyp,uzp, &
-                                  gaminv,&
-                                  ex,ey,ez,bx,by,bz,q,m,dt, &
-                                  particle_pusher_algo) &
-       bind(C, name="warpx_particle_pusher")
-
-    INTEGER(c_long), INTENT(IN)   :: np
-    REAL(amrex_real),INTENT(INOUT)    :: gaminv(np)
-    REAL(amrex_real),INTENT(INOUT)    :: xp(np),yp(np),zp(np)
-    REAL(amrex_real),INTENT(INOUT)    :: uxp(np),uyp(np),uzp(np)
-    REAL(amrex_real),INTENT(IN)       :: ex(np),ey(np),ez(np)
-    REAL(amrex_real),INTENT(IN)       :: bx(np),by(np),bz(np)
-    REAL(amrex_real),INTENT(IN)       :: q,m,dt
-    INTEGER(c_long), INTENT(IN)   :: particle_pusher_algo
-
-    SELECT CASE (particle_pusher_algo)
-
-    !! Vay pusher -- Full push
-    CASE (1_c_long)
-      CALL pxr_set_gamma(np,uxp,uyp,uzp,gaminv)
-
-      CALL pxr_ebcancelpush3d(np,uxp,uyp,uzp,gaminv, &
-                                 ex,ey,ez,  &
-                                 bx,by,bz,q,m,dt,0_c_long)
-    CASE DEFAULT
-
-      ! Momentum pusher in a single loop
-      CALL pxr_boris_push_u_3d(np,uxp,uyp,uzp,&
-                                     gaminv, &
-                                     ex,ey,ez, &
-                                     bx,by,bz, &
-                                     q,m,dt)
-
-    END SELECT
-
-    !!!! --- push particle species positions a time step
-#if (AMREX_SPACEDIM == 3) || (defined WARPX_RZ)
-    CALL pxr_pushxyz(np,xp,yp,zp,uxp,uyp,uzp,gaminv,dt)
-#elif (AMREX_SPACEDIM == 2)
-    CALL pxr_pushxz(np,xp,zp,uxp,uzp,gaminv,dt)
-#endif
-
-  end subroutine warpx_particle_pusher
-
-
-  ! _________________________________________________________________
-  !>
-  !> @brief
-  !> Main subroutine for the particle pusher (velocity)
-  !>
-  !> @param[in] np number of super-particles
-  !> @param[in] xp,yp,zp particle position arrays
-  !> @param[in] uxp,uyp,uzp normalized momentum in each direction
-  !> @param[in] gaminv particle Lorentz factors
-  !> @param[in] ex,ey,ez particle electric fields in each direction
-  !> @param[in] bx,by,bz particle magnetic fields in each direction
-  !> @param[in] q charge
-  !> @param[in] m masse
-  !> @param[in] dt time step
-  !> @param[in] particle_pusher_algo Particle pusher algorithm
-  subroutine warpx_particle_pusher_momenta(np,xp,yp,zp,uxp,uyp,uzp, &
-                                  gaminv,&
-                                  ex,ey,ez,bx,by,bz,q,m,dt, &
-                                  particle_pusher_algo) &
-       bind(C, name="warpx_particle_pusher_momenta")
-
-    INTEGER(c_long), INTENT(IN)   :: np
-    REAL(amrex_real),INTENT(INOUT)    :: gaminv(np)
-    REAL(amrex_real),INTENT(IN)       :: xp(np),yp(np),zp(np)
-    REAL(amrex_real),INTENT(INOUT)    :: uxp(np),uyp(np),uzp(np)
-    REAL(amrex_real),INTENT(IN)       :: ex(np),ey(np),ez(np)
-    REAL(amrex_real),INTENT(IN)       :: bx(np),by(np),bz(np)
-    REAL(amrex_real),INTENT(IN)       :: q,m,dt
-    INTEGER(c_long), INTENT(IN)   :: particle_pusher_algo
-
-    SELECT CASE (particle_pusher_algo)
-
-    !! Vay pusher -- Full push
-    CASE (1_c_long)
-      CALL pxr_set_gamma(np,uxp,uyp,uzp,gaminv)
-
-      CALL pxr_ebcancelpush3d(np,uxp,uyp,uzp,gaminv, &
-                                 ex,ey,ez,  &
-                                 bx,by,bz,q,m,dt,0_c_long)
-    CASE DEFAULT
-
-      ! Momentum pusher in a single loop
-      CALL pxr_boris_push_u_3d(np,uxp,uyp,uzp,&
-                                     gaminv, &
-                                     ex,ey,ez, &
-                                     bx,by,bz, &
-                                     q,m,dt)
-
-    END SELECT
-
-  end subroutine warpx_particle_pusher_momenta
-
 end module warpx_to_pxr_module
diff --git a/Source/Initialization/CustomDensityProb.H b/Source/Initialization/CustomDensityProb.H
new file mode 100644
index 000000000..b00830e6c
--- /dev/null
+++ b/Source/Initialization/CustomDensityProb.H
@@ -0,0 +1,49 @@
+#ifndef CUSTOM_DENSITY_PROB_H_
+#define CUSTOM_DENSITY_PROB_H_
+
+#include <AMReX_ParmParse.H>
+#include <AMReX_Arena.H>
+#include <AMReX_Gpu.H>
+#include <AMReX_Dim3.H>
+
+// An example of Custom Density Profile
+
+// struct whose getDensity returns density at a given position computed from
+// a custom function, with runtime input parameters.
+struct InjectorDensityCustom
+{
+    InjectorDensityCustom (std::string const& species_name)
+        : p(nullptr)
+    {
+        // Read parameters for custom density profile from file, and
+        // store them in managed memory.
+        amrex::ParmParse pp(species_name);
+        std::vector<amrex::Real> v;
+        pp.getarr("custom_profile_params", v);
+        p = static_cast<amrex::Real*>
+            (amrex::The_Managed_Arena()->alloc(sizeof(amrex::Real)*v.size()));
+        for (int i = 0; i < static_cast<int>(v.size()); ++i) {
+            p[i] = v[i];
+        }
+    }
+
+    // Return density at given position, using user-defined parameters 
+    // stored in p.
+    AMREX_GPU_HOST_DEVICE
+    amrex::Real
+    getDensity (amrex::Real, amrex::Real, amrex::Real) const noexcept
+    {
+        return p[0];
+    }
+
+    // Note that we are not allowed to have non-trivial destructor.
+    // So we rely on clear() to free memory.
+    void clear () {
+        amrex::The_Managed_Arena()->free(p);
+    }
+
+private:
+    amrex::Real* p;
+};
+
+#endif
diff --git a/Source/Initialization/CustomDensityProb.cpp b/Source/Initialization/CustomDensityProb.cpp
deleted file mode 100644
index 3efcb13c5..000000000
--- a/Source/Initialization/CustomDensityProb.cpp
+++ /dev/null
@@ -1,12 +0,0 @@
-#include <PlasmaInjector.H>
-
-#include <iostream>
-
-using namespace amrex;
-
-///
-/// This "custom" density profile just does constant
-///
-Real CustomDensityProfile::getDensity(Real x, Real y, Real z) const {
-  return params[0];
-}
diff --git a/Source/Initialization/CustomMomentumProb.H b/Source/Initialization/CustomMomentumProb.H
new file mode 100644
index 000000000..f8bc29a05
--- /dev/null
+++ b/Source/Initialization/CustomMomentumProb.H
@@ -0,0 +1,30 @@
+#ifndef CUSTOM_MOMENTUM_PROB_H
+#define CUSTOM_MOMENTUM_PROB_H
+
+#include <AMReX_ParmParse.H>
+#include <AMReX_Gpu.H>
+#include <AMReX_Arena.H>
+#include <AMReX_Dim3.H>
+
+// An example of Custom Momentum Profile
+
+// struct whose getMomentum returns momentum at a given position computed from
+// a custom function.
+struct InjectorMomentumCustom
+{
+    InjectorMomentumCustom (std::string const& /*a_species_name*/) {}
+
+    // Return momentum at given position (illustration: momentum=0).
+    AMREX_GPU_HOST_DEVICE
+    amrex::XDim3
+    getMomentum (amrex::Real, amrex::Real, amrex::Real) const noexcept
+    {
+        return {0., 0., 0.};
+    }
+
+    // Note that we are not allowed to have non-trivial destructor.
+    // So we rely on clear() to free memory if needed.
+    void clear () { }
+};
+
+#endif
diff --git a/Source/Initialization/CustomMomentumProb.cpp b/Source/Initialization/CustomMomentumProb.cpp
deleted file mode 100644
index fa21252d0..000000000
--- a/Source/Initialization/CustomMomentumProb.cpp
+++ /dev/null
@@ -1,14 +0,0 @@
-#include <PlasmaInjector.H>
-
-#include <iostream>
-
-using namespace amrex;
-
-///
-/// This "custom" momentum distribution just does 0 momentum
-///
-void CustomMomentumDistribution::getMomentum(vec3& u, Real x, Real y, Real z) {
-  u[0] = 0;
-  u[1] = 0;
-  u[2] = 0;
-}
diff --git a/Source/Initialization/InjectorDensity.H b/Source/Initialization/InjectorDensity.H
new file mode 100644
index 000000000..b7f5c26eb
--- /dev/null
+++ b/Source/Initialization/InjectorDensity.H
@@ -0,0 +1,202 @@
+#ifndef INJECTOR_DENSITY_H_
+#define INJECTOR_DENSITY_H_
+
+#include <AMReX_Gpu.H>
+#include <AMReX_Dim3.H>
+#include <GpuParser.H>
+#include <CustomDensityProb.H>
+#include <WarpXConst.H>
+
+// struct whose getDensity returns constant density.
+struct InjectorDensityConstant
+{
+    InjectorDensityConstant (amrex::Real a_rho) noexcept : m_rho(a_rho) {}
+
+    AMREX_GPU_HOST_DEVICE
+    amrex::Real
+    getDensity (amrex::Real, amrex::Real, amrex::Real) const noexcept
+    {
+        return m_rho;
+    }
+
+private:
+    amrex::Real m_rho;
+};
+
+// struct whose getDensity returns local density computed from parser.
+struct InjectorDensityParser
+{
+    InjectorDensityParser (WarpXParser const& a_parser) noexcept
+        : m_parser(a_parser) {}
+
+    AMREX_GPU_HOST_DEVICE
+    amrex::Real
+    getDensity (amrex::Real x, amrex::Real y, amrex::Real z) const noexcept
+    {
+        return m_parser(x,y,z);
+    }
+
+    // InjectorDensityParser constructs this GpuParser from WarpXParser.
+    GpuParser m_parser;
+};
+
+// struct whose getDensity returns local density computed from predefined profile.
+struct InjectorDensityPredefined
+{
+    InjectorDensityPredefined (std::string const& a_species_name) noexcept;
+
+    void clear ();
+
+    AMREX_GPU_HOST_DEVICE
+    amrex::Real
+    getDensity (amrex::Real x, amrex::Real y, amrex::Real z) const noexcept
+    {
+        // Choices for profile are:
+        // - parabolic_channel
+        switch (profile)
+        {
+        case Profile::parabolic_channel:
+        {
+            amrex::Real z_start   = p[0];
+            amrex::Real ramp_up   = p[1];
+            amrex::Real plateau   = p[2];
+            amrex::Real ramp_down = p[3];
+            amrex::Real rc        = p[4];
+            amrex::Real n0        = p[5];
+            amrex::Real n;
+            amrex::Real kp = PhysConst::q_e/PhysConst::c
+                *std::sqrt( n0/(PhysConst::m_e*PhysConst::ep0) );
+
+            if        ((z-z_start)>=0               and
+                       (z-z_start)<ramp_up ) {
+                n = (z-z_start)/ramp_up;
+            } else if ((z-z_start)>=ramp_up         and
+                       (z-z_start)< ramp_up+plateau ) {
+                n = 1.;
+            } else if ((z-z_start)>=ramp_up+plateau and
+                       (z-z_start)< ramp_up+plateau+ramp_down) {
+                n = 1.-((z-z_start)-ramp_up-plateau)/ramp_down;
+            } else {
+                n = 0.;
+            }
+            n *= n0*(1.+4.*(x*x+y*y)/(kp*kp*rc*rc*rc*rc));
+            return n;
+        }
+        default:
+            amrex::Abort("InjectorDensityPredefined: how did we get here?");
+            return 0.0;
+        }
+    }
+
+private:
+    enum struct Profile { null, parabolic_channel };
+    Profile profile;
+    amrex::Real* p;
+};
+
+// Base struct for density injector. 
+// InjectorDensity contains a union (called Object) that holds any one 
+// instance of: 
+// - InjectorDensityConstant  : to generate constant density;
+// - InjectorDensityParser    : to generate density from parser;
+// - InjectorDensityCustom    : to generate density from custom profile;
+// - InjectorDensityPredefined: to generate density from predefined profile;
+// The choice is made at runtime, depending on the constructor called.
+// This mimics virtual functions, except the struct is stored in managed memory
+// and member functions are made __host__ __device__ to run on CPU and GPU.
+// This struct inherits from amrex::Gpu::Managed to provide new and delete
+// operators in managed memory when running on GPU. Nothing special on CPU.
+struct InjectorDensity
+    : public amrex::Gpu::Managed
+{
+    // This constructor stores a InjectorDensityConstant in union object.
+    InjectorDensity (InjectorDensityConstant* t, amrex::Real a_rho)
+        : type(Type::constant),
+          object(t,a_rho)
+    { }
+
+    // This constructor stores a InjectorDensityParser in union object.
+    InjectorDensity (InjectorDensityParser* t, WarpXParser const& a_parser)
+        : type(Type::parser),
+          object(t,a_parser)
+    { }
+
+    // This constructor stores a InjectorDensityCustom in union object.
+    InjectorDensity (InjectorDensityCustom* t, std::string const& a_species_name)
+        : type(Type::custom),
+          object(t,a_species_name)
+    { }
+
+    // This constructor stores a InjectorDensityPredefined in union object.
+    InjectorDensity (InjectorDensityPredefined* t, std::string const& a_species_name)
+        : type(Type::predefined),
+          object(t,a_species_name)
+    { }
+
+    // Explicitly prevent the compiler from generating copy/move constructors
+    // and copy/move assignment operators.
+    InjectorDensity (InjectorDensity const&) = delete;
+    InjectorDensity (InjectorDensity&&) = delete;
+    void operator= (InjectorDensity const&) = delete;
+    void operator= (InjectorDensity &&) = delete;
+
+    ~InjectorDensity ();
+
+    std::size_t sharedMemoryNeeded () const noexcept;
+
+    // call getDensity from the object stored in the union
+    // (the union is called Object, and the instance is called object).
+    AMREX_GPU_HOST_DEVICE
+    amrex::Real
+    getDensity (amrex::Real x, amrex::Real y, amrex::Real z) const noexcept
+    {
+        switch (type)
+        {
+        case Type::parser:
+        {
+            return object.parser.getDensity(x,y,z);
+        }
+        case Type::constant:
+        {
+            return object.constant.getDensity(x,y,z);
+        }
+        case Type::custom:
+        {
+            return object.custom.getDensity(x,y,z);
+        }
+        case Type::predefined:
+        {
+            return object.predefined.getDensity(x,y,z);
+        }
+        default:
+        {
+            amrex::Abort("InjectorDensity: unknown type");
+            return 0.0;
+        }
+        }
+    }
+
+private:
+    enum struct Type { constant, custom, predefined, parser };
+    Type type;
+
+    // An instance of union Object constructs and stores any one of
+    // the objects declared (constant or parser or custom or predefined).
+    union Object {
+        Object (InjectorDensityConstant*, amrex::Real a_rho) noexcept
+            : constant(a_rho) {}
+        Object (InjectorDensityParser*, WarpXParser const& a_parser) noexcept
+            : parser(a_parser) {}
+        Object (InjectorDensityCustom*, std::string const& a_species_name) noexcept
+            : custom(a_species_name) {}
+        Object (InjectorDensityPredefined*, std::string const& a_species_name) noexcept
+            : predefined(a_species_name) {}
+        InjectorDensityConstant   constant;
+        InjectorDensityParser     parser;
+        InjectorDensityCustom     custom;
+        InjectorDensityPredefined predefined;
+    };
+    Object object;
+};
+
+#endif
diff --git a/Source/Initialization/InjectorDensity.cpp b/Source/Initialization/InjectorDensity.cpp
new file mode 100644
index 000000000..54df4b14d
--- /dev/null
+++ b/Source/Initialization/InjectorDensity.cpp
@@ -0,0 +1,77 @@
+#include <PlasmaInjector.H>
+
+using namespace amrex;
+
+InjectorDensity::~InjectorDensity ()
+{
+    switch (type)
+    {
+    case Type::parser:
+    {
+        object.parser.m_parser.clear();
+        break;
+    }
+    case Type::custom:
+    {
+        object.custom.clear();
+        break;
+    }
+    case Type::predefined:
+    {
+        object.predefined.clear();
+        break;
+    }
+    }
+}
+
+// Compute the amount of memory needed in GPU Shared Memory.
+std::size_t
+InjectorDensity::sharedMemoryNeeded () const noexcept
+{
+    switch (type)
+    {
+    case Type::parser:
+    {
+        // For parser injector, the 3D position of each particle
+        // is stored in shared memory.
+        return amrex::Gpu::numThreadsPerBlockParallelFor() * sizeof(double) * 3;
+    }
+    default:
+        return 0;
+    }
+}
+
+InjectorDensityPredefined::InjectorDensityPredefined (
+    std::string const& a_species_name) noexcept
+    : profile(Profile::null)
+{
+    ParmParse pp(a_species_name);
+
+    std::vector<amrex::Real> v;
+    // Read parameters for the predefined plasma profile, 
+    // and store them in managed memory
+    pp.getarr("predefined_profile_params", v);
+    p = static_cast<amrex::Real*>
+        (amrex::The_Managed_Arena()->alloc(sizeof(amrex::Real)*v.size()));
+    for (int i = 0; i < static_cast<int>(v.size()); ++i) {
+        p[i] = v[i];
+    }
+
+    // Parse predefined profile name, and update member variable profile.
+    std::string which_profile_s;
+    pp.query("predefined_profile_name", which_profile_s);
+    std::transform(which_profile_s.begin(), which_profile_s.end(),
+                   which_profile_s.begin(), ::tolower);
+    if (which_profile_s == "parabolic_channel"){
+        profile = Profile::parabolic_channel;
+        AMREX_ALWAYS_ASSERT_WITH_MESSAGE(v.size() > 5,
+            "InjectorDensityPredefined::parabolic_channel: not enough parameters");
+    }
+}
+
+// Note that we are not allowed to have non-trivial destructor.
+// So we rely on clear() to free memory.
+void InjectorDensityPredefined::clear ()
+{
+    amrex::The_Managed_Arena()->free(p);
+}
diff --git a/Source/Initialization/InjectorMomentum.H b/Source/Initialization/InjectorMomentum.H
new file mode 100644
index 000000000..399ee7759
--- /dev/null
+++ b/Source/Initialization/InjectorMomentum.H
@@ -0,0 +1,223 @@
+#ifndef INJECTOR_MOMENTUM_H_
+#define INJECTOR_MOMENTUM_H_
+
+#include <AMReX_Gpu.H>
+#include <AMReX_Dim3.H>
+#include <GpuParser.H>
+#include <CustomMomentumProb.H>
+
+// struct whose getMomentum returns constant momentum.
+struct InjectorMomentumConstant
+{
+    InjectorMomentumConstant (amrex::Real a_ux, amrex::Real a_uy, amrex::Real a_uz) noexcept
+        : m_ux(a_ux), m_uy(a_uy), m_uz(a_uz) {}
+
+    AMREX_GPU_HOST_DEVICE
+    amrex::XDim3
+    getMomentum (amrex::Real, amrex::Real, amrex::Real) const noexcept
+    {
+        return amrex::XDim3{m_ux,m_uy,m_uz};
+    }
+private:
+    amrex::Real m_ux, m_uy, m_uz;
+};
+
+// struct whose getMomentum returns momentum for 1 particle, from random 
+// gaussian distribution.
+struct InjectorMomentumGaussian
+{
+    InjectorMomentumGaussian (amrex::Real a_ux_m, amrex::Real a_uy_m,
+                              amrex::Real a_uz_m, amrex::Real a_ux_th,
+                              amrex::Real a_uy_th, amrex::Real a_uz_th) noexcept
+        : m_ux_m(a_ux_m), m_uy_m(a_uy_m), m_uz_m(a_uz_m),
+          m_ux_th(a_ux_th), m_uy_th(a_uy_th), m_uz_th(a_uz_th)
+        {}
+
+    AMREX_GPU_HOST_DEVICE
+    amrex::XDim3
+    getMomentum (amrex::Real x, amrex::Real y, amrex::Real z) const noexcept
+    {
+        return amrex::XDim3{amrex::RandomNormal(m_ux_m, m_ux_th),
+                            amrex::RandomNormal(m_uy_m, m_uy_th),
+                            amrex::RandomNormal(m_uz_m, m_uz_th)};
+    }
+private:
+    amrex::Real m_ux_m, m_uy_m, m_uz_m;
+    amrex::Real m_ux_th, m_uy_th, m_uz_th;
+};
+
+// struct whose getMomentum returns momentum for 1 particle, for
+// radial expansion
+struct InjectorMomentumRadialExpansion
+{
+    InjectorMomentumRadialExpansion (amrex::Real a_u_over_r) noexcept
+        : u_over_r(a_u_over_r)
+        {}
+
+    AMREX_GPU_HOST_DEVICE
+    amrex::XDim3
+    getMomentum (amrex::Real x, amrex::Real y, amrex::Real z) const noexcept
+    {
+        return {x*u_over_r, y*u_over_r, z*u_over_r};
+    }
+
+private:
+    amrex::Real u_over_r;
+};
+
+// struct whose getMomentum returns local momentum computed from parser.
+struct InjectorMomentumParser
+{
+    InjectorMomentumParser (WarpXParser const& a_ux_parser,
+                            WarpXParser const& a_uy_parser,
+                            WarpXParser const& a_uz_parser) noexcept
+        : m_ux_parser(a_ux_parser), m_uy_parser(a_uy_parser),
+          m_uz_parser(a_uz_parser) {}
+
+    AMREX_GPU_HOST_DEVICE
+    amrex::XDim3
+    getMomentum (amrex::Real x, amrex::Real y, amrex::Real z) const noexcept
+    {
+        return amrex::XDim3{m_ux_parser(x,y,z),m_uy_parser(x,y,z),m_uz_parser(x,y,z)};
+    }
+
+    GpuParser m_ux_parser, m_uy_parser, m_uz_parser;
+};
+
+// Base struct for momentum injector. InjectorMomentum contains a union
+// (called Object) that holds any one instance of:
+// - InjectorMomentumConstant       : to generate constant momentum;
+// - InjectorMomentumCustom         : to generate momentum from custom function;
+// - InjectorMomentumGaussian       : to generate gaussian distribution;
+// - InjectorMomentumRadialExpansion: to generate radial expansion;
+// - InjectorMomentumParser         : to generate momentum from parser;
+// The choice is made at runtime, depending on the constructor called.
+// This mimics virtual functions, except the struct is stored in managed memory
+// and member functions are made __host__ __device__ to run on CPU and GPU.
+// This struct inherits from amrex::Gpu::Managed to provide new and delete
+// operators in managed memory when running on GPU. Nothing special on CPU.
+struct InjectorMomentum
+    : public amrex::Gpu::Managed
+{
+    // This constructor stores a InjectorMomentumConstant in union object.
+    InjectorMomentum (InjectorMomentumConstant* t,
+                      amrex::Real a_ux, amrex::Real a_uy, amrex::Real a_uz)
+        : type(Type::constant),
+          object(t, a_ux, a_uy, a_uz)
+    { }
+
+    // This constructor stores a InjectorMomentumParser in union object.
+    InjectorMomentum (InjectorMomentumParser* t,
+                      WarpXParser const& a_ux_parser,
+                      WarpXParser const& a_uy_parser,
+                      WarpXParser const& a_uz_parser)
+        : type(Type::parser),
+          object(t, a_ux_parser, a_uy_parser, a_uz_parser)
+    { }
+
+    // This constructor stores a InjectorMomentumGaussian in union object.
+    InjectorMomentum (InjectorMomentumGaussian* t,
+                      amrex::Real a_ux_m, amrex::Real a_uy_m, amrex::Real a_uz_m,
+                      amrex::Real a_ux_th, amrex::Real a_uy_th, amrex::Real a_uz_th)
+        : type(Type::gaussian),
+          object(t,a_ux_m,a_uy_m,a_uz_m,a_ux_th,a_uy_th,a_uz_th)
+    { }
+
+    // This constructor stores a InjectorMomentumCustom in union object.
+    InjectorMomentum (InjectorMomentumCustom* t,
+                      std::string const& a_species_name)
+        : type(Type::custom),
+          object(t, a_species_name)
+    { }
+
+    // This constructor stores a InjectorMomentumRadialExpansion in union object.
+    InjectorMomentum (InjectorMomentumRadialExpansion* t,
+                      amrex::Real u_over_r)
+        : type(Type::radial_expansion),
+          object(t, u_over_r)
+    { }
+
+    // Explicitly prevent the compiler from generating copy/move constructors
+    // and copy/move assignment operators.
+    InjectorMomentum (InjectorMomentum const&) = delete;
+    InjectorMomentum (InjectorMomentum&&) = delete;
+    void operator= (InjectorMomentum const&) = delete;
+    void operator= (InjectorMomentum &&) = delete;
+
+    ~InjectorMomentum ();
+
+    std::size_t sharedMemoryNeeded () const noexcept;
+
+    // call getMomentum from the object stored in the union
+    // (the union is called Object, and the instance is called object).
+    AMREX_GPU_HOST_DEVICE
+    amrex::XDim3
+    getMomentum (amrex::Real x, amrex::Real y, amrex::Real z) const noexcept
+    {
+        switch (type)
+        {
+        case Type::parser:
+        {
+            return object.parser.getMomentum(x,y,z);
+        }
+        case Type::gaussian:
+        {
+            return object.gaussian.getMomentum(x,y,z);
+        }
+        case Type::constant:
+        {
+            return object.constant.getMomentum(x,y,z);
+        }
+        case Type::radial_expansion:
+        {
+            return object.radial_expansion.getMomentum(x,y,z);
+        }
+        case Type::custom:
+        {
+            return object.custom.getMomentum(x,y,z);
+        }
+        default:
+        {
+            amrex::Abort("InjectorMomentum: unknown type");
+            return {0.0,0.0,0.0};
+        }
+        }
+    }
+
+private:
+    enum struct Type { constant, custom, gaussian, radial_expansion, parser };
+    Type type;
+
+    // An instance of union Object constructs and stores any one of
+    // the objects declared (constant or custom or gaussian or 
+    // radial_expansion or parser).
+    union Object {
+        Object (InjectorMomentumConstant*,
+                amrex::Real a_ux, amrex::Real a_uy, amrex::Real a_uz) noexcept
+            : constant(a_ux,a_uy,a_uz) {}
+        Object (InjectorMomentumCustom*,
+                std::string const& a_species_name) noexcept
+            : custom(a_species_name) {}
+        Object (InjectorMomentumGaussian*,
+                amrex::Real a_ux_m, amrex::Real a_uy_m,
+                amrex::Real a_uz_m, amrex::Real a_ux_th,
+                amrex::Real a_uy_th, amrex::Real a_uz_th) noexcept
+            : gaussian(a_ux_m,a_uy_m,a_uz_m,a_ux_th,a_uy_th,a_uz_th) {}
+        Object (InjectorMomentumRadialExpansion*,
+                amrex::Real u_over_r) noexcept
+            : radial_expansion(u_over_r) {}
+        Object (InjectorMomentumParser*,
+                WarpXParser const& a_ux_parser,
+                WarpXParser const& a_uy_parser,
+                WarpXParser const& a_uz_parser) noexcept
+            : parser(a_ux_parser, a_uy_parser, a_uz_parser) {}
+        InjectorMomentumConstant constant;
+        InjectorMomentumCustom   custom;
+        InjectorMomentumGaussian gaussian;
+        InjectorMomentumRadialExpansion radial_expansion;
+        InjectorMomentumParser   parser;
+    };
+    Object object;
+};
+
+#endif
diff --git a/Source/Initialization/InjectorMomentum.cpp b/Source/Initialization/InjectorMomentum.cpp
new file mode 100644
index 000000000..a197b5bef
--- /dev/null
+++ b/Source/Initialization/InjectorMomentum.cpp
@@ -0,0 +1,40 @@
+#include <PlasmaInjector.H>
+
+using namespace amrex;
+
+InjectorMomentum::~InjectorMomentum ()
+{
+    switch (type)
+    {
+    case Type::parser:
+    {
+        object.parser.m_ux_parser.clear();
+        object.parser.m_uy_parser.clear();
+        object.parser.m_uz_parser.clear();
+        break;
+    }
+    case Type::custom:
+    {
+        object.custom.clear();
+        break;
+    }
+    }
+}
+
+// Compute the amount of memory needed in GPU Shared Memory.
+std::size_t
+InjectorMomentum::sharedMemoryNeeded () const noexcept
+{
+    switch (type)
+    {
+    case Type::parser:
+    {
+        // For parser injector, the 3D position of each particle
+        // is stored in shared memory.
+        return amrex::Gpu::numThreadsPerBlockParallelFor() * sizeof(double) * 3;
+    }
+    default:
+        return 0;
+    }
+}
+
diff --git a/Source/Initialization/InjectorPosition.H b/Source/Initialization/InjectorPosition.H
new file mode 100644
index 000000000..19bb092dd
--- /dev/null
+++ b/Source/Initialization/InjectorPosition.H
@@ -0,0 +1,146 @@
+#ifndef INJECTOR_POSITION_H_
+#define INJECTOR_POSITION_H_
+
+#include <AMReX_Gpu.H>
+#include <AMReX_Dim3.H>
+#include <AMReX_Utility.H>
+
+// struct whose getPositionUnitBox returns x, y and z for a particle with
+// random distribution inside a unit cell.
+struct InjectorPositionRandom
+{
+    AMREX_GPU_HOST_DEVICE
+    amrex::XDim3
+    getPositionUnitBox (int i_part, int ref_fac=1) const noexcept
+    {
+        return amrex::XDim3{amrex::Random(), amrex::Random(), amrex::Random()};
+    }
+};
+
+// struct whose getPositionUnitBox returns x, y and z for a particle with
+// regular distribution inside a unit cell.
+struct InjectorPositionRegular
+{
+    InjectorPositionRegular (amrex::Dim3 const& a_ppc) noexcept : ppc(a_ppc) {}
+
+    // i_part: particle number within the cell, required to evenly space
+    // particles within the cell.
+    // ref_fac: the number of particles evenly-spaced within a cell 
+    // is a_ppc*(ref_fac**AMREX_SPACEDIM).
+    AMREX_GPU_HOST_DEVICE
+    amrex::XDim3
+    getPositionUnitBox (int i_part, int ref_fac=1) const noexcept
+    {
+        int nx = ref_fac*ppc.x;
+        int ny = ref_fac*ppc.y;
+#if (AMREX_SPACEDIM == 3)
+        int nz = ref_fac*ppc.z;
+#else
+        int nz = 1;
+#endif
+        int ix_part = i_part/(ny*nz);  // written this way for backward compatibility
+        int iz_part = (i_part-ix_part*(ny*nz)) / ny;
+        int iy_part = (i_part-ix_part*(ny*nz)) - ny*iz_part;
+        return amrex::XDim3{(0.5+ix_part)/nx, (0.5+iy_part)/ny, (0.5+iz_part) / nz};
+    }
+private:
+    amrex::Dim3 ppc;
+};
+
+// Base struct for position injector. 
+// InjectorPosition contains a union (called Object) that holds any one 
+// instance of: 
+// - InjectorPositionRandom : to generate random distribution;
+// - InjectorPositionRegular: to generate regular distribution.
+// The choice is made at runtime, depending on the constructor called.
+// This mimics virtual functions, except the struct is stored in managed memory
+// and member functions are made __host__ __device__ to run on CPU and GPU.
+// This struct inherits from amrex::Gpu::Managed to provide new and delete
+// operators in managed memory when running on GPU. Nothing special on CPU.
+struct InjectorPosition
+    : public amrex::Gpu::Managed
+{
+    // This constructor stores a InjectorPositionRandom in union object.
+    InjectorPosition (InjectorPositionRandom* t,
+                      amrex::Real a_xmin, amrex::Real a_xmax,
+                      amrex::Real a_ymin, amrex::Real a_ymax,
+                      amrex::Real a_zmin, amrex::Real a_zmax)
+        : type(Type::random),
+          object(t),
+          xmin(a_xmin), xmax(a_xmax),
+          ymin(a_ymin), ymax(a_ymax),
+          zmin(a_zmin), zmax(a_zmax)
+    { }
+
+    // This constructor stores a InjectorPositionRegular in union object.
+    InjectorPosition (InjectorPositionRegular* t,
+                      amrex::Real a_xmin, amrex::Real a_xmax,
+                      amrex::Real a_ymin, amrex::Real a_ymax,
+                      amrex::Real a_zmin, amrex::Real a_zmax,
+                      amrex::Dim3 const& a_ppc)
+        : type(Type::regular),
+          object(t, a_ppc),
+          xmin(a_xmin), xmax(a_xmax),
+          ymin(a_ymin), ymax(a_ymax),
+          zmin(a_zmin), zmax(a_zmax)
+    { }
+
+    // Explicitly prevent the compiler from generating copy/move constructors
+    // and copy/move assignment operators.
+    InjectorPosition (InjectorPosition const&) = delete;
+    InjectorPosition (InjectorPosition&&) = delete;
+    void operator= (InjectorPosition const&) = delete;
+    void operator= (InjectorPosition &&) = delete;
+
+    std::size_t sharedMemoryNeeded () const noexcept { return 0; }
+
+    // call getPositionUnitBox from the object stored in the union
+    // (the union is called Object, and the instance is called object).
+    AMREX_GPU_HOST_DEVICE
+    amrex::XDim3
+    getPositionUnitBox (int i_part, int ref_fac=1) const noexcept
+    {
+        switch (type)
+        {
+        case Type::regular:
+        {
+            return object.regular.getPositionUnitBox(i_part, ref_fac);
+        }
+        default:
+        {
+            return object.random.getPositionUnitBox(i_part, ref_fac);
+        }
+        };
+    }
+
+    // bool: whether position specified is within bounds.
+    AMREX_GPU_HOST_DEVICE
+    bool
+    insideBounds (amrex::Real x, amrex::Real y, amrex::Real z) const noexcept
+    {
+        return (x < xmax and x >= xmin and
+                y < ymax and y >= ymin and
+                z < zmax and z >= zmin);
+    }
+
+private:
+    enum struct Type { random, regular };
+    Type type;
+
+    // An instance of union Object constructs and stores any one of
+    // the objects declared (random or regular).
+    union Object {
+        Object (InjectorPositionRandom*) noexcept : random() {}
+        Object (InjectorPositionRegular*, amrex::Dim3 const& a_ppc) noexcept
+            : regular(a_ppc) {}
+        InjectorPositionRandom random;
+        InjectorPositionRegular regular;
+    };
+    Object object;
+
+    amrex::Real xmin, xmax;
+    amrex::Real ymin, ymax;
+    amrex::Real zmin, zmax;
+};
+
+#endif
diff --git a/Source/Initialization/Make.package b/Source/Initialization/Make.package
index edcf402c9..2c6458b6d 100644
--- a/Source/Initialization/Make.package
+++ b/Source/Initialization/Make.package
@@ -1,9 +1,18 @@
-CEXE_sources += CustomDensityProb.cpp
-CEXE_sources += PlasmaProfiles.cpp
 CEXE_sources += WarpXInitData.cpp
-CEXE_sources += CustomMomentumProb.cpp
+
 CEXE_sources += PlasmaInjector.cpp
 CEXE_headers += PlasmaInjector.H
 
+CEXE_headers += InjectorPosition.H
+
+CEXE_headers += InjectorDensity.H
+CEXE_sources += InjectorDensity.cpp
+
+CEXE_headers += InjectorMomentum.H
+CEXE_sources += InjectorMomentum.cpp
+
+CEXE_headers += CustomDensityProb.H
+CEXE_headers += CustomMomentumProb.H
+
 INCLUDE_LOCATIONS += $(WARPX_HOME)/Source/Initialization
 VPATH_LOCATIONS   += $(WARPX_HOME)/Source/Initialization
diff --git a/Source/Initialization/PlasmaInjector.H b/Source/Initialization/PlasmaInjector.H
index f998e217e..f7e86bff5 100644
--- a/Source/Initialization/PlasmaInjector.H
+++ b/Source/Initialization/PlasmaInjector.H
@@ -1,250 +1,16 @@
 #ifndef PLASMA_INJECTOR_H_
 #define PLASMA_INJECTOR_H_
 
-#include <array>
+#include <InjectorPosition.H>
+#include <InjectorDensity.H>
+#include <InjectorMomentum.H>
 
-#include "AMReX_REAL.H"
+#include <array>
 #include <AMReX_Vector.H>
 #include <WarpXConst.H>
 #include <WarpXParser.H>
-#include "AMReX_ParmParse.H"
-#include "AMReX_Utility.H"
-
-enum class predefined_profile_flag { Null, parabolic_channel };
-
-///
-/// PlasmaDensityProfile describes how the charge density
-/// is set in particle initialization. Subclasses must define a
-/// getDensity function that describes the charge density as a
-/// function of x, y, and z.
-///
-class PlasmaDensityProfile
-{
-public:
-    virtual ~PlasmaDensityProfile() {};
-    virtual amrex::Real getDensity(amrex::Real x,
-                                   amrex::Real y,
-                                   amrex::Real z) const = 0;
-protected:
-    std::string _species_name;
-};
-
-///
-/// This describes a constant density distribution.
-///
-class ConstantDensityProfile : public PlasmaDensityProfile
-{
-public:
-    ConstantDensityProfile(amrex::Real _density);
-    virtual amrex::Real getDensity(amrex::Real x,
-                                   amrex::Real y,
-                                   amrex::Real z) const override;
-
-private:
-    amrex::Real _density;
-};
-
-///
-/// This describes a custom density distribution. Users can supply
-/// in their problem directory.
-///
-///
-class CustomDensityProfile : public PlasmaDensityProfile
-{
-public:
-    CustomDensityProfile(const std::string& species_name);
-    virtual amrex::Real getDensity(amrex::Real x,
-                                   amrex::Real y,
-                                   amrex::Real z) const override;
-private:
-    amrex::Vector<amrex::Real> params;
-};
-
-///
-/// This describes predefined density distributions.
-///
-class PredefinedDensityProfile : public PlasmaDensityProfile
-{
-public:
-    PredefinedDensityProfile(const std::string& species_name);
-    virtual amrex::Real getDensity(amrex::Real x,
-                                   amrex::Real y,
-                                   amrex::Real z) const override;
-    amrex::Real ParabolicChannel(amrex::Real x,
-                                 amrex::Real y,
-                                 amrex::Real z) const;
-private:
-    predefined_profile_flag which_profile = predefined_profile_flag::Null;
-    amrex::Vector<amrex::Real> params;
-};
-
-///
-/// This describes a density function parsed in the input file. 
-///
-class ParseDensityProfile : public PlasmaDensityProfile
-{
-public:
-    ParseDensityProfile(const std::string _parse_density_function);
-    virtual amrex::Real getDensity(amrex::Real x,
-                                   amrex::Real y,
-                                   amrex::Real z) const override;
-private:
-    std::string _parse_density_function;
-    WarpXParser parser_density;
-};
-
-///
-/// PlasmaMomentumDistribution describes how the particle momenta
-/// are set. Subclasses must define a getMomentum method that fills
-/// a u with the 3 components of the particle momentum
-///
-class PlasmaMomentumDistribution
-{
-public:
-    using vec3 = std::array<amrex::Real, 3>;
-    virtual ~PlasmaMomentumDistribution() {};
-    virtual void getMomentum(vec3& u, amrex::Real x, amrex::Real y, amrex::Real z) = 0;
-};
-
-///
-/// This is a constant momentum distribution - all particles will
-/// have the same ux, uy, and uz
-///
-class ConstantMomentumDistribution : public PlasmaMomentumDistribution
-{
-public:
-    ConstantMomentumDistribution(amrex::Real ux,
-                                 amrex::Real uy,
-                                 amrex::Real uz);
-    virtual void getMomentum(vec3& u, amrex::Real x, amrex::Real y, amrex::Real z) override;
-
-private:
-    amrex::Real _ux;
-    amrex::Real _uy;
-    amrex::Real _uz;
-};
-
-///
-/// This describes a custom momentum distribution. Users can supply
-/// in their problem directory.
-///
-///
-class CustomMomentumDistribution : public PlasmaMomentumDistribution
-{
-public:
-    CustomMomentumDistribution(const std::string& species_name);
-    virtual void getMomentum(vec3& u, amrex::Real x, amrex::Real y, amrex::Real z) override;
-
-private:
-    amrex::Vector<amrex::Real> params;
-};
-
-
-///
-/// This is a Gaussian Random momentum distribution.
-/// Particles will get random momenta, drawn from a normal.
-/// ux_m, ux_y, and ux_z describe the mean components in the x, y, and z
-/// directions, while u_th is the standard deviation of the random
-/// component.
-///
-class GaussianRandomMomentumDistribution : public PlasmaMomentumDistribution
-{
-public:
-    GaussianRandomMomentumDistribution(amrex::Real ux_m,
-                                       amrex::Real uy_m,
-                                       amrex::Real uz_m,
-                                       amrex::Real ux_th,
-                                       amrex::Real uy_th,
-                                       amrex::Real uz_th);
-    virtual void getMomentum(vec3& u, amrex::Real x, amrex::Real y, amrex::Real z) override;
-private:
-    amrex::Real _ux_m;
-    amrex::Real _uy_m;
-    amrex::Real _uz_m;
-    amrex::Real _ux_th;
-    amrex::Real _uy_th;
-    amrex::Real _uz_th;
-};
-
-///
-/// This is a radially expanding momentum distribution
-/// Particles will have a radial momentum proportional to their 
-/// radius, with proportionality constant u_over_r
-class RadialExpansionMomentumDistribution : public PlasmaMomentumDistribution
-{
-public:
-  RadialExpansionMomentumDistribution( amrex::Real u_over_r );
-  virtual void getMomentum(vec3& u, amrex::Real x, amrex::Real y, amrex::Real z) override;
-private:
-    amrex::Real _u_over_r;
-};
-
-///
-/// This describes a momentum distribution function parsed in the input file. 
-///
-class ParseMomentumFunction : public PlasmaMomentumDistribution
-{
-public:
-    ParseMomentumFunction(const std::string _parse_momentum_function_ux,
-                          const std::string _parse_momentum_function_uy,
-                          const std::string _parse_momentum_function_uz);
-    virtual void getMomentum(vec3& u, 
-                             amrex::Real x,
-                             amrex::Real y,
-                             amrex::Real z) override;
-private:
-    std::string _parse_momentum_function_ux;
-    std::string _parse_momentum_function_uy;
-    std::string _parse_momentum_function_uz;
-    WarpXParser parser_ux;
-    WarpXParser parser_uy;
-    WarpXParser parser_uz;
-};
-
-
-///
-/// PlasmaParticlePosition describes how particles are initialized
-/// into each cell box. Subclasses must define a
-/// getPositionUnitBox function that returns the position of
-/// particle number i_part in a unitary box.
-///
-class PlasmaParticlePosition{
-public:
-  using vec3 = std::array<amrex::Real, 3>;
-  virtual ~PlasmaParticlePosition() {};
-    virtual void getPositionUnitBox(vec3& r, int i_part, int ref_fac=1) = 0;
-};
-
-///
-/// Particles are initialized with a random uniform
-/// distribution inside each cell
-///
-class RandomPosition : public PlasmaParticlePosition{
-public:
-    RandomPosition(int num_particles_per_cell);
-    virtual void getPositionUnitBox(vec3& r, int i_part, int ref_fac=1) override;
-private:
-    amrex::Real _x;
-    amrex::Real _y;
-    amrex::Real _z;
-    int _num_particles_per_cell;
-};
-
-///
-/// Particles are regularly distributed inside each cell. The user provides
-/// a 3d (resp. 2d) vector num_particles_per_cell_each_dim that contains
-/// the number of particles per cell along each dimension.
-///
-class RegularPosition : public PlasmaParticlePosition{
-public:
-  RegularPosition(const amrex::Vector<int>& num_particles_per_cell_each_dim);
-    virtual void getPositionUnitBox(vec3& r, int i_part, int ref_fac=1) override;
-private:
-  amrex::Real _x;
-  amrex::Real _y;
-  amrex::Real _z;
-  amrex::Vector<int> _num_particles_per_cell_each_dim;
-};
+#include <AMReX_ParmParse.H>
+#include <AMReX_Utility.H>
 
 ///
 /// The PlasmaInjector class parses and stores information about the plasma
@@ -256,28 +22,23 @@ class PlasmaInjector
 
 public:
 
-    using vec3 = std::array<amrex::Real, 3>;
-
-    PlasmaInjector();
-
-    PlasmaInjector(int ispecies, const std::string& name);
+    PlasmaInjector ();
 
-    amrex::Real getDensity(amrex::Real x, amrex::Real y, amrex::Real z);
+    PlasmaInjector (int ispecies, const std::string& name);
 
-    bool insideBounds(amrex::Real x, amrex::Real y, amrex::Real z);
+    bool insideBounds (amrex::Real x, amrex::Real y, amrex::Real z) const noexcept;
 
     int num_particles_per_cell;
 
     amrex::Vector<int> num_particles_per_cell_each_dim;
 
-    void getMomentum(vec3& u, amrex::Real x, amrex::Real y, amrex::Real z);
+    // gamma * beta
+    amrex::XDim3 getMomentum (amrex::Real x, amrex::Real y, amrex::Real z) const noexcept;
 
-    void getPositionUnitBox(vec3& r, int i_part, int ref_fac=1);
+    amrex::Real getCharge () const {return charge;}  // const: callable on a const injector
+    amrex::Real getMass () const {return mass;}  // const: callable on a const injector
 
-    amrex::Real getCharge() {return charge;}
-    amrex::Real getMass() {return mass;}
-
-    bool doInjection() { return part_pos != NULL;}
+    bool doInjection () const noexcept { return inj_pos != nullptr;}  // true once a position injector is set
 
     bool add_single_particle = false;
     amrex::Vector<amrex::Real> single_particle_pos;
@@ -305,6 +66,21 @@ public:
     amrex::Real xmin, xmax;
     amrex::Real ymin, ymax;
     amrex::Real zmin, zmax;
+    amrex::Real density_min = 0;
+    amrex::Real density_max = std::numeric_limits<amrex::Real>::max();
+
+    InjectorPosition* getInjectorPosition ();
+    InjectorDensity*  getInjectorDensity ();
+    InjectorMomentum* getInjectorMomentum ();
+
+    // When running on GPU, the injectors for position, density and momentum store particle
+    // 3D positions in shared memory IF using the parser. Injectors that are unset count as 0.
+    std::size_t
+    sharedMemoryNeeded () const noexcept {
+        return amrex::max(inj_pos ? inj_pos->sharedMemoryNeeded() : std::size_t(0),
+                          inj_rho ? inj_rho->sharedMemoryNeeded() : std::size_t(0),
+                          inj_mom ? inj_mom->sharedMemoryNeeded() : std::size_t(0));
+    }
 
 protected:
 
@@ -315,13 +91,12 @@ protected:
     int species_id;
     std::string species_name;
 
-    std::unique_ptr<PlasmaDensityProfile> rho_prof;
-    std::unique_ptr<PlasmaMomentumDistribution> mom_dist;
-    std::unique_ptr<PlasmaParticlePosition> part_pos;
-    
-    void parseDensity(amrex::ParmParse pp);
-    void parseMomentum(amrex::ParmParse pp);
+    std::unique_ptr<InjectorPosition> inj_pos;
+    std::unique_ptr<InjectorDensity > inj_rho;
+    std::unique_ptr<InjectorMomentum> inj_mom;
     
+    void parseDensity (amrex::ParmParse& pp);
+    void parseMomentum (amrex::ParmParse& pp);    
 };
 
 #endif
diff --git a/Source/Initialization/PlasmaInjector.cpp b/Source/Initialization/PlasmaInjector.cpp
index f9642d1b6..541999789 100644
--- a/Source/Initialization/PlasmaInjector.cpp
+++ b/Source/Initialization/PlasmaInjector.cpp
@@ -55,192 +55,34 @@ namespace {
     }
 }
 
-ConstantDensityProfile::ConstantDensityProfile(Real density)
-    : _density(density)
-{}
+PlasmaInjector::PlasmaInjector () {}
 
-Real ConstantDensityProfile::getDensity(Real x, Real y, Real z) const
-{
-    return _density;
-}
-
-CustomDensityProfile::CustomDensityProfile(const std::string& species_name)
-{
-    ParmParse pp(species_name);
-    pp.getarr("custom_profile_params", params);
-}
-
-PredefinedDensityProfile::PredefinedDensityProfile(const std::string& species_name)
+PlasmaInjector::PlasmaInjector (int ispecies, const std::string& name)
+    : species_id(ispecies), species_name(name)
 {
     ParmParse pp(species_name);
-    std::string which_profile_s;
-    pp.getarr("predefined_profile_params", params);
-    pp.query("predefined_profile_name", which_profile_s);
-    if (which_profile_s == "parabolic_channel"){
-        which_profile = predefined_profile_flag::parabolic_channel;
-    }
-}
-
-ParseDensityProfile::ParseDensityProfile(std::string parse_density_function)
-    : _parse_density_function(parse_density_function)
-{
-    parser_density.define(parse_density_function);
-    parser_density.registerVariables({"x","y","z"});
-
-    ParmParse pp("my_constants");
-    std::set<std::string> symbols = parser_density.symbols();
-    symbols.erase("x");
-    symbols.erase("y");
-    symbols.erase("z"); // after removing variables, we are left with constants
-    for (auto it = symbols.begin(); it != symbols.end(); ) {
-        Real v;
-        if (pp.query(it->c_str(), v)) {
-            parser_density.setConstant(*it, v);
-            it = symbols.erase(it);
-        } else {
-            ++it;
-        }
-    }
-    for (auto const& s : symbols) { // make sure there no unknown symbols
-        amrex::Abort("ParseDensityProfile: Unknown symbol "+s);
-    }
-}
-
-Real ParseDensityProfile::getDensity(Real x, Real y, Real z) const
-{
-    return parser_density.eval(x,y,z);
-}
-
-ConstantMomentumDistribution::ConstantMomentumDistribution(Real ux,
-                                                           Real uy,
-                                                           Real uz)
-    : _ux(ux), _uy(uy), _uz(uz)
-{}
-
-void ConstantMomentumDistribution::getMomentum(vec3& u, Real x, Real y, Real z) {
-    u[0] = _ux;
-    u[1] = _uy;
-    u[2] = _uz;
-}
 
-CustomMomentumDistribution::CustomMomentumDistribution(const std::string& species_name)
-{
-  ParmParse pp(species_name);
-  pp.getarr("custom_momentum_params", params);
-}
-
-GaussianRandomMomentumDistribution::GaussianRandomMomentumDistribution(Real ux_m,
-                                                                       Real uy_m,
-                                                                       Real uz_m,
-                                                                       Real ux_th,
-                                                                       Real uy_th,
-                                                                       Real uz_th)
-    : _ux_m(ux_m), _uy_m(uy_m), _uz_m(uz_m), _ux_th(ux_th), _uy_th(uy_th), _uz_th(uz_th)
-{
-}
-
-void GaussianRandomMomentumDistribution::getMomentum(vec3& u, Real x, Real y, Real z) {
-    Real ux_th = amrex::RandomNormal(0.0, _ux_th);
-    Real uy_th = amrex::RandomNormal(0.0, _uy_th);
-    Real uz_th = amrex::RandomNormal(0.0, _uz_th);
-
-    u[0] = _ux_m + ux_th;
-    u[1] = _uy_m + uy_th;
-    u[2] = _uz_m + uz_th;
-}
-RadialExpansionMomentumDistribution::RadialExpansionMomentumDistribution(Real u_over_r) : _u_over_r( u_over_r )
-{
-}
-
-void RadialExpansionMomentumDistribution::getMomentum(vec3& u, Real x, Real y, Real z) {
-  u[0] = _u_over_r * x;
-  u[1] = _u_over_r * y;
-  u[2] = _u_over_r * z;
-}
-
-ParseMomentumFunction::ParseMomentumFunction(std::string parse_momentum_function_ux,
-                                             std::string parse_momentum_function_uy,
-                                             std::string parse_momentum_function_uz)
-    : _parse_momentum_function_ux(parse_momentum_function_ux),
-      _parse_momentum_function_uy(parse_momentum_function_uy),
-      _parse_momentum_function_uz(parse_momentum_function_uz)
-{
-    parser_ux.define(parse_momentum_function_ux);
-    parser_uy.define(parse_momentum_function_uy);
-    parser_uz.define(parse_momentum_function_uz);
-
-    amrex::Array<std::reference_wrapper<WarpXParser>,3> parsers{parser_ux, parser_uy, parser_uz};
-    ParmParse pp("my_constants");
-    for (auto& p : parsers) {
-        auto& parser = p.get();
-        parser.registerVariables({"x","y","z"});
-        std::set<std::string> symbols = parser.symbols();
-        symbols.erase("x");
-        symbols.erase("y");
-        symbols.erase("z"); // after removing variables, we are left with constants
-        for (auto it = symbols.begin(); it != symbols.end(); ) {
-            Real v;
-            if (pp.query(it->c_str(), v)) {
-                parser.setConstant(*it, v);
-                it = symbols.erase(it);
-            } else {
-                ++it;
-            }
-        }
-        for (auto const& s : symbols) { // make sure there no unknown symbols
-            amrex::Abort("ParseMomentumFunction: Unknown symbol "+s);
-        }
-    }
-}
-
-void ParseMomentumFunction::getMomentum(vec3& u, Real x, Real y, Real z)
-{
-    u[0] = parser_ux.eval(x,y,z);
-    u[1] = parser_uy.eval(x,y,z);
-    u[2] = parser_uz.eval(x,y,z);
-}
-
-RandomPosition::RandomPosition(int num_particles_per_cell):
-  _num_particles_per_cell(num_particles_per_cell)
-{}
-
-void RandomPosition::getPositionUnitBox(vec3& r, int i_part, int ref_fac){
-    r[0] = amrex::Random();
-    r[1] = amrex::Random();
-    r[2] = amrex::Random();
-}
-
-RegularPosition::RegularPosition(const amrex::Vector<int>& num_particles_per_cell_each_dim)
-    : _num_particles_per_cell_each_dim(num_particles_per_cell_each_dim)
-{}
+    pp.query("radially_weighted", radially_weighted);
+    AMREX_ALWAYS_ASSERT_WITH_MESSAGE(radially_weighted, "ERROR: Only radially_weighted=true is supported");
 
-void RegularPosition::getPositionUnitBox(vec3& r, int i_part, int ref_fac)
-{
-  int nx = ref_fac*_num_particles_per_cell_each_dim[0];
-  int ny = ref_fac*_num_particles_per_cell_each_dim[1];
-#if AMREX_SPACEDIM == 3
-  int nz = ref_fac*_num_particles_per_cell_each_dim[2];
-#else
-  int nz = 1;
-#endif
-  
-  int ix_part = i_part/(ny * nz);
-  int iy_part = (i_part % (ny * nz)) % ny;
-  int iz_part = (i_part % (ny * nz)) / ny;
+    // parse plasma boundaries
+    xmin = std::numeric_limits<amrex::Real>::lowest();
+    ymin = std::numeric_limits<amrex::Real>::lowest();
+    zmin = std::numeric_limits<amrex::Real>::lowest();
 
-  r[0] = (0.5+ix_part)/nx;
-  r[1] = (0.5+iy_part)/ny;
-  r[2] = (0.5+iz_part)/nz;
-}
+    xmax = std::numeric_limits<amrex::Real>::max();
+    ymax = std::numeric_limits<amrex::Real>::max();
+    zmax = std::numeric_limits<amrex::Real>::max();
 
-PlasmaInjector::PlasmaInjector(){
-    part_pos = NULL;
-}
+    pp.query("xmin", xmin);
+    pp.query("ymin", ymin);
+    pp.query("zmin", zmin);
+    pp.query("xmax", xmax);
+    pp.query("ymax", ymax);
+    pp.query("zmax", zmax);
 
-PlasmaInjector::PlasmaInjector(int ispecies, const std::string& name)
-    : species_id(ispecies), species_name(name)
-{
-    ParmParse pp(species_name);
+    pp.query("density_min", density_min);
+    pp.query("density_max", density_max);
 
     // parse charge and mass
     std::string charge_s;
@@ -290,9 +132,14 @@ PlasmaInjector::PlasmaInjector(int ispecies, const std::string& name)
         gaussian_beam = true;
         parseMomentum(pp);
     }
+    // Depending on injection type at runtime, initialize inj_pos
+    // so that inj_pos->getPositionUnitBox calls
+    // InjectorPosition[Random or Regular].getPositionUnitBox.
     else if (part_pos_s == "nrandompercell") {
         pp.query("num_particles_per_cell", num_particles_per_cell);
-        part_pos.reset(new RandomPosition(num_particles_per_cell));
+        // Construct InjectorPosition with InjectorPositionRandom.
+        inj_pos.reset(new InjectorPosition((InjectorPositionRandom*)nullptr,
+                                           xmin, xmax, ymin, ymax, zmin, zmax));
         parseDensity(pp);
         parseMomentum(pp);
     } else if (part_pos_s == "nuniformpercell") {
@@ -301,7 +148,12 @@ PlasmaInjector::PlasmaInjector(int ispecies, const std::string& name)
 #if ( AMREX_SPACEDIM == 2 )
         num_particles_per_cell_each_dim[2] = 1;
 #endif
-        part_pos.reset(new RegularPosition(num_particles_per_cell_each_dim));
+        // Construct InjectorPosition from InjectorPositionRegular.
+        inj_pos.reset(new InjectorPosition((InjectorPositionRegular*)nullptr,
+                                           xmin, xmax, ymin, ymax, zmin, zmax,
+                                           Dim3{num_particles_per_cell_each_dim[0],
+                                                num_particles_per_cell_each_dim[1],
+                                                num_particles_per_cell_each_dim[2]}));
         num_particles_per_cell = num_particles_per_cell_each_dim[0] *
                                  num_particles_per_cell_each_dim[1] *
                                  num_particles_per_cell_each_dim[2];
@@ -310,52 +162,75 @@ PlasmaInjector::PlasmaInjector(int ispecies, const std::string& name)
     } else {
         StringParseAbortMessage("Injection style", part_pos_s);
     }
+}
 
-    pp.query("radially_weighted", radially_weighted);
-    AMREX_ALWAYS_ASSERT_WITH_MESSAGE(radially_weighted, "ERROR: Only radially_weighted=true is supported");
-
-    // parse plasma boundaries
-    xmin = std::numeric_limits<amrex::Real>::lowest();
-    ymin = std::numeric_limits<amrex::Real>::lowest();
-    zmin = std::numeric_limits<amrex::Real>::lowest();
-
-    xmax = std::numeric_limits<amrex::Real>::max();
-    ymax = std::numeric_limits<amrex::Real>::max();
-    zmax = std::numeric_limits<amrex::Real>::max();
+namespace {
+WarpXParser makeParser (std::string const& parse_function)
+{
+    WarpXParser parser(parse_function);
+    parser.registerVariables({"x","y","z"});
 
-    pp.query("xmin", xmin);
-    pp.query("ymin", ymin);
-    pp.query("zmin", zmin);
-    pp.query("xmax", xmax);
-    pp.query("ymax", ymax);
-    pp.query("zmax", zmax);
+    ParmParse pp("my_constants"); // constants are looked up under the "my_constants" prefix
+    std::set<std::string> symbols = parser.symbols();
+    symbols.erase("x");
+    symbols.erase("y");
+    symbols.erase("z"); // after removing variables, we are left with constants
+    for (auto it = symbols.begin(); it != symbols.end(); ) {
+        Real v;
+        if (pp.query(it->c_str(), v)) { // value found: bind it and drop the symbol from the set
+            parser.setConstant(*it, v);
+            it = symbols.erase(it);
+        } else {
+            ++it;
+        }
+    }
+    for (auto const& s : symbols) { // make sure there are no unknown symbols
+        amrex::Abort("PlasmaInjector::makeParser: Unknown symbol "+s);
+    }
 
+    return parser;
+}
 }
 
-void PlasmaInjector::parseDensity(ParmParse pp){
+// Depending on injection type at runtime, initialize inj_rho
+// so that inj_rho->getDensity calls
+// InjectorDensity[Constant or Custom or etc.].getDensity.
+void PlasmaInjector::parseDensity (ParmParse& pp)
+{
     // parse density information
     std::string rho_prof_s;
     pp.get("profile", rho_prof_s);
-    std::transform(rho_prof_s.begin(),
-                   rho_prof_s.end(),
-                   rho_prof_s.begin(),
-                   ::tolower);
+    std::transform(rho_prof_s.begin(), rho_prof_s.end(),
+                   rho_prof_s.begin(), ::tolower);
     if (rho_prof_s == "constant") {
         pp.get("density", density);
-        rho_prof.reset(new ConstantDensityProfile(density));
+        // Construct InjectorDensity with InjectorDensityConstant.
+        inj_rho.reset(new InjectorDensity((InjectorDensityConstant*)nullptr, density));
     } else if (rho_prof_s == "custom") {
-        rho_prof.reset(new CustomDensityProfile(species_name));
+        // Construct InjectorDensity with InjectorDensityCustom.
+        inj_rho.reset(new InjectorDensity((InjectorDensityCustom*)nullptr, species_name));
     } else if (rho_prof_s == "predefined") {
-        rho_prof.reset(new PredefinedDensityProfile(species_name));
+        // Construct InjectorDensity with InjectorDensityPredefined.
+        inj_rho.reset(new InjectorDensity((InjectorDensityPredefined*)nullptr,species_name));
     } else if (rho_prof_s == "parse_density_function") {
-        pp.get("density_function(x,y,z)", str_density_function);
-        rho_prof.reset(new ParseDensityProfile(str_density_function));
+        std::vector<std::string> f;
+        pp.getarr("density_function(x,y,z)", f);
+        for (auto const& s : f) {
+            str_density_function += s;
+        }
+        // Construct InjectorDensity with InjectorDensityParser.
+        inj_rho.reset(new InjectorDensity((InjectorDensityParser*)nullptr,
+                                          makeParser(str_density_function)));
     } else {
         StringParseAbortMessage("Density profile type", rho_prof_s);
     }
 }
 
-void PlasmaInjector::parseMomentum(ParmParse pp){
+// Depending on injection type at runtime, initialize inj_mom
+// so that inj_mom->getMomentum calls
+// InjectorMomentum[Constant or Custom or etc.].getMomentum.
+void PlasmaInjector::parseMomentum (ParmParse& pp)
+{
     // parse momentum information
     std::string mom_dist_s;
     pp.get("momentum_distribution_type", mom_dist_s);
@@ -370,9 +245,11 @@ void PlasmaInjector::parseMomentum(ParmParse pp){
         pp.query("ux", ux);
         pp.query("uy", uy);
         pp.query("uz", uz);
-        mom_dist.reset(new ConstantMomentumDistribution(ux, uy, uz));
+        // Construct InjectorMomentum with InjectorMomentumConstant.
+        inj_mom.reset(new InjectorMomentum((InjectorMomentumConstant*)nullptr, ux,uy, uz));
     } else if (mom_dist_s == "custom") {
-        mom_dist.reset(new CustomMomentumDistribution(species_name));
+        // Construct InjectorMomentum with InjectorMomentumCustom.
+        inj_mom.reset(new InjectorMomentum((InjectorMomentumCustom*)nullptr, species_name));
     } else if (mom_dist_s == "gaussian") {
         Real ux_m = 0.;
         Real uy_m = 0.;
@@ -386,42 +263,68 @@ void PlasmaInjector::parseMomentum(ParmParse pp){
         pp.query("ux_th", ux_th);
         pp.query("uy_th", uy_th);
         pp.query("uz_th", uz_th);
-        mom_dist.reset(new GaussianRandomMomentumDistribution(ux_m, uy_m, uz_m, 
-                                                              ux_th, uy_th, uz_th));
+        // Construct InjectorMomentum with InjectorMomentumGaussian.
+        inj_mom.reset(new InjectorMomentum((InjectorMomentumGaussian*)nullptr,
+                                           ux_m, uy_m, uz_m, ux_th, uy_th, uz_th));
     } else if (mom_dist_s == "radial_expansion") {
         Real u_over_r = 0.;
         pp.query("u_over_r", u_over_r);
-        mom_dist.reset(new RadialExpansionMomentumDistribution(u_over_r));
+        // Construct InjectorMomentum with InjectorMomentumRadialExpansion.
+        inj_mom.reset(new InjectorMomentum
+                      ((InjectorMomentumRadialExpansion*)nullptr, u_over_r));
     } else if (mom_dist_s == "parse_momentum_function") {
-        pp.get("momentum_function_ux(x,y,z)", str_momentum_function_ux);
-        pp.get("momentum_function_uy(x,y,z)", str_momentum_function_uy);
-        pp.get("momentum_function_uz(x,y,z)", str_momentum_function_uz);
-        mom_dist.reset(new ParseMomentumFunction(str_momentum_function_ux, 
-                                                 str_momentum_function_uy, 
-                                                 str_momentum_function_uz));
+        std::vector<std::string> f;
+        pp.getarr("momentum_function_ux(x,y,z)", f);
+        for (auto const& s : f) {
+            str_momentum_function_ux += s;
+        }
+        f.clear();
+        pp.getarr("momentum_function_uy(x,y,z)", f);
+        for (auto const& s : f) {
+            str_momentum_function_uy += s;
+        }
+        f.clear();
+        pp.getarr("momentum_function_uz(x,y,z)", f);
+        for (auto const& s : f) {
+            str_momentum_function_uz += s;
+        }
+        // Construct InjectorMomentum with InjectorMomentumParser.
+        inj_mom.reset(new InjectorMomentum((InjectorMomentumParser*)nullptr,
+                                           makeParser(str_momentum_function_ux),
+                                           makeParser(str_momentum_function_uy),
+                                           makeParser(str_momentum_function_uz)));
     } else {
         StringParseAbortMessage("Momentum distribution type", mom_dist_s);
     }
 }
 
-void PlasmaInjector::getPositionUnitBox(vec3& r, int i_part, int ref_fac) {
-    return part_pos->getPositionUnitBox(r, i_part, ref_fac);
+XDim3 PlasmaInjector::getMomentum (Real x, Real y, Real z) const noexcept
+{
+    return inj_mom->getMomentum(x, y, z); // gamma*beta
+}
+
+bool PlasmaInjector::insideBounds (Real x, Real y, Real z) const noexcept
+{
+    return (x < xmax and x >= xmin and
+            y < ymax and y >= ymin and
+            z < zmax and z >= zmin);
 }
 
-void PlasmaInjector::getMomentum(vec3& u, Real x, Real y, Real z) {
-    mom_dist->getMomentum(u, x, y, z);
-    u[0] *= PhysConst::c;
-    u[1] *= PhysConst::c;
-    u[2] *= PhysConst::c;
+InjectorPosition*
+PlasmaInjector::getInjectorPosition ()
+{
+    return inj_pos.get();
 }
 
-bool PlasmaInjector::insideBounds(Real x, Real y, Real z) {
-  if (x >= xmax || x < xmin ||
-      y >= ymax || y < ymin ||
-      z >= zmax || z < zmin ) return false;
-  return true;
+InjectorDensity*
+PlasmaInjector::getInjectorDensity ()
+{
+    return inj_rho.get();
 }
 
-Real PlasmaInjector::getDensity(Real x, Real y, Real z) {
-    return rho_prof->getDensity(x, y, z);
+InjectorMomentum*
+PlasmaInjector::getInjectorMomentum ()
+{
+    return inj_mom.get();
 }
+
diff --git a/Source/Initialization/PlasmaProfiles.cpp b/Source/Initialization/PlasmaProfiles.cpp
deleted file mode 100644
index d9d207f7e..000000000
--- a/Source/Initialization/PlasmaProfiles.cpp
+++ /dev/null
@@ -1,41 +0,0 @@
-#include <PlasmaInjector.H>
-#include <cmath>
-#include <iostream>
-#include <WarpXConst.H>
-
-using namespace amrex;
-
-Real PredefinedDensityProfile::getDensity(Real x, Real y, Real z) const {
-    Real n;
-    if ( which_profile == predefined_profile_flag::parabolic_channel ) {
-        n = ParabolicChannel(x,y,z);
-    }
-    return n;
-}
-
-///
-/// plateau between linear upramp and downramp, and parab transverse profile
-///
-Real PredefinedDensityProfile::ParabolicChannel(Real x, Real y, Real z) const {
-    //  params = [z_start   ramp_up   plateau   ramp_down   rc       n0]
-    Real z_start   = params[0];
-    Real ramp_up   = params[1];
-    Real plateau   = params[2];
-    Real ramp_down = params[3];
-    Real rc        = params[4];
-    Real n0        = params[5];
-    Real n;
-    Real kp = PhysConst::q_e/PhysConst::c*sqrt( n0/(PhysConst::m_e*PhysConst::ep0) );
-
-    if        ((z-z_start)>=0               and (z-z_start)<ramp_up                  ) {
-        n = (z-z_start)/ramp_up;
-    } else if ((z-z_start)>=ramp_up         and (z-z_start)<ramp_up+plateau          ) {
-        n = 1;
-    } else if ((z-z_start)>=ramp_up+plateau and (z-z_start)<ramp_up+plateau+ramp_down) {
-        n = 1-((z-z_start)-ramp_up-plateau)/ramp_down;
-    } else {
-        n = 0;
-    }
-    n *= n0*(1+4*(x*x+y*y)/(kp*kp*std::pow(rc,4)));
-    return n;
-}
diff --git a/Source/Initialization/WarpXInitData.cpp b/Source/Initialization/WarpXInitData.cpp
index 2442e0205..590c11b84 100644
--- a/Source/Initialization/WarpXInitData.cpp
+++ b/Source/Initialization/WarpXInitData.cpp
@@ -1,6 +1,4 @@
 
-#include <numeric>
-
 #include <AMReX_ParallelDescriptor.H>
 #include <AMReX_ParmParse.H>
 
@@ -88,7 +86,7 @@ WarpX::InitDiagnostics () {
         const Real* current_lo = geom[0].ProbLo();
         const Real* current_hi = geom[0].ProbHi();
         Real dt_boost = dt[0];
-        
+
 	// Find the positions of the lab-frame box that corresponds to the boosted-frame box at t=0
 	Real zmin_lab = current_lo[moving_window_dir]/( (1.+beta_boost)*gamma_boost );
 	Real zmax_lab = current_hi[moving_window_dir]/( (1.+beta_boost)*gamma_boost );
@@ -97,7 +95,7 @@ WarpX::InitDiagnostics () {
 					       zmax_lab,
                                                moving_window_v, dt_snapshots_lab,
                                                num_snapshots_lab, gamma_boost,
-                                               t_new[0], dt_boost, 
+                                               t_new[0], dt_boost,
                                                moving_window_dir, geom[0]));
     }
 }
@@ -118,10 +116,10 @@ WarpX::InitFromScratch ()
 
     InitPML();
 
-#ifdef WARPX_DO_ELECTROSTATIC    
+#ifdef WARPX_DO_ELECTROSTATIC
     if (do_electrostatic) {
         getLevelMasks(masks);
-        
+
         // the plus one is to convert from num_cells to num_nodes
         getLevelMasks(gather_masks, n_buffer + 1);
     }
@@ -133,14 +131,35 @@ WarpX::InitPML ()
 {
     if (do_pml)
     {
+        amrex::IntVect do_pml_Lo_corrected = do_pml_Lo;
+
+#ifdef WARPX_DIM_RZ
+        do_pml_Lo_corrected[0] = 0; // no PML at r=0, in cylindrical geometry
+#endif
         pml[0].reset(new PML(boxArray(0), DistributionMap(0), &Geom(0), nullptr,
-                             pml_ncell, pml_delta, 0, do_dive_cleaning, do_moving_window));
+                             pml_ncell, pml_delta, 0,
+#ifdef WARPX_USE_PSATD
+                             dt[0], nox_fft, noy_fft, noz_fft, do_nodal,
+#endif
+                             do_dive_cleaning, do_moving_window,
+                             do_pml_Lo_corrected, do_pml_Hi));
         for (int lev = 1; lev <= finest_level; ++lev)
         {
+            amrex::IntVect do_pml_Lo_MR = amrex::IntVect::TheUnitVector();
+#ifdef WARPX_DIM_RZ
+            //In cylindrical geometry, if the edge of the patch is at r=0, do not add PML
+            if ((max_level > 0) && (fine_tag_lo[0]==0.)) {
+                do_pml_Lo_MR[0] = 0;
+            }
+#endif
             pml[lev].reset(new PML(boxArray(lev), DistributionMap(lev),
                                    &Geom(lev), &Geom(lev-1),
-                                   pml_ncell, pml_delta, refRatio(lev-1)[0], do_dive_cleaning,
-                                   do_moving_window));
+                                   pml_ncell, pml_delta, refRatio(lev-1)[0],
+#ifdef WARPX_USE_PSATD
+                                   dt[lev], nox_fft, noy_fft, noz_fft, do_nodal,
+#endif
+                                   do_dive_cleaning, do_moving_window,
+                                   do_pml_Lo_MR, amrex::IntVect::TheUnitVector()));
         }
     }
 }
@@ -226,7 +245,7 @@ WarpX::InitOpenbc ()
     Vector<int> alllohi(6*nprocs,100000);
 
     MPI_Allgather(lohi, 6, MPI_INT, alllohi.data(), 6, MPI_INT, ParallelDescriptor::Communicator());
-    
+
     BoxList bl{IndexType::TheNodeType()};
     for (int i = 0; i < nprocs; ++i)
     {
@@ -252,7 +271,7 @@ WarpX::InitOpenbc ()
     rho_openbc.copy(*rho, 0, 0, 1, rho->nGrow(), 0, gm.periodicity(), FabArrayBase::ADD);
 
     const Real* dx = gm.CellSize();
-    
+
     warpx_openbc_potential(rho_openbc[myproc].dataPtr(), phi_openbc[myproc].dataPtr(), dx);
 
     BoxArray nba = boxArray(lev);
@@ -322,7 +341,7 @@ WarpX::InitLevelData (int lev, Real time)
 void
 WarpX::InitLevelDataFFT (int lev, Real time)
 {
- 
+
     Efield_fp_fft[lev][0]->setVal(0.0);
     Efield_fp_fft[lev][1]->setVal(0.0);
     Efield_fp_fft[lev][2]->setVal(0.0);
diff --git a/Source/Laser/LaserParticleContainer.cpp b/Source/Laser/LaserParticleContainer.cpp
index 3d3447a3c..786ebc622 100644
--- a/Source/Laser/LaserParticleContainer.cpp
+++ b/Source/Laser/LaserParticleContainer.cpp
@@ -453,7 +453,12 @@ LaserParticleContainer::Evolve (int lev,
             pti.GetPosition(m_xp[thread_num], m_yp[thread_num], m_zp[thread_num]);
             BL_PROFILE_VAR_STOP(blp_copy);
 
-            if (rho) DepositCharge(pti, wp, rho, crho, 0, np_current, np, thread_num, lev);
+            if (rho) {
+                DepositCharge(pti, wp, rho, 0, 0, np_current, thread_num, lev, lev);
+                if (crho) {
+                    DepositCharge(pti, wp, crho, 0, np_current, np-np_current, thread_num, lev, lev-1);
+                }
+            }
 
             //
             // Particle Push
@@ -504,15 +509,15 @@ LaserParticleContainer::Evolve (int lev,
             // Current Deposition
             //
             // Deposit inside domains
-            DepositCurrentFortran(pti, wp, uxp, uyp, uzp, &jx, &jy, &jz,
-                                  0, np_current, thread_num,
-                                  lev, lev, dt);
+            DepositCurrent(pti, wp, uxp, uyp, uzp, &jx, &jy, &jz,
+                           0, np_current, thread_num,
+                           lev, lev, dt);
             bool has_buffer = cjx;
             if (has_buffer){
                 // Deposit in buffers
-                DepositCurrentFortran(pti, wp, uxp, uyp, uzp, cjx, cjy, cjz,
-                                      np_current, np-np_current, thread_num,
-                                      lev, lev-1, dt);
+                DepositCurrent(pti, wp, uxp, uyp, uzp, cjx, cjy, cjz,
+                               np_current, np-np_current, thread_num,
+                               lev, lev-1, dt);
             }
 
             //
@@ -522,7 +527,12 @@ LaserParticleContainer::Evolve (int lev,
             pti.SetPosition(m_xp[thread_num], m_yp[thread_num], m_zp[thread_num]);
             BL_PROFILE_VAR_STOP(blp_copy);
 
-            if (rho) DepositCharge(pti, wp, rho, crho, 1, np_current, np, thread_num, lev);
+            if (rho) {
+                DepositCharge(pti, wp, rho, 1, 0, np_current, thread_num, lev, lev);
+                if (crho) {
+                    DepositCharge(pti, wp, crho, 1, np_current, np-np_current, thread_num, lev, lev-1);
+                }
+            }
 
             if (cost) {
                 const Box& tbx = pti.tilebox();
diff --git a/Source/Make.WarpX b/Source/Make.WarpX
index 3060ae8f0..e3a33a00f 100644
--- a/Source/Make.WarpX
+++ b/Source/Make.WarpX
@@ -97,16 +97,24 @@ ifeq ($(USE_OPENBC_POISSON),TRUE)
 endif
 
 ifeq ($(USE_OPENPMD), TRUE)
-   OPENPMD_LIB_PATH ?= NOT_SET
-   ifneq ($(OPENPMD_LIB_PATH),NOT_SET)
-     LIBRARY_LOCATIONS += $(OPENPMD_LIB_PATH)
+   # try pkg-config query
+   ifeq (0, $(shell pkg-config "openPMD >= 0.9.0"; echo $$?))
+       CXXFLAGS += $(shell pkg-config --cflags openPMD)
+       LDFLAGS += $(shell pkg-config --libs openPMD)
+       LDFLAGS += -Xlinker -rpath -Xlinker $(shell pkg-config --variable=libdir openPMD)
+   # fallback to manual settings
+   else
+       OPENPMD_LIB_PATH ?= NOT_SET
+       ifneq ($(OPENPMD_LIB_PATH),NOT_SET)
+         LIBRARY_LOCATIONS += $(OPENPMD_LIB_PATH)
+       endif
+       OPENPMD_INCLUDE_PATH ?= NOT_SET
+       ifneq ($(OPENPMD_INCLUDE_PATH),NOT_SET)
+         INCLUDE_LOCATIONS += $(OPENPMD_INCLUDE_PATH)
+       endif
+       libraries += -lopenPMD
    endif
-   OPENPMD_INCLUDE_PATH ?= NOT_SET
-   ifneq ($(OPENPMD_INCLUDE_PATH),NOT_SET)
-     INCLUDE_LOCATIONS += $(OPENPMD_INCLUDE_PATH)
-   endif
-   DEFINES += -DWARPX_USE_OPENPMD -DopenPMD_HAVE_MPI=1
-   LIBRARIES += -lopenPMD -lhdf5
+   DEFINES += -DWARPX_USE_OPENPMD
 endif
    
 
@@ -115,7 +123,7 @@ ifeq ($(USE_PSATD),TRUE)
   DEFINES += -DWARPX_USE_PSATD
   ifeq ($(USE_CUDA),FALSE) # Running on CPU
      # Use FFTW
-     LIBRARIES += -lfftw3_mpi -lfftw3 -lfftw3_threads
+     libraries += -lfftw3_mpi -lfftw3 -lfftw3_threads
      FFTW_HOME ?= NOT_SET
      ifneq ($(FFTW_HOME),NOT_SET)
        VPATH_LOCATIONS += $(FFTW_HOME)/include
@@ -127,13 +135,12 @@ ifeq ($(USE_PSATD),TRUE)
      DEFINES += -DFFTW # PICSAR uses it
   else
     # Use cuFFT
-    LIBRARIES += -lcufft
+    libraries += -lcufft
   endif
 endif
 
 ifeq ($(USE_RZ),TRUE)
   USERSuffix := $(USERSuffix).RZ
-  DEFINES += -DWARPX_RZ
 endif
 
 ifeq ($(DO_ELECTROSTATIC),TRUE)
@@ -151,7 +158,7 @@ ifeq ($(USE_HDF5),TRUE)
         LIBRARY_LOCATIONS += $(HDF5_HOME)/lib
     endif
     DEFINES += -DWARPX_USE_HDF5
-    LIBRARIES += -lhdf5 -lz
+    libraries += -lhdf5 -lz
 endif     
 
 # job_info support
diff --git a/Source/Parser/GpuParser.H b/Source/Parser/GpuParser.H
new file mode 100644
index 000000000..1533ee6b9
--- /dev/null
+++ b/Source/Parser/GpuParser.H
@@ -0,0 +1,72 @@
+#ifndef WARPX_GPU_PARSER_H_
+#define WARPX_GPU_PARSER_H_
+
+#include <WarpXParser.H>
+#include <AMReX_Gpu.H>
+
+// When compiled for CPU, wrap WarpXParser and enable threading.
+// When compiled for GPU, store one copy of the parser in 
+// CUDA managed memory for __device__ code, and one copy of the parser
+// in CUDA managed memory for __host__ code. This way, the parser can be
+// efficiently called from both host and device.
+class GpuParser
+{
+public:
+    GpuParser (WarpXParser const& wp);
+    void clear ();
+
+    AMREX_GPU_HOST_DEVICE
+    double
+    operator() (double x, double y, double z) const noexcept
+    {
+#ifdef AMREX_USE_GPU
+
+#ifdef AMREX_DEVICE_COMPILE
+// WarpX compiled for GPU, function compiled for __device__
+        // the 3D position of each particle is stored in shared memory.
+        amrex::Gpu::SharedMemory<double> gsm;
+        double* p = gsm.dataPtr();
+        int tid = threadIdx.x + threadIdx.y*blockDim.x + threadIdx.z*(blockDim.x*blockDim.y);
+        p[tid*3] = x;
+        p[tid*3+1] = y;
+        p[tid*3+2] = z;
+        return wp_ast_eval(m_gpu_parser.ast);
+#else
+// WarpX compiled for GPU, function compiled for __host__
+        m_var.x = x;
+        m_var.y = y;
+        m_var.z = z;
+        return wp_ast_eval(m_cpu_parser.ast);
+#endif
+
+#else
+// WarpX compiled for CPU
+#ifdef _OPENMP
+        int tid = omp_get_thread_num();
+#else
+        int tid = 0;
+#endif
+        m_var[tid].x = x;
+        m_var[tid].y = y;
+        m_var[tid].z = z;
+        return wp_ast_eval(m_parser[tid]->ast);
+#endif
+    }
+
+private:
+
+#ifdef AMREX_USE_GPU
+    // Copy of the parser running on __device__
+    struct wp_parser m_gpu_parser;
+    // Copy of the parser running on __host__
+    struct wp_parser m_cpu_parser;
+    mutable amrex::XDim3 m_var;
+#else
+    // One parser and one (x,y,z) variable set per thread (arrays of size nthreads)
+    struct wp_parser** m_parser;
+    mutable amrex::XDim3* m_var;
+    int nthreads;
+#endif
+};
+
+#endif
diff --git a/Source/Parser/GpuParser.cpp b/Source/Parser/GpuParser.cpp
new file mode 100644
index 000000000..db1c2287d
--- /dev/null
+++ b/Source/Parser/GpuParser.cpp
@@ -0,0 +1,73 @@
+#include <GpuParser.H>
+
+GpuParser::GpuParser (WarpXParser const& wp)
+{
+#ifdef AMREX_USE_GPU
+
+    struct wp_parser* a_wp = wp.m_parser;
+    // Initialize GPU parser: allocate memory in CUDA managed memory,
+    // copy all data needed on GPU to m_gpu_parser
+    m_gpu_parser.sz_mempool = wp_ast_size(a_wp->ast);
+    m_gpu_parser.p_root = (struct wp_node*)
+        amrex::The_Managed_Arena()->alloc(m_gpu_parser.sz_mempool);
+    m_gpu_parser.p_free = m_gpu_parser.p_root;
+    // 0: don't free the source
+    m_gpu_parser.ast = wp_parser_ast_dup(&m_gpu_parser, a_wp->ast, 0);
+    wp_parser_regvar_gpu(&m_gpu_parser, "x", 0);
+    wp_parser_regvar_gpu(&m_gpu_parser, "y", 1);
+    wp_parser_regvar_gpu(&m_gpu_parser, "z", 2);
+
+    // Initialize CPU parser: allocate memory in CUDA managed memory,
+    // copy all data needed on CPU to m_cpu_parser
+    m_cpu_parser.sz_mempool = wp_ast_size(a_wp->ast);
+    m_cpu_parser.p_root = (struct wp_node*)
+        amrex::The_Managed_Arena()->alloc(m_cpu_parser.sz_mempool);
+    m_cpu_parser.p_free = m_cpu_parser.p_root;
+    // 0: don't free the source
+    m_cpu_parser.ast = wp_parser_ast_dup(&m_cpu_parser, a_wp->ast, 0);
+    wp_parser_regvar(&m_cpu_parser, "x", &(m_var.x));
+    wp_parser_regvar(&m_cpu_parser, "y", &(m_var.y));
+    wp_parser_regvar(&m_cpu_parser, "z", &(m_var.z));
+    
+#else // not defined AMREX_USE_GPU
+
+#ifdef _OPENMP
+    nthreads = omp_get_max_threads();
+#else // _OPENMP
+    nthreads = 1;
+#endif // _OPENMP
+
+    m_parser = ::new struct wp_parser*[nthreads];
+    m_var = ::new amrex::XDim3[nthreads];
+
+    for (int tid = 0; tid < nthreads; ++tid)
+    {
+#ifdef _OPENMP
+        m_parser[tid] = wp_parser_dup(wp.m_parser[tid]);
+#else // _OPENMP
+        m_parser[tid] = wp_parser_dup(wp.m_parser);
+#endif // _OPENMP
+        wp_parser_regvar(m_parser[tid], "x", &(m_var[tid].x));
+        wp_parser_regvar(m_parser[tid], "y", &(m_var[tid].y));
+        wp_parser_regvar(m_parser[tid], "z", &(m_var[tid].z));
+    }
+
+#endif // AMREX_USE_GPU
+}
+
+void
+GpuParser::clear ()
+{
+#ifdef AMREX_USE_GPU
+    amrex::The_Managed_Arena()->free(m_gpu_parser.p_root); // pool base returned by alloc() in the constructor
+    amrex::The_Managed_Arena()->free(m_cpu_parser.p_root); // same for the host-side copy of the AST
+#else // CPU build: one heap-allocated parser and variable set per thread
+    for (int tid = 0; tid < nthreads; ++tid)
+    {
+        wp_parser_delete(m_parser[tid]);
+    }
+    ::delete[] m_parser;
+    ::delete[] m_var;
+#endif
+}
+
diff --git a/Source/Parser/Make.package b/Source/Parser/Make.package
index 26ef4fb43..5ce02cbda 100644
--- a/Source/Parser/Make.package
+++ b/Source/Parser/Make.package
@@ -3,6 +3,8 @@ cEXE_sources += wp_parser_y.c wp_parser.tab.c wp_parser.lex.c wp_parser_c.c
 cEXE_headers += wp_parser_y.h wp_parser.tab.h wp_parser.lex.h wp_parser_c.h
 CEXE_sources += WarpXParser.cpp
 CEXE_headers += WarpXParser.H
+CEXE_headers += GpuParser.H
+CEXE_sources += GpuParser.cpp
 
 INCLUDE_LOCATIONS += $(WARPX_HOME)/Source/Parser
 VPATH_LOCATIONS   += $(WARPX_HOME)/Source/Parser
diff --git a/Source/Parser/WarpXParser.H b/Source/Parser/WarpXParser.H
index 046491e29..ffa61e457 100644
--- a/Source/Parser/WarpXParser.H
+++ b/Source/Parser/WarpXParser.H
@@ -13,6 +13,8 @@
 #include <omp.h>
 #endif
 
+class GpuParser;
+
 class WarpXParser
 {
 public:
@@ -46,6 +48,8 @@ public:
 
     std::set<std::string> symbols () const;
 
+    friend class GpuParser;
+
 private:
     void clear ();
 
diff --git a/Source/Parser/wp_parser_c.h b/Source/Parser/wp_parser_c.h
index d810bd685..3aafdec65 100644
--- a/Source/Parser/wp_parser_c.h
+++ b/Source/Parser/wp_parser_c.h
@@ -2,6 +2,8 @@
 #define WP_PARSER_C_H_
 
 #include "wp_parser_y.h"
+#include <AMReX_GpuQualifiers.H>
+#include <AMReX_Extension.H>
 
 #ifdef __cplusplus
 extern "C" {
@@ -18,71 +20,167 @@ extern "C" {
 #include <set>
 #include <string>
 
-inline
-double
+AMREX_GPU_HOST_DEVICE
+inline double
 wp_ast_eval (struct wp_node* node)
 {
     double result;
 
+#ifdef AMREX_DEVICE_COMPILE
+    extern __shared__ double extern_xyz[];
+    int tid = threadIdx.x + threadIdx.y*blockDim.x + threadIdx.z*(blockDim.x*blockDim.y);
+    double* x = extern_xyz + tid*3;
+#endif
+
     switch (node->type)
     {
     case WP_NUMBER:
+    {
         result = ((struct wp_number*)node)->value;
         break;
+    }
     case WP_SYMBOL:
-        result = *(((struct wp_symbol*)node)->pointer);
+    {
+#ifdef AMREX_DEVICE_COMPILE
+        int i =((struct wp_symbol*)node)->ip.i;
+        result = x[i];
+#else
+        result = *(((struct wp_symbol*)node)->ip.p);
+#endif
         break;
+    }
     case WP_ADD:
+    {
         result = wp_ast_eval(node->l) + wp_ast_eval(node->r);
         break;
+    }
     case WP_SUB:
+    {
         result = wp_ast_eval(node->l) - wp_ast_eval(node->r);
         break;
+    }
     case WP_MUL:
+    {
         result = wp_ast_eval(node->l) * wp_ast_eval(node->r);
         break;
+    }
     case WP_DIV:
+    {
         result = wp_ast_eval(node->l) / wp_ast_eval(node->r);
         break;
+    }
     case WP_NEG:
+    {
         result = -wp_ast_eval(node->l);
         break;
+    }
     case WP_F1:
+    {
         result = wp_call_f1(((struct wp_f1*)node)->ftype,
                 wp_ast_eval(((struct wp_f1*)node)->l));
         break;
+    }
     case WP_F2:
+    {
         result = wp_call_f2(((struct wp_f2*)node)->ftype,
                 wp_ast_eval(((struct wp_f2*)node)->l),
                 wp_ast_eval(((struct wp_f2*)node)->r));
         break;
+    }
     case WP_ADD_VP:
-        result = node->lvp.v + *(node->rp);
+    {
+#ifdef AMREX_DEVICE_COMPILE
+        int i = node->rip.i;
+        result = node->lvp.v + x[i];
+#else
+        result = node->lvp.v + *(node->rip.p);
+#endif
         break;
+    }
     case WP_ADD_PP:
-        result = *(node->lvp.p) + *(node->rp);
+    {
+#ifdef AMREX_DEVICE_COMPILE
+        int i = node->lvp.ip.i;
+        int j = node->rip.i;
+        result = x[i] + x[j];
+#else
+        result = *(node->lvp.ip.p) + *(node->rip.p);
+#endif
         break;
+    }
     case WP_SUB_VP:
-        result = node->lvp.v - *(node->rp);
+    {
+#ifdef AMREX_DEVICE_COMPILE
+        int i = node->rip.i;
+        result = node->lvp.v - x[i];
+#else
+        result = node->lvp.v - *(node->rip.p);
+#endif
         break;
+    }
     case WP_SUB_PP:
-        result = *(node->lvp.p) - *(node->rp);
+    {
+#ifdef AMREX_DEVICE_COMPILE
+        int i = node->lvp.ip.i;
+        int j = node->rip.i;
+        result = x[i] - x[j];
+#else
+        result = *(node->lvp.ip.p) - *(node->rip.p);
+#endif
         break;
+    }
     case WP_MUL_VP:
-        result = node->lvp.v * *(node->rp);
+    {
+#ifdef AMREX_DEVICE_COMPILE
+        int i = node->rip.i;
+        result = node->lvp.v * x[i];
+#else
+        result = node->lvp.v * *(node->rip.p);
+#endif
         break;
+    }
     case WP_MUL_PP:
-        result = *(node->lvp.p) * *(node->rp);
+    {
+#ifdef AMREX_DEVICE_COMPILE
+        int i = node->lvp.ip.i;
+        int j = node->rip.i;
+        result = x[i] * x[j];
+#else
+        result = *(node->lvp.ip.p) * *(node->rip.p);
+#endif
         break;
+    }
     case WP_DIV_VP:
-        result = node->lvp.v / *(node->rp);
+    {
+#ifdef AMREX_DEVICE_COMPILE
+        int i = node->rip.i;
+        result = node->lvp.v / x[i];
+#else
+        result = node->lvp.v / *(node->rip.p);
+#endif
         break;
+    }
     case WP_DIV_PP:
-        result = *(node->lvp.p) / *(node->rp);
+    {
+#ifdef AMREX_DEVICE_COMPILE
+        int i = node->lvp.ip.i;
+        int j = node->rip.i;
+        result = x[i] / x[j];
+#else
+        result = *(node->lvp.ip.p) / *(node->rip.p);
+#endif
         break;
+    }
     case WP_NEG_P:
-        result = -*(node->lvp.p);
+    {
+#ifdef AMREX_DEVICE_COMPILE
+        int i = node->lvp.ip.i; // unary node: operand index is stored in lvp (see wp_ast_regvar_gpu), rip is unset
+        result = -x[i];
+#else
+        result = -*(node->lvp.ip.p);
+#endif
         break;
+    }
     default:
         yyerror("wp_ast_eval: unknown node type %d\n", node->type);
     }
diff --git a/Source/Parser/wp_parser_y.c b/Source/Parser/wp_parser_y.c
index 46cb199db..259f9368b 100644
--- a/Source/Parser/wp_parser_y.c
+++ b/Source/Parser/wp_parser_y.c
@@ -6,6 +6,8 @@
 #include "wp_parser_y.h"
 #include "wp_parser.tab.h"
 
+#include <AMReX_GpuQualifiers.H>
+
 static struct wp_node* wp_root = NULL;
 
 /* This is called by a bison rule to store the original AST in a
@@ -33,7 +35,7 @@ wp_makesymbol (char* name)
     struct wp_symbol* symbol = (struct wp_symbol*) malloc(sizeof(struct wp_symbol));
     symbol->type = WP_SYMBOL;
     symbol->name = strdup(name);
-    symbol->pointer = NULL;
+    symbol->ip.p = NULL;
     return symbol;
 }
 
@@ -74,13 +76,19 @@ wp_newf2 (enum wp_f2_t ftype, struct wp_node* l, struct wp_node* r)
     return (struct wp_node*) tmp;
 }
 
+AMREX_GPU_HOST_DEVICE
 void
 yyerror (char const *s, ...)
 {
     va_list vl;
     va_start(vl, s);
+#ifdef AMREX_DEVICE_COMPILE
+    printf("%s\n", s); // no vprintf here: print the raw message instead of feeding "\n" to the caller's format
+    assert(0);
+#else
     vfprintf(stderr, s, vl);
     fprintf(stderr, "\n");
+#endif
     va_end(vl);
 }
 
@@ -97,7 +105,7 @@ wp_parser_new (void)
 
     my_parser->ast = wp_parser_ast_dup(my_parser, wp_root,1); /* 1: free the source wp_root */
 
-    if (my_parser->p_root + my_parser->sz_mempool != my_parser->p_free) {
+    if ((char*)my_parser->p_root + my_parser->sz_mempool != (char*)my_parser->p_free) {
         yyerror("wp_parser_new: error in memory size");
         exit(1);
     }
@@ -145,6 +153,7 @@ wp_parser_dup (struct wp_parser* source)
     return dest;
 }
 
+AMREX_GPU_HOST_DEVICE
 double
 wp_call_f1 (enum wp_f1_t type, double a)
 {
@@ -175,6 +184,7 @@ wp_call_f1 (enum wp_f1_t type, double a)
     }
 }
 
+AMREX_GPU_HOST_DEVICE
 double
 wp_call_f2 (enum wp_f2_t type, double a, double b)
 {
@@ -346,23 +356,23 @@ wp_parser_ast_dup (struct wp_parser* my_parser, struct wp_node* node, int move)
 
 #define WP_MOVEUP_R(node, v) \
     struct wp_node* n = node->r->r; \
-    double* p = node->r->rp; \
+    double* p = node->r->rip.p; \
     node->r = n; \
     node->lvp.v = v; \
-    node->rp = p;
+    node->rip.p = p;
 #define WP_MOVEUP_L(node, v) \
     struct wp_node* n = node->l->r; \
-    double* p = node->l->rp; \
+    double* p = node->l->rip.p; \
     node->r = n; \
     node->lvp.v = v; \
-    node->rp = p;
+    node->rip.p = p;
 #define WP_EVAL_R(node) node->r->lvp.v
 #define WP_EVAL_L(node) node->l->lvp.v
 
 #define WP_NEG_MOVEUP(node) \
     node->r = node->l->r; \
     node->lvp.v = -node->l->lvp.v; \
-    node->rp = node->l->rp;
+    node->rip.p = node->l->rip.p;
 
 void
 wp_ast_optimize (struct wp_node* node)
@@ -391,22 +401,22 @@ wp_ast_optimize (struct wp_node* node)
                  node->r->type == WP_SYMBOL)
         {
             node->lvp.v = ((struct wp_number*)(node->l))->value;
-            node->rp = ((struct wp_symbol*)(node->r))->pointer;
+            node->rip.p = ((struct wp_symbol*)(node->r))->ip.p;
             node->type = WP_ADD_VP;
         }
         else if (node->l->type == WP_SYMBOL &&
                  node->r->type == WP_NUMBER)
         {
             node->lvp.v = ((struct wp_number*)(node->r))->value;
-            node->rp = ((struct wp_symbol*)(node->l))->pointer;
+            node->rip.p = ((struct wp_symbol*)(node->l))->ip.p;
             node->r = node->l;
             node->type = WP_ADD_VP;
         }
         else if (node->l->type == WP_SYMBOL &&
                  node->r->type == WP_SYMBOL)
         {
-            node->lvp.p = ((struct wp_symbol*)(node->l))->pointer;
-            node->rp = ((struct wp_symbol*)(node->r))->pointer;
+            node->lvp.ip.p = ((struct wp_symbol*)(node->l))->ip.p;
+            node->rip.p = ((struct wp_symbol*)(node->r))->ip.p;
             node->type = WP_ADD_PP;
         }
         else if (node->l->type == WP_NUMBER &&
@@ -454,22 +464,22 @@ wp_ast_optimize (struct wp_node* node)
                  node->r->type == WP_SYMBOL)
         {
             node->lvp.v = ((struct wp_number*)(node->l))->value;
-            node->rp = ((struct wp_symbol*)(node->r))->pointer;
+            node->rip.p = ((struct wp_symbol*)(node->r))->ip.p;
             node->type = WP_SUB_VP;
         }
         else if (node->l->type == WP_SYMBOL &&
                  node->r->type == WP_NUMBER)
         {
             node->lvp.v = -((struct wp_number*)(node->r))->value;
-            node->rp = ((struct wp_symbol*)(node->l))->pointer;
+            node->rip.p = ((struct wp_symbol*)(node->l))->ip.p;
             node->r = node->l;
             node->type = WP_ADD_VP;
         }
         else if (node->l->type == WP_SYMBOL &&
                  node->r->type == WP_SYMBOL)
         {
-            node->lvp.p = ((struct wp_symbol*)(node->l))->pointer;
-            node->rp = ((struct wp_symbol*)(node->r))->pointer;
+            node->lvp.ip.p = ((struct wp_symbol*)(node->l))->ip.p;
+            node->rip.p = ((struct wp_symbol*)(node->r))->ip.p;
             node->type = WP_SUB_PP;
         }
         else if (node->l->type == WP_NUMBER &&
@@ -517,22 +527,22 @@ wp_ast_optimize (struct wp_node* node)
                  node->r->type == WP_SYMBOL)
         {
             node->lvp.v = ((struct wp_number*)(node->l))->value;
-            node->rp = ((struct wp_symbol*)(node->r))->pointer;
+            node->rip.p = ((struct wp_symbol*)(node->r))->ip.p;
             node->type = WP_MUL_VP;
         }
         else if (node->l->type == WP_SYMBOL &&
                  node->r->type == WP_NUMBER)
         {
             node->lvp.v = ((struct wp_number*)(node->r))->value;
-            node->rp = ((struct wp_symbol*)(node->l))->pointer;
+            node->rip.p = ((struct wp_symbol*)(node->l))->ip.p;
             node->r = node->l;
             node->type = WP_MUL_VP;
         }
         else if (node->l->type == WP_SYMBOL &&
                  node->r->type == WP_SYMBOL)
         {
-            node->lvp.p = ((struct wp_symbol*)(node->l))->pointer;
-            node->rp = ((struct wp_symbol*)(node->r))->pointer;
+            node->lvp.ip.p = ((struct wp_symbol*)(node->l))->ip.p;
+            node->rip.p = ((struct wp_symbol*)(node->r))->ip.p;
             node->type = WP_MUL_PP;
         }
         else if (node->l->type == WP_NUMBER &&
@@ -580,22 +590,22 @@ wp_ast_optimize (struct wp_node* node)
                  node->r->type == WP_SYMBOL)
         {
             node->lvp.v = ((struct wp_number*)(node->l))->value;
-            node->rp = ((struct wp_symbol*)(node->r))->pointer;
+            node->rip.p = ((struct wp_symbol*)(node->r))->ip.p;
             node->type = WP_DIV_VP;
         }
         else if (node->l->type == WP_SYMBOL &&
                  node->r->type == WP_NUMBER)
         {
             node->lvp.v = 1./((struct wp_number*)(node->r))->value;
-            node->rp = ((struct wp_symbol*)(node->l))->pointer;
+            node->rip.p = ((struct wp_symbol*)(node->l))->ip.p;
             node->r = node->l;
             node->type = WP_MUL_VP;
         }
         else if (node->l->type == WP_SYMBOL &&
                  node->r->type == WP_SYMBOL)
         {
-            node->lvp.p = ((struct wp_symbol*)(node->l))->pointer;
-            node->rp = ((struct wp_symbol*)(node->r))->pointer;
+            node->lvp.ip.p = ((struct wp_symbol*)(node->l))->ip.p;
+            node->rip.p = ((struct wp_symbol*)(node->r))->ip.p;
             node->type = WP_DIV_PP;
         }
         else if (node->l->type == WP_NUMBER &&
@@ -637,7 +647,7 @@ wp_ast_optimize (struct wp_node* node)
         }
         else if (node->l->type == WP_SYMBOL)
         {
-            node->lvp.p = ((struct wp_symbol*)(node->l))->pointer;
+            node->lvp.ip.p = ((struct wp_symbol*)(node->l))->ip.p;
             node->type = WP_NEG_P;
         }
         else if (node->l->type == WP_ADD_VP)
@@ -936,7 +946,7 @@ wp_ast_regvar (struct wp_node* node, char const* name, double* p)
         break;
     case WP_SYMBOL:
         if (strcmp(name, ((struct wp_symbol*)node)->name) == 0) {
-            ((struct wp_symbol*)node)->pointer = p;
+            ((struct wp_symbol*)node)->ip.p = p;
         }
         break;
     case WP_ADD:
@@ -961,11 +971,11 @@ wp_ast_regvar (struct wp_node* node, char const* name, double* p)
     case WP_MUL_VP:
     case WP_DIV_VP:
         wp_ast_regvar(node->r, name, p);
-        node->rp = ((struct wp_symbol*)(node->r))->pointer;
+        node->rip.p = ((struct wp_symbol*)(node->r))->ip.p;
         break;
     case WP_NEG_P:
         wp_ast_regvar(node->l, name, p);
-        node->lvp.p = ((struct wp_symbol*)(node->l))->pointer;
+        node->lvp.ip.p = ((struct wp_symbol*)(node->l))->ip.p;
         break;
     case WP_ADD_PP:
     case WP_SUB_PP:
@@ -973,8 +983,8 @@ wp_ast_regvar (struct wp_node* node, char const* name, double* p)
     case WP_DIV_PP:
         wp_ast_regvar(node->l, name, p);
         wp_ast_regvar(node->r, name, p);
-        node->lvp.p = ((struct wp_symbol*)(node->l))->pointer;
-        node->rp = ((struct wp_symbol*)(node->r))->pointer;
+        node->lvp.ip.p = ((struct wp_symbol*)(node->l))->ip.p;
+        node->rip.p = ((struct wp_symbol*)(node->r))->ip.p;
         break;
     default:
         yyerror("wp_ast_regvar: unknown node type %d\n", node->type);
@@ -982,6 +992,61 @@ wp_ast_regvar (struct wp_node* node, char const* name, double* p)
     }
 }
 
+void
+wp_ast_regvar_gpu (struct wp_node* node, char const* name, int i)
+{
+    switch (node->type)
+    {
+    case WP_NUMBER:
+        break;
+    case WP_SYMBOL:
+        if (strcmp(name, ((struct wp_symbol*)node)->name) == 0) {
+            ((struct wp_symbol*)node)->ip.i = i;
+        }
+        break;
+    case WP_ADD:
+    case WP_SUB:
+    case WP_MUL:
+    case WP_DIV:
+        wp_ast_regvar_gpu(node->l, name, i);
+        wp_ast_regvar_gpu(node->r, name, i);
+        break;
+    case WP_NEG:
+        wp_ast_regvar_gpu(node->l, name, i);
+        break;
+    case WP_F1:
+        wp_ast_regvar_gpu(node->l, name, i);
+        break;
+    case WP_F2:
+        wp_ast_regvar_gpu(node->l, name, i);
+        wp_ast_regvar_gpu(node->r, name, i);
+        break;
+    case WP_ADD_VP:
+    case WP_SUB_VP:
+    case WP_MUL_VP:
+    case WP_DIV_VP:
+        wp_ast_regvar_gpu(node->r, name, i);
+        node->rip.i = ((struct wp_symbol*)(node->r))->ip.i;
+        break;
+    case WP_NEG_P:
+        wp_ast_regvar_gpu(node->l, name, i);
+        node->lvp.ip.i = ((struct wp_symbol*)(node->l))->ip.i;
+        break;
+    case WP_ADD_PP:
+    case WP_SUB_PP:
+    case WP_MUL_PP:
+    case WP_DIV_PP:
+        wp_ast_regvar_gpu(node->l, name, i);
+        wp_ast_regvar_gpu(node->r, name, i);
+        node->lvp.ip.i = ((struct wp_symbol*)(node->l))->ip.i;
+        node->rip.i = ((struct wp_symbol*)(node->r))->ip.i;
+        break;
+    default:
+        yyerror("wp_ast_regvar_gpu: unknown node type %d\n", node->type);
+        exit(1);
+    }
+}
+
 void wp_ast_setconst (struct wp_node* node, char const* name, double c)
 {
     switch (node->type)
@@ -1040,6 +1105,12 @@ wp_parser_regvar (struct wp_parser* parser, char const* name, double* p)
 }
 
 void
+wp_parser_regvar_gpu (struct wp_parser* parser, char const* name, int i)
+{
+    wp_ast_regvar_gpu(parser->ast, name, i);
+}
+
+void
 wp_parser_setconst (struct wp_parser* parser, char const* name, double c)
 {
     wp_ast_setconst(parser->ast, name, c);
diff --git a/Source/Parser/wp_parser_y.h b/Source/Parser/wp_parser_y.h
index 4a3aeda40..8c9f8e4e4 100644
--- a/Source/Parser/wp_parser_y.h
+++ b/Source/Parser/wp_parser_y.h
@@ -1,6 +1,8 @@
 #ifndef WP_PARSER_Y_H_
 #define WP_PARSER_Y_H_
 
+#include <AMReX_GpuQualifiers.H>
+
 #ifdef __cplusplus
 #include <cstdlib>
 extern "C" {
@@ -73,17 +75,22 @@ enum wp_node_t {
  * wp_node_t type can be safely checked to determine their real type.
  */
 
-union wp_vp {
-    double  v;
+union wp_ip {
+    int i;
     double* p;
 };
 
+union wp_vp {
+    double v;
+    union wp_ip ip;
+};
+
 struct wp_node {
     enum wp_node_t type;
     struct wp_node* l;
     struct wp_node* r;
     union wp_vp lvp;  // After optimization, this may store left value/pointer.
-    double* rp;       //                     this may store right      pointer.
+    union wp_ip rip;  //                     this may store right pointer (host) or variable index (GPU).
 };
 
 struct wp_number {
@@ -94,7 +101,7 @@ struct wp_number {
 struct wp_symbol {
     enum wp_node_t type;
     char* name;
-    double* pointer;
+    union wp_ip ip;
 };
 
 struct wp_f1 {  /* Builtin functions with one argument */
@@ -124,6 +131,7 @@ struct wp_node* wp_newf1 (enum wp_f1_t ftype, struct wp_node* l);
 struct wp_node* wp_newf2 (enum wp_f2_t ftype, struct wp_node* l,
                           struct wp_node* r);
 
+AMREX_GPU_HOST_DEVICE
 void yyerror (char const *s, ...);
 
 /*******************************************************************/
@@ -146,6 +154,7 @@ struct wp_parser* wp_parser_dup (struct wp_parser* source);
 struct wp_node* wp_parser_ast_dup (struct wp_parser* parser, struct wp_node* src, int move);
 
 void wp_parser_regvar (struct wp_parser* parser, char const* name, double* p);
+void wp_parser_regvar_gpu (struct wp_parser* parser, char const* name, int i);
 void wp_parser_setconst (struct wp_parser* parser, char const* name, double c);
 
 /* We need to walk the tree in these functions */
@@ -153,10 +162,11 @@ void wp_ast_optimize (struct wp_node* node);
 size_t wp_ast_size (struct wp_node* node);
 void wp_ast_print (struct wp_node* node);
 void wp_ast_regvar (struct wp_node* node, char const* name, double* p);
+void wp_ast_regvar_gpu (struct wp_node* node, char const* name, int i);
 void wp_ast_setconst (struct wp_node* node, char const* name, double c);
 
-double wp_call_f1 (enum wp_f1_t type, double a);
-double wp_call_f2 (enum wp_f2_t type, double a, double b);
+AMREX_GPU_HOST_DEVICE double wp_call_f1 (enum wp_f1_t type, double a);
+AMREX_GPU_HOST_DEVICE double wp_call_f2 (enum wp_f2_t type, double a, double b);
 
 #ifdef __cplusplus
 }
diff --git a/Source/Particles/Deposition/ChargeDeposition.H b/Source/Particles/Deposition/ChargeDeposition.H
new file mode 100755
index 000000000..a6573b7ab
--- /dev/null
+++ b/Source/Particles/Deposition/ChargeDeposition.H
@@ -0,0 +1,97 @@
+#ifndef CHARGEDEPOSITION_H_
+#define CHARGEDEPOSITION_H_
+
+#include "ShapeFactors.H"
+
+/* \brief Charge deposition with shape factors of order depos_order (template parameter).
+ * \param xp, yp, zp   : Pointer to arrays of particle positions.
+ * \param wp           : Pointer to array of particle weights.
+ * \param rho_arr      : Array4 of charge density, either full array or tile.
+ * \param np_to_depose : Number of particles for which charge is deposited.
+ * \param dx           : 3D cell size
+ * \param xyzmin       : Physical lower bounds of domain.
+ * \param lo           : Index lower bounds of domain.
+ * \param q            : Species charge.
+ */
+template <int depos_order>
+void doChargeDepositionShapeN(const amrex::Real * const xp, 
+                              const amrex::Real * const yp, 
+                              const amrex::Real * const zp,
+                              const amrex::Real * const wp,
+                              const amrex::Array4<amrex::Real>& rho_arr,
+                              const long np_to_depose,
+                              const std::array<amrex::Real,3>& dx,
+                              const std::array<amrex::Real, 3> xyzmin,
+                              const amrex::Dim3 lo,
+                              const amrex::Real q)
+{
+    const amrex::Real dxi = 1.0/dx[0];  // inverse cell size along x
+    const amrex::Real dzi = 1.0/dx[2];  // inverse cell size along z
+#if (AMREX_SPACEDIM == 2)
+    const amrex::Real invvol = dxi*dzi;  // no y extent in this branch
+#elif (defined WARPX_DIM_3D)
+    const amrex::Real dyi = 1.0/dx[1];  // inverse cell size along y
+    const amrex::Real invvol = dxi*dyi*dzi;
+#endif
+
+    const amrex::Real xmin = xyzmin[0];
+    const amrex::Real ymin = xyzmin[1];
+    const amrex::Real zmin = xyzmin[2];
+
+    // Loop over the np_to_depose particles and deposit into rho_arr
+    amrex::ParallelFor(
+        np_to_depose,
+        [=] AMREX_GPU_DEVICE (long ip) {
+            // --- Particle charge (q times weight) scaled by invvol
+            const amrex::Real wq = q*wp[ip]*invvol;
+
+            // --- Compute shape factors
+            // x direction
+            // Get particle position in grid coordinates
+#if (defined WARPX_DIM_RZ)
+            const amrex::Real r = std::sqrt(xp[ip]*xp[ip] + yp[ip]*yp[ip]);  // radius from the Cartesian (x, y) position
+            const amrex::Real x = (r - xmin)*dxi;
+#else
+            const amrex::Real x = (xp[ip] - xmin)*dxi;
+#endif
+            // Compute shape factors for node-centered quantities
+            amrex::Real AMREX_RESTRICT sx[depos_order + 1];
+            // i: leftmost grid point (node-centered) that the particle touches
+            const int i = compute_shape_factor<depos_order>(sx,  x);
+                     
+#if (defined WARPX_DIM_3D)
+            // y direction (same steps as for x; j is the leftmost grid point)
+            const amrex::Real y = (yp[ip] - ymin)*dyi;
+            amrex::Real AMREX_RESTRICT sy[depos_order + 1];
+            const int j = compute_shape_factor<depos_order>(sy,  y);
+#endif
+            // z direction (same steps as for x; k is the leftmost grid point)
+            const amrex::Real z = (zp[ip] - zmin)*dzi;
+            amrex::Real AMREX_RESTRICT sz[depos_order + 1];
+            const int k = compute_shape_factor<depos_order>(sz,  z);
+
+            // Deposit charge into rho_arr with atomic adds, on (depos_order+1) points per direction
+#if (defined WARPX_DIM_2D) || (defined WARPX_DIM_RZ)
+            for (int iz=0; iz<=depos_order; iz++){
+                for (int ix=0; ix<=depos_order; ix++){
+                    amrex::Gpu::Atomic::Add(
+                        &rho_arr(lo.x+i+ix, lo.y+k+iz, 0),  // the z index goes in the 2nd array dimension here
+                        sx[ix]*sz[iz]*wq);
+                }
+            }
+#elif (defined WARPX_DIM_3D)
+            for (int iz=0; iz<=depos_order; iz++){
+                for (int iy=0; iy<=depos_order; iy++){
+                    for (int ix=0; ix<=depos_order; ix++){
+                        amrex::Gpu::Atomic::Add(
+                            &rho_arr(lo.x+i+ix, lo.y+j+iy, lo.z+k+iz),
+                            sx[ix]*sy[iy]*sz[iz]*wq);
+                    }
+                }
+            }
+#endif
+        }
+        );
+}
+
+#endif // CHARGEDEPOSITION_H_
diff --git a/Source/Particles/Deposition/CurrentDeposition.H b/Source/Particles/Deposition/CurrentDeposition.H
index 97bc53c20..4a392b57e 100644
--- a/Source/Particles/Deposition/CurrentDeposition.H
+++ b/Source/Particles/Deposition/CurrentDeposition.H
@@ -1,52 +1,7 @@
 #ifndef CURRENTDEPOSITION_H_
 #define CURRENTDEPOSITION_H_
 
-using namespace amrex;
-
-// Compute shape factor and return index of leftmost cell where
-// particle writes.
-// Specialized templates are defined below for orders 1, 2 and 3.
-template <int depos_order>
-AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
-int compute_shape_factor(Real* const sx, Real xint);
-
-// Compute shape factor for order 1.
-template <>
-AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
-int compute_shape_factor <1> (Real* const sx, Real xmid){
-    int j = (int) xmid;
-    Real xint = xmid-j;
-    sx[0] = 1.0 - xint;
-    sx[1] = xint;
-    return j;
-}
-
-// Compute shape factor for order 2.
-template <>
-AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
-int compute_shape_factor <2> (Real* const sx, Real xmid){
-    int j = (int) (xmid+0.5);
-    Real xint = xmid-j;
-    sx[0] = 0.5*(0.5-xint)*(0.5-xint);
-    sx[1] = 0.75-xint*xint;
-    sx[2] = 0.5*(0.5+xint)*(0.5+xint);
-    // index of the leftmost cell where particle deposits
-    return j-1;
-}
-
-// Compute shape factor for order 3.
-template <>
-AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
-int compute_shape_factor <3> (Real* const sx, Real xmid){
-    int j = (int) xmid;
-    Real xint = xmid-j;
-    sx[0] = 1.0/6.0*(1.0-xint)*(1.0-xint)*(1.0-xint);
-    sx[1] = 2.0/3.0-xint*xint*(1-xint/2.0);
-    sx[2] = 2.0/3.0-(1-xint)*(1-xint)*(1.0-0.5*(1-xint));
-    sx[3] = 1.0/6.0*xint*xint*xint;
-    // index of the leftmost cell where particle deposits
-    return j-1;
-}
+#include "ShapeFactors.H"
 
 /* \brief Current Deposition for thread thread_num
  * /param xp, yp, zp   : Pointer to arrays of particle positions.
@@ -55,9 +10,7 @@ int compute_shape_factor <3> (Real* const sx, Real xmid){
  * \param jx_arr       : Array4 of current density, either full array or tile.
  * \param jy_arr       : Array4 of current density, either full array or tile.
  * \param jz_arr       : Array4 of current density, either full array or tile.
- * \param offset       : Index of first particle for which current is deposited
  * \param np_to_depose : Number of particles for which current is deposited.
-                        Particles [offset,offset+np_tp_depose] deposit current.
  * \param dt           : Time step for particle level
  * \param dx           : 3D cell size
  * \param xyzmin       : Physical lower bounds of domain.
@@ -66,164 +19,144 @@ int compute_shape_factor <3> (Real* const sx, Real xmid){
  * /param q            : species charge.
  */
 template <int depos_order>
-void doDepositionShapeN(const Real * const xp, const Real * const yp, const Real * const zp,
-                        const Real * const wp, const Real * const uxp,
-                        const Real * const uyp, const Real * const uzp,
-                        const amrex::Array4<amrex::Real>& jx_arr, 
-                        const amrex::Array4<amrex::Real>& jy_arr, 
+void doDepositionShapeN(const amrex::Real * const xp, 
+                        const amrex::Real * const yp, 
+                        const amrex::Real * const zp,
+                        const amrex::Real * const wp,
+                        const amrex::Real * const uxp,
+                        const amrex::Real * const uyp,
+                        const amrex::Real * const uzp,
+                        const amrex::Array4<amrex::Real>& jx_arr,
+                        const amrex::Array4<amrex::Real>& jy_arr,
                         const amrex::Array4<amrex::Real>& jz_arr,
-                        const long offset, const long np_to_depose, 
-                        const amrex::Real dt, const std::array<amrex::Real,3>& dx,
-                        const std::array<Real, 3> xyzmin,
-                        const Dim3 lo,
+                        const long np_to_depose, const amrex::Real dt,
+                        const std::array<amrex::Real,3>& dx,
+                        const std::array<amrex::Real, 3> xyzmin,
+                        const amrex::Dim3 lo,
                         const amrex::Real stagger_shift, 
                         const amrex::Real q)
 {
-    const Real dxi = 1.0/dx[0];
-    const Real dzi = 1.0/dx[2];
-    const Real dts2dx = 0.5*dt*dxi;
-    const Real dts2dz = 0.5*dt*dzi;
+    const amrex::Real dxi = 1.0/dx[0];
+    const amrex::Real dzi = 1.0/dx[2];
+    const amrex::Real dts2dx = 0.5*dt*dxi;
+    const amrex::Real dts2dz = 0.5*dt*dzi;
 #if (AMREX_SPACEDIM == 2)
-    const Real invvol = dxi*dzi;
-#else // (AMREX_SPACEDIM == 3)
-    const Real dyi = 1.0/dx[1];
-    const Real dts2dy = 0.5*dt*dyi;
-    const Real invvol = dxi*dyi*dzi;
+    const amrex::Real invvol = dxi*dzi;
+#elif (defined WARPX_DIM_3D)
+    const amrex::Real dyi = 1.0/dx[1];
+    const amrex::Real dts2dy = 0.5*dt*dyi;
+    const amrex::Real invvol = dxi*dyi*dzi;
 #endif
 
-    const Real xmin = xyzmin[0];
-    const Real ymin = xyzmin[1];
-    const Real zmin = xyzmin[2];
-    const Real clightsq = 1.0/PhysConst::c/PhysConst::c;
+    const amrex::Real xmin = xyzmin[0];
+    const amrex::Real ymin = xyzmin[1];
+    const amrex::Real zmin = xyzmin[2];
+    const amrex::Real clightsq = 1.0/PhysConst::c/PhysConst::c;
 
     // Loop over particles and deposit into jx_arr, jy_arr and jz_arr
-    ParallelFor( np_to_depose,
-                 [=] AMREX_GPU_DEVICE (long ip) {
-                     // --- Get particle quantities
-                     const Real gaminv = 1.0/std::sqrt(1.0 + uxp[ip]*uxp[ip]*clightsq
-						       + uyp[ip]*uyp[ip]*clightsq
-						       + uzp[ip]*uzp[ip]*clightsq);
-                     const Real wq  = q*wp[ip];
-                     const Real vx  = uxp[ip]*gaminv;
-                     const Real vy  = uyp[ip]*gaminv;
-                     const Real vz  = uzp[ip]*gaminv;
-                     // wqx, wqy wqz are particle current in each direction 
-                     const Real wqx = wq*invvol*vx;
-                     const Real wqy = wq*invvol*vy;
-                     const Real wqz = wq*invvol*vz;
-
-                     // --- Compute shape factors
-                     // x direction
-                     // Get particle position after 1/2 push back in position
-                     const Real xmid = (xp[ip]-xmin)*dxi-dts2dx*vx;
-                     // Compute shape factors for node-centered quantities
-                     Real AMREX_RESTRICT sx [depos_order + 1];
-                     // j: leftmost grid point (node-centered) that the particle touches
-                     const int j  = compute_shape_factor<depos_order>(sx,  xmid);
-                     // Compute shape factors for cell-centered quantities
-                     Real AMREX_RESTRICT sx0[depos_order + 1];
-                     // j0: leftmost grid point (cell-centered) that the particle touches
-                     const int j0 = compute_shape_factor<depos_order>(sx0, xmid-stagger_shift);
+    amrex::ParallelFor(
+        np_to_depose,
+        [=] AMREX_GPU_DEVICE (long ip) {
+            // --- Get particle quantities
+            const amrex::Real gaminv = 1.0/std::sqrt(1.0 + uxp[ip]*uxp[ip]*clightsq
+                                                     + uyp[ip]*uyp[ip]*clightsq
+                                                     + uzp[ip]*uzp[ip]*clightsq);
+            const amrex::Real wq  = q*wp[ip];
+            const amrex::Real vx  = uxp[ip]*gaminv;
+            const amrex::Real vy  = uyp[ip]*gaminv;
+            const amrex::Real vz  = uzp[ip]*gaminv;
+            // wqx, wqy, wqz are the particle's current density contributions in each direction
+#if (defined WARPX_DIM_RZ)
+            // In RZ, wqx is actually wqr, and wqy is wqtheta
+            // Convert to cylindrical components at the mid-point (position half a time step back)
+            const amrex::Real xpmid = xp[ip] - 0.5*dt*vx;
+            const amrex::Real ypmid = yp[ip] - 0.5*dt*vy;
+            const amrex::Real rpmid = std::sqrt(xpmid*xpmid + ypmid*ypmid);
+            amrex::Real costheta;
+            amrex::Real sintheta;
+            if (rpmid > 0.) {
+                costheta = xpmid/rpmid;
+                sintheta = ypmid/rpmid;
+            } else {
+                costheta = 1.;
+                sintheta = 0.;
+            }
+            const amrex::Real wqx = wq*invvol*(+vx*costheta + vy*sintheta);
+            const amrex::Real wqy = wq*invvol*(-vx*sintheta + vy*costheta);
+#else
+            const amrex::Real wqx = wq*invvol*vx;
+            const amrex::Real wqy = wq*invvol*vy;
+#endif
+            const amrex::Real wqz = wq*invvol*vz;
+
+            // --- Compute shape factors
+            // x direction
+            // Get particle position after 1/2 push back in position
+#if (defined WARPX_DIM_RZ)
+            const amrex::Real xmid = (rpmid-xmin)*dxi;
+#else
+            const amrex::Real xmid = (xp[ip]-xmin)*dxi-dts2dx*vx;
+#endif
+            // Compute shape factors for node-centered quantities
+            amrex::Real AMREX_RESTRICT sx [depos_order + 1];
+            // j: leftmost grid point (node-centered) that the particle touches
+            const int j  = compute_shape_factor<depos_order>(sx,  xmid);
+            // Compute shape factors for cell-centered quantities
+            amrex::Real AMREX_RESTRICT sx0[depos_order + 1];
+            // j0: leftmost grid point (cell-centered) that the particle touches
+            const int j0 = compute_shape_factor<depos_order>(sx0, xmid-stagger_shift);
                      
-#if (AMREX_SPACEDIM == 3)
-                     // y direction
-                     const Real ymid= (yp[ip]-ymin)*dyi-dts2dy*vy;
-                     Real AMREX_RESTRICT sy [depos_order + 1];
-		     const int k  = compute_shape_factor<depos_order>(sy,  ymid);
-                     Real AMREX_RESTRICT sy0[depos_order + 1];
-		     const int k0 = compute_shape_factor<depos_order>(sy0, ymid-stagger_shift);
+#if (defined WARPX_DIM_3D)
+            // y direction
+            const amrex::Real ymid= (yp[ip]-ymin)*dyi-dts2dy*vy;
+            amrex::Real AMREX_RESTRICT sy [depos_order + 1];
+            const int k  = compute_shape_factor<depos_order>(sy,  ymid);
+            amrex::Real AMREX_RESTRICT sy0[depos_order + 1];
+            const int k0 = compute_shape_factor<depos_order>(sy0, ymid-stagger_shift);
 #endif
-                     // z direction
-                     const Real zmid= (zp[ip]-zmin)*dzi-dts2dz*vz;
-                     Real AMREX_RESTRICT sz [depos_order + 1];
-		     const int l  = compute_shape_factor<depos_order>(sz,  zmid);
-                     Real AMREX_RESTRICT sz0[depos_order + 1];
-		     const int l0 = compute_shape_factor<depos_order>(sz0, zmid-stagger_shift);
-
-                     // Deposit current into jx_arr, jy_arr and jz_arr
-#if (AMREX_SPACEDIM == 2)
-                     for (int iz=0; iz<=depos_order; iz++){
-                         for (int ix=0; ix<=depos_order; ix++){
-                             amrex::Gpu::Atomic::Add(
-                                 &jx_arr(lo.x+j0+ix, lo.y+l +iz, 0), 
-                                 sx0[ix]*sz [iz]*wqx);
-                             amrex::Gpu::Atomic::Add(
-                                 &jy_arr(lo.x+j +ix, lo.y+l +iz, 0), 
-                                 sx [ix]*sz [iz]*wqy);
-                             amrex::Gpu::Atomic::Add(
-                                 &jz_arr(lo.x+j +ix, lo.y+l0+iz, 0), 
-                                 sx [ix]*sz0[iz]*wqz);
-                         }
-                     }
-#else // (AMREX_SPACEDIM == 3)
-                     for (int iz=0; iz<=depos_order; iz++){
-                         for (int iy=0; iy<=depos_order; iy++){
-                             for (int ix=0; ix<=depos_order; ix++){
-                                 amrex::Gpu::Atomic::Add(
-                                     &jx_arr(lo.x+j0+ix, lo.y+k +iy, lo.z+l +iz),
-                                     sx0[ix]*sy [iy]*sz [iz]*wqx);
-                                 amrex::Gpu::Atomic::Add(
-                                     &jy_arr(lo.x+j +ix, lo.y+k0+iy, lo.z+l +iz), 
-                                     sx [ix]*sy0[iy]*sz [iz]*wqy);
-                                 amrex::Gpu::Atomic::Add(
-                                     &jz_arr(lo.x+j +ix, lo.y+k +iy, lo.z+l0+iz),
-                                     sx [ix]*sy [iy]*sz0[iz]*wqz);
-                             }
-                         }
-                     }
+            // z direction
+            const amrex::Real zmid= (zp[ip]-zmin)*dzi-dts2dz*vz;
+            amrex::Real AMREX_RESTRICT sz [depos_order + 1];
+            const int l  = compute_shape_factor<depos_order>(sz,  zmid);
+            amrex::Real AMREX_RESTRICT sz0[depos_order + 1];
+            const int l0 = compute_shape_factor<depos_order>(sz0, zmid-stagger_shift);
+
+            // Deposit current into jx_arr, jy_arr and jz_arr
+#if (defined WARPX_DIM_2D) || (defined WARPX_DIM_RZ)
+            for (int iz=0; iz<=depos_order; iz++){
+                for (int ix=0; ix<=depos_order; ix++){
+                    amrex::Gpu::Atomic::Add(
+                        &jx_arr(lo.x+j0+ix, lo.y+l +iz, 0), 
+                        sx0[ix]*sz [iz]*wqx);
+                    amrex::Gpu::Atomic::Add(
+                        &jy_arr(lo.x+j +ix, lo.y+l +iz, 0), 
+                        sx [ix]*sz [iz]*wqy);
+                    amrex::Gpu::Atomic::Add(
+                        &jz_arr(lo.x+j +ix, lo.y+l0+iz, 0), 
+                        sx [ix]*sz0[iz]*wqz);
+                }
+            }
+#elif (defined WARPX_DIM_3D)
+            for (int iz=0; iz<=depos_order; iz++){
+                for (int iy=0; iy<=depos_order; iy++){
+                    for (int ix=0; ix<=depos_order; ix++){
+                        amrex::Gpu::Atomic::Add(
+                            &jx_arr(lo.x+j0+ix, lo.y+k +iy, lo.z+l +iz),
+                            sx0[ix]*sy [iy]*sz [iz]*wqx);
+                        amrex::Gpu::Atomic::Add(
+                            &jy_arr(lo.x+j +ix, lo.y+k0+iy, lo.z+l +iz), 
+                            sx [ix]*sy0[iy]*sz [iz]*wqy);
+                        amrex::Gpu::Atomic::Add(
+                            &jz_arr(lo.x+j +ix, lo.y+k +iy, lo.z+l0+iz),
+                            sx [ix]*sy [iy]*sz0[iz]*wqz);
+                    }
+                }
+            }
 #endif
-                 }
+        }
         );
 }
 
-// Compute shape factor and return index of leftmost cell where
-// particle writes.
-// Specialized templates are defined below for orders 1, 2 and 3.
-template <int depos_order>
-AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
-int compute_shifted_shape_factor (Real* const sx, const Real x_old, const int i_new);
-
-// Compute shape factor for order 1.
-template <>
-AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
-int compute_shifted_shape_factor <1> (Real* const sx, const Real x_old, const int i_new){
-    const int i = (int) x_old;
-    const int i_shift = i - i_new;
-    const Real xint = x_old - i;
-    sx[1+i_shift] = 1.0 - xint;
-    sx[2+i_shift] = xint;
-    return i;
-}
-
-// Compute shape factor for order 2.
-template <>
-AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
-int compute_shifted_shape_factor <2> (Real* const sx, const Real x_old, const int i_new){
-    const int i = (int) (x_old+0.5);
-    const int i_shift = i - (i_new + 1);
-    const Real xint = x_old - i;
-    sx[1+i_shift] = 0.5*(0.5-xint)*(0.5-xint);
-    sx[2+i_shift] = 0.75-xint*xint;
-    sx[3+i_shift] = 0.5*(0.5+xint)*(0.5+xint);
-    // index of the leftmost cell where particle deposits
-    return i-1;
-}
-
-// Compute shape factor for order 3.
-template <>
-AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
-int compute_shifted_shape_factor <3> (Real* const sx, const Real x_old, const int i_new){
-    const int i = (int) x_old;
-    const int i_shift = i - (i_new + 1);
-    const Real xint = x_old - i;
-    sx[1+i_shift] = 1.0/6.0*(1.0-xint)*(1.0-xint)*(1.0-xint);
-    sx[2+i_shift] = 2.0/3.0-xint*xint*(1-xint/2.0);
-    sx[3+i_shift] = 2.0/3.0-(1-xint)*(1-xint)*(1.0-0.5*(1-xint));
-    sx[4+i_shift] = 1.0/6.0*xint*xint*xint;
-    // index of the leftmost cell where particle deposits
-    return i-1;
-}
-
 /* \brief Esirkepov Current Deposition for thread thread_num
  * /param xp, yp, zp   : Pointer to arrays of particle positions.
  * \param wp           : Pointer to array of particle weights.
@@ -239,170 +172,197 @@ int compute_shifted_shape_factor <3> (Real* const sx, const Real x_old, const in
  * /param q            : species charge.
  */
 template <int depos_order>
-void doEsirkepovDepositionShapeN (const Real * const xp, const Real * const yp, const Real * const zp,
-                                  const Real * const wp, const Real * const uxp,
-                                  const Real * const uyp, const Real * const uzp,
+void doEsirkepovDepositionShapeN (const amrex::Real * const xp,
+                                  const amrex::Real * const yp,
+                                  const amrex::Real * const zp,
+                                  const amrex::Real * const wp,
+                                  const amrex::Real * const uxp,
+                                  const amrex::Real * const uyp,
+                                  const amrex::Real * const uzp,
                                   const amrex::Array4<amrex::Real>& Jx_arr,
                                   const amrex::Array4<amrex::Real>& Jy_arr,
                                   const amrex::Array4<amrex::Real>& Jz_arr,
                                   const long np_to_depose,
-                                  const amrex::Real dt, const std::array<amrex::Real,3>& dx,
-                                  const std::array<Real, 3> xyzmin,
-                                  const Dim3 lo,
+                                  const amrex::Real dt,
+                                  const std::array<amrex::Real,3>& dx,
+                                  const std::array<amrex::Real, 3> xyzmin,
+                                  const amrex::Dim3 lo,
                                   const amrex::Real q)
 {
-    const Real dxi = 1.0/dx[0];
-    const Real dtsdx0 = dt*dxi;
-    const Real xmin = xyzmin[0];
-#if (AMREX_SPACEDIM == 3)
-    const Real dyi = 1.0/dx[1];
-    const Real dtsdy0 = dt*dyi;
-    const Real ymin = xyzmin[1];
+    const amrex::Real dxi = 1.0/dx[0];
+    const amrex::Real dtsdx0 = dt*dxi;
+    const amrex::Real xmin = xyzmin[0];
+#if (defined WARPX_DIM_3D)
+    const amrex::Real dyi = 1.0/dx[1];
+    const amrex::Real dtsdy0 = dt*dyi;
+    const amrex::Real ymin = xyzmin[1];
 #endif
-    const Real dzi = 1.0/dx[2];
-    const Real dtsdz0 = dt*dzi;
-    const Real zmin = xyzmin[2];
-
-#if (AMREX_SPACEDIM == 3)
-    const Real invdtdx = 1.0/(dt*dx[1]*dx[2]);
-    const Real invdtdy = 1.0/(dt*dx[0]*dx[2]);
-    const Real invdtdz = 1.0/(dt*dx[0]*dx[1]);
-#elif (AMREX_SPACEDIM == 2)
-    const Real invdtdx = 1.0/(dt*dx[2]);
-    const Real invdtdz = 1.0/(dt*dx[0]);
-    const Real invvol = 1.0/(dx[0]*dx[2]);
+    const amrex::Real dzi = 1.0/dx[2];
+    const amrex::Real dtsdz0 = dt*dzi;
+    const amrex::Real zmin = xyzmin[2];
+
+#if (defined WARPX_DIM_3D)
+    const amrex::Real invdtdx = 1.0/(dt*dx[1]*dx[2]);
+    const amrex::Real invdtdy = 1.0/(dt*dx[0]*dx[2]);
+    const amrex::Real invdtdz = 1.0/(dt*dx[0]*dx[1]);
+#elif (defined WARPX_DIM_2D) || (defined WARPX_DIM_RZ)
+    const amrex::Real invdtdx = 1.0/(dt*dx[2]);
+    const amrex::Real invdtdz = 1.0/(dt*dx[0]);
+    const amrex::Real invvol = 1.0/(dx[0]*dx[2]);
 #endif
 
-    const Real clightsq = 1.0/PhysConst::c/PhysConst::c;
+    const amrex::Real clightsq = 1.0/PhysConst::c/PhysConst::c;
 
     // Loop over particles and deposit into Jx_arr, Jy_arr and Jz_arr
-    ParallelFor( np_to_depose,
-                 [=] AMREX_GPU_DEVICE (long ip) {
-
-                     // --- Get particle quantities
-                     const Real gaminv = 1.0/std::sqrt(1.0 + uxp[ip]*uxp[ip]*clightsq
-                                                           + uyp[ip]*uyp[ip]*clightsq
-                                                           + uzp[ip]*uzp[ip]*clightsq);
-
-                     // wqx, wqy wqz are particle current in each direction
-                     const Real wq = q*wp[ip];
-                     const Real wqx = wq*invdtdx;
-#if (AMREX_SPACEDIM == 3)
-                     const Real wqy = wq*invdtdy;
+    amrex::ParallelFor( 
+        np_to_depose,
+        [=] AMREX_GPU_DEVICE (long ip) {
+
+            // --- Get particle quantities
+            const amrex::Real gaminv = 1.0/std::sqrt(1.0 + uxp[ip]*uxp[ip]*clightsq
+                                                         + uyp[ip]*uyp[ip]*clightsq
+                                                         + uzp[ip]*uzp[ip]*clightsq);
+
+            // wqx, wqy wqz are particle current in each direction
+            const amrex::Real wq = q*wp[ip];
+            const amrex::Real wqx = wq*invdtdx;
+#if (defined WARPX_DIM_3D)
+            const amrex::Real wqy = wq*invdtdy;
 #endif
-                     const Real wqz = wq*invdtdz;
-
-                     // computes current and old position in grid units
-                     const Real x_new = (xp[ip] - xmin)*dxi;
-                     const Real x_old = x_new - dtsdx0*uxp[ip]*gaminv;
-#if (AMREX_SPACEDIM == 3)        
-                     const Real y_new = (yp[ip] - ymin)*dyi;
-                     const Real y_old = y_new - dtsdy0*uyp[ip]*gaminv;
+            const amrex::Real wqz = wq*invdtdz;
+
+            // computes current and old position in grid units
+#if (defined WARPX_DIM_RZ)
+            const amrex::Real r_new = std::sqrt(xp[ip]*xp[ip] + yp[ip]*yp[ip]);
+            const amrex::Real r_old = std::sqrt((xp[ip] - dt*uxp[ip]*gaminv)*(xp[ip] - dt*uxp[ip]*gaminv) +
+                                                (yp[ip] - dt*uyp[ip]*gaminv)*(yp[ip] - dt*uyp[ip]*gaminv));
+            const amrex::Real x_new = (r_new - xmin)*dxi;
+            const amrex::Real x_old = (r_old - xmin)*dxi;
+#else
+            const amrex::Real x_new = (xp[ip] - xmin)*dxi;
+            const amrex::Real x_old = x_new - dtsdx0*uxp[ip]*gaminv;
 #endif
-                     const Real z_new = (zp[ip] - zmin)*dzi;
-                     const Real z_old = z_new - dtsdz0*uzp[ip]*gaminv;
-
-                     // Shape factor arrays
-                     // Note that there are extra values above and below
-                     // to possibly hold the factor for the old particle
-                     // which can be at a different grid location.
-                     Real AMREX_RESTRICT sx_new[depos_order + 3] = {0.};
-                     Real AMREX_RESTRICT sx_old[depos_order + 3] = {0.};
-#if (AMREX_SPACEDIM == 3)
-                     Real AMREX_RESTRICT sy_new[depos_order + 3] = {0.};
-                     Real AMREX_RESTRICT sy_old[depos_order + 3] = {0.};
+#if (defined WARPX_DIM_3D)        
+            const amrex::Real y_new = (yp[ip] - ymin)*dyi;
+            const amrex::Real y_old = y_new - dtsdy0*uyp[ip]*gaminv;
+#endif
+            const amrex::Real z_new = (zp[ip] - zmin)*dzi;
+            const amrex::Real z_old = z_new - dtsdz0*uzp[ip]*gaminv;
+
+#if (defined WARPX_DIM_RZ)
+            amrex::Real costheta;
+            amrex::Real sintheta;
+            if (r_new > 0.) {
+                costheta = xp[ip]/r_new;
+                sintheta = yp[ip]/r_new;
+            } else {
+                costheta = 1.;
+                sintheta = 0.;
+            }
+            const amrex::Real vy = (-uxp[ip]*sintheta + uyp[ip]*costheta)*gaminv;
+#elif (defined WARPX_DIM_2D)
+            const amrex::Real vy = uyp[ip]*gaminv;
 #endif
-                     Real AMREX_RESTRICT sz_new[depos_order + 3] = {0.};
-                     Real AMREX_RESTRICT sz_old[depos_order + 3] = {0.};
 
-                     // --- Compute shape factors
-                     // Compute shape factors for position as they are now and at old positions
-                     // [ijk]_new: leftmost grid point that the particle touches
-                     const int i_new = compute_shape_factor<depos_order>(sx_new+1, x_new);
-                     const int i_old = compute_shifted_shape_factor<depos_order>(sx_old, x_old, i_new);
-#if (AMREX_SPACEDIM == 3)
-                     const int j_new = compute_shape_factor<depos_order>(sy_new+1, y_new);
-                     const int j_old = compute_shifted_shape_factor<depos_order>(sy_old, y_old, j_new);
+            // Shape factor arrays
+            // Note that there are extra values above and below
+            // to possibly hold the factor for the old particle
+            // which can be at a different grid location.
+            amrex::Real AMREX_RESTRICT sx_new[depos_order + 3] = {0.};
+            amrex::Real AMREX_RESTRICT sx_old[depos_order + 3] = {0.};
+#if (defined WARPX_DIM_3D)
+            amrex::Real AMREX_RESTRICT sy_new[depos_order + 3] = {0.};
+            amrex::Real AMREX_RESTRICT sy_old[depos_order + 3] = {0.};
+#endif
+            amrex::Real AMREX_RESTRICT sz_new[depos_order + 3] = {0.};
+            amrex::Real AMREX_RESTRICT sz_old[depos_order + 3] = {0.};
+
+            // --- Compute shape factors
+            // Compute shape factors for position as they are now and at old positions
+            // [ijk]_new: leftmost grid point that the particle touches
+            const int i_new = compute_shape_factor<depos_order>(sx_new+1, x_new);
+            const int i_old = compute_shifted_shape_factor<depos_order>(sx_old, x_old, i_new);
+#if (defined WARPX_DIM_3D)
+            const int j_new = compute_shape_factor<depos_order>(sy_new+1, y_new);
+            const int j_old = compute_shifted_shape_factor<depos_order>(sy_old, y_old, j_new);
 #endif 
-                     const int k_new = compute_shape_factor<depos_order>(sz_new+1, z_new);
-                     const int k_old = compute_shifted_shape_factor<depos_order>(sz_old, z_old, k_new);
-
-                     // computes min/max positions of current contributions
-                     int dil = 1, diu = 1;
-                     if (i_old < i_new) dil = 0;
-                     if (i_old > i_new) diu = 0;
-#if (AMREX_SPACEDIM == 3)
-                     int djl = 1, dju = 1;
-                     if (j_old < j_new) djl = 0;
-                     if (j_old > j_new) dju = 0;
+            const int k_new = compute_shape_factor<depos_order>(sz_new+1, z_new);
+            const int k_old = compute_shifted_shape_factor<depos_order>(sz_old, z_old, k_new);
+
+            // computes min/max positions of current contributions
+            int dil = 1, diu = 1;
+            if (i_old < i_new) dil = 0;
+            if (i_old > i_new) diu = 0;
+#if (defined WARPX_DIM_3D)
+            int djl = 1, dju = 1;
+            if (j_old < j_new) djl = 0;
+            if (j_old > j_new) dju = 0;
 #endif
-                     int dkl = 1, dku = 1;
-                     if (k_old < k_new) dkl = 0;
-                     if (k_old > k_new) dku = 0;
-
-#if (AMREX_SPACEDIM == 3)
-
-                     for (int k=dkl; k<=depos_order+2-dku; k++) {
-                         for (int j=djl; j<=depos_order+2-dju; j++) {
-                             Real sdxi = 0.;
-                             for (int i=dil; i<=depos_order+1-diu; i++) {
-                                 sdxi += wqx*(sx_old[i] - sx_new[i])*((sy_new[j] + 0.5*(sy_old[j] - sy_new[j]))*sz_new[k] +
-                                         (0.5*sy_new[j] + 1./3.*(sy_old[j] - sy_new[j]))*(sz_old[k] - sz_new[k]));
-                                 amrex::Gpu::Atomic::Add( &Jx_arr(lo.x+i_new-1+i, lo.y+j_new-1+j, lo.z+k_new-1+k), sdxi);
-                             }
-                         }
-                     }
-                     for (int k=dkl; k<=depos_order+2-dku; k++) {
-                         for (int i=dil; i<=depos_order+2-diu; i++) {
-                             Real sdyj = 0.;
-                             for (int j=djl; j<=depos_order+1-dju; j++) {
-                                 sdyj += wqy*(sy_old[j] - sy_new[j])*((sz_new[k] + 0.5*(sz_old[k] - sz_new[k]))*sx_new[i] +
-                                         (0.5*sz_new[k] + 1./3.*(sz_old[k] - sz_new[k]))*(sx_old[i] - sx_new[i]));
-                                 amrex::Gpu::Atomic::Add( &Jy_arr(lo.x+i_new-1+i, lo.y+j_new-1+j, lo.z+k_new-1+k), sdyj);
-                             }
-                         }
-                     }
-                     for (int j=djl; j<=depos_order+2-dju; j++) {
-                         for (int i=dil; i<=depos_order+2-diu; i++) {
-                             Real sdzk = 0.;
-                             for (int k=dkl; k<=depos_order+1-dku; k++) {
-                                 sdzk += wqz*(sz_old[k] - sz_new[k])*((sx_new[i] + 0.5*(sx_old[i] - sx_new[i]))*sy_new[j] +
-                                         (0.5*sx_new[i] + 1./3.*(sx_old[i] - sx_new[i]))*(sy_old[j] - sy_new[j]));
-                                 amrex::Gpu::Atomic::Add( &Jz_arr(lo.x+i_new-1+i, lo.y+j_new-1+j, lo.z+k_new-1+k), sdzk);
-                             }
-                         }
-                     }
-
-#elif (AMREX_SPACEDIM == 2)
-
-                    for (int k=dkl; k<=depos_order+2-dku; k++) {
-                        Real sdxi = 0.;
-                        for (int i=dil; i<=depos_order+1-diu; i++) {
-                            sdxi += wqx*(sx_old[i] - sx_new[i])*(sz_new[k] + 0.5*(sz_old[k] - sz_new[k]));
-                            amrex::Gpu::Atomic::Add( &Jx_arr(lo.x+i_new-1+i, lo.y+k_new-1+k, 0), sdxi);
-                        }
+            int dkl = 1, dku = 1;
+            if (k_old < k_new) dkl = 0;
+            if (k_old > k_new) dku = 0;
+
+#if (defined WARPX_DIM_3D)
+
+            for (int k=dkl; k<=depos_order+2-dku; k++) {
+                for (int j=djl; j<=depos_order+2-dju; j++) {
+                    amrex::Real sdxi = 0.;
+                    for (int i=dil; i<=depos_order+1-diu; i++) {
+                        sdxi += wqx*(sx_old[i] - sx_new[i])*((sy_new[j] + 0.5*(sy_old[j] - sy_new[j]))*sz_new[k] +
+                                                             (0.5*sy_new[j] + 1./3.*(sy_old[j] - sy_new[j]))*(sz_old[k] - sz_new[k]));
+                        amrex::Gpu::Atomic::Add( &Jx_arr(lo.x+i_new-1+i, lo.y+j_new-1+j, lo.z+k_new-1+k), sdxi);
                     }
-                    for (int k=dkl; k<=depos_order+2-dku; k++) {
-                        for (int i=dil; i<=depos_order+2-diu; i++) {
-                            const Real sdyj = wq*uyp[ip]*gaminv*invvol*((sz_new[k] + 0.5*(sz_old[k] - sz_new[k]))*sx_new[i] +
-                                        (0.5*sz_new[k] + 1./3.*(sz_old[k] - sz_new[k]))*(sx_old[i] - sx_new[i]));
-                            amrex::Gpu::Atomic::Add( &Jy_arr(lo.x+i_new-1+i, lo.y+k_new-1+k, 0), sdyj);
-                        }
+                }
+            }
+            for (int k=dkl; k<=depos_order+2-dku; k++) {
+                for (int i=dil; i<=depos_order+2-diu; i++) {
+                    amrex::Real sdyj = 0.;
+                    for (int j=djl; j<=depos_order+1-dju; j++) {
+                        sdyj += wqy*(sy_old[j] - sy_new[j])*((sz_new[k] + 0.5*(sz_old[k] - sz_new[k]))*sx_new[i] +
+                                                             (0.5*sz_new[k] + 1./3.*(sz_old[k] - sz_new[k]))*(sx_old[i] - sx_new[i]));
+                        amrex::Gpu::Atomic::Add( &Jy_arr(lo.x+i_new-1+i, lo.y+j_new-1+j, lo.z+k_new-1+k), sdyj);
                     }
-                    for (int i=dil; i<=depos_order+2-diu; i++) {
-                        Real sdzk = 0.;
-                        for (int k=dkl; k<=depos_order+1-dku; k++) {
-                            sdzk += wqz*(sz_old[k] - sz_new[k])*(sx_new[i] + 0.5*(sx_old[i] - sx_new[i]));
-                            amrex::Gpu::Atomic::Add( &Jz_arr(lo.x+i_new-1+i, lo.y+k_new-1+k, 0), sdzk);
-                        }
+                }
+            }
+            for (int j=djl; j<=depos_order+2-dju; j++) {
+                for (int i=dil; i<=depos_order+2-diu; i++) {
+                    amrex::Real sdzk = 0.;
+                    for (int k=dkl; k<=depos_order+1-dku; k++) {
+                        sdzk += wqz*(sz_old[k] - sz_new[k])*((sx_new[i] + 0.5*(sx_old[i] - sx_new[i]))*sy_new[j] +
+                                                             (0.5*sx_new[i] + 1./3.*(sx_old[i] - sx_new[i]))*(sy_old[j] - sy_new[j]));
+                        amrex::Gpu::Atomic::Add( &Jz_arr(lo.x+i_new-1+i, lo.y+j_new-1+j, lo.z+k_new-1+k), sdzk);
                     }
+                }
+            }
+
+#elif (defined WARPX_DIM_2D) || (defined WARPX_DIM_RZ)
+
+            for (int k=dkl; k<=depos_order+2-dku; k++) {
+                amrex::Real sdxi = 0.;
+                for (int i=dil; i<=depos_order+1-diu; i++) {
+                    sdxi += wqx*(sx_old[i] - sx_new[i])*(sz_new[k] + 0.5*(sz_old[k] - sz_new[k]));
+                    amrex::Gpu::Atomic::Add( &Jx_arr(lo.x+i_new-1+i, lo.y+k_new-1+k, 0), sdxi);
+                }
+            }
+            for (int k=dkl; k<=depos_order+2-dku; k++) {
+                for (int i=dil; i<=depos_order+2-diu; i++) {
+                    const amrex::Real sdyj = wq*vy*invvol*((sz_new[k] + 0.5*(sz_old[k] - sz_new[k]))*sx_new[i] +
+                                                           (0.5*sz_new[k] + 1./3.*(sz_old[k] - sz_new[k]))*(sx_old[i] - sx_new[i]));
+                    amrex::Gpu::Atomic::Add( &Jy_arr(lo.x+i_new-1+i, lo.y+k_new-1+k, 0), sdyj);
+                }
+            }
+            for (int i=dil; i<=depos_order+2-diu; i++) {
+                amrex::Real sdzk = 0.;
+                for (int k=dkl; k<=depos_order+1-dku; k++) {
+                    sdzk += wqz*(sz_old[k] - sz_new[k])*(sx_new[i] + 0.5*(sx_old[i] - sx_new[i]));
+                    amrex::Gpu::Atomic::Add( &Jz_arr(lo.x+i_new-1+i, lo.y+k_new-1+k, 0), sdzk);
+                }
+            }
+
 
 #endif
-                 }
+        }
         );
-
-
-
 }
 
 #endif // CURRENTDEPOSITION_H_
diff --git a/Source/Particles/Deposition/Make.package b/Source/Particles/Deposition/Make.package
index 0d5ebe2a7..e1aace998 100644
--- a/Source/Particles/Deposition/Make.package
+++ b/Source/Particles/Deposition/Make.package
@@ -1,3 +1,4 @@
 CEXE_headers += CurrentDeposition.H
+CEXE_headers += ChargeDeposition.H
 INCLUDE_LOCATIONS += $(WARPX_HOME)/Source/Particles/Deposition
 VPATH_LOCATIONS   += $(WARPX_HOME)/Source/Particles/Deposition
diff --git a/Source/Particles/Gather/FieldGather.H b/Source/Particles/Gather/FieldGather.H
new file mode 100644
index 000000000..8f5e8d4cf
--- /dev/null
+++ b/Source/Particles/Gather/FieldGather.H
@@ -0,0 +1,216 @@
+#ifndef FIELDGATHER_H_
+#define FIELDGATHER_H_
+
+#include "ShapeFactors.H"
+
+/* \brief Gather the electric and magnetic fields from the grid onto particles.
+ * \tparam depos_order, lower_in_v: Shape order, and its reduction for sx0/sy0/sz0.
+ * \param xp, yp, zp   : Pointer to arrays of particle positions.
+ * \param Exp, Eyp, Ezp: Pointer to array of electric field on particles.
+ * \param Bxp, Byp, Bzp: Pointer to array of magnetic field on particles.
+ * \param ex_arr, ey_arr, ez_arr: Array4 of electric field, either full array or tile.
+ * \param bx_arr, by_arr, bz_arr: Array4 of magnetic field, either full array or tile.
+ * \param np_to_gather : Number of particles for which field is gathered.
+ * \param dx           : 3D cell size
+ * \param xyzmin       : Physical lower bounds of domain.
+ * \param lo           : Index lower bounds of domain.
+ * \param stagger_shift: 0 if nodal, 0.5 if staggered.
+ */
+template <int depos_order, int lower_in_v>
+void doGatherShapeN(const amrex::Real * const xp,
+                    const amrex::Real * const yp,
+                    const amrex::Real * const zp,
+                    amrex::Real * const Exp, amrex::Real * const Eyp,
+                    amrex::Real * const Ezp, amrex::Real * const Bxp,
+                    amrex::Real * const Byp, amrex::Real * const Bzp,
+                    const amrex::Array4<const amrex::Real>& ex_arr,
+                    const amrex::Array4<const amrex::Real>& ey_arr,
+                    const amrex::Array4<const amrex::Real>& ez_arr,
+                    const amrex::Array4<const amrex::Real>& bx_arr,
+                    const amrex::Array4<const amrex::Real>& by_arr,
+                    const amrex::Array4<const amrex::Real>& bz_arr,
+                    const long np_to_gather,
+                    const std::array<amrex::Real, 3>& dx,
+                    const std::array<amrex::Real, 3> xyzmin,
+                    const amrex::Dim3 lo,
+                    const amrex::Real stagger_shift)
+{
+    const amrex::Real dxi = 1.0/dx[0];
+    const amrex::Real dzi = 1.0/dx[2];
+#if (AMREX_SPACEDIM == 3)
+    const amrex::Real dyi = 1.0/dx[1];
+#endif
+
+    const amrex::Real xmin = xyzmin[0];
+#if (AMREX_SPACEDIM == 3)
+    const amrex::Real ymin = xyzmin[1];
+#endif
+    const amrex::Real zmin = xyzmin[2];
+
+    // Loop over the first np_to_gather particles and gather fields from
+    // {e,b}{x,y,z}_arr to {E,B}{x,y,z}p.
+    amrex::ParallelFor(
+        np_to_gather,
+        [=] AMREX_GPU_DEVICE (long ip) {
+            // --- Compute shape factors
+            // x direction
+            // Get particle position in units of the cell size (radius in RZ)
+#ifdef WARPX_DIM_RZ
+            const amrex::Real r = std::sqrt(xp[ip]*xp[ip] + yp[ip]*yp[ip]);
+            const amrex::Real x = (r - xmin)*dxi;
+#else
+            const amrex::Real x = (xp[ip]-xmin)*dxi;
+#endif
+            // Compute shape factors for node-centered quantities
+            amrex::Real AMREX_RESTRICT sx [depos_order + 1];
+            // j: leftmost grid point (node-centered) that particle touches
+            const int j  = compute_shape_factor<depos_order>(sx, x);
+            // Compute shape factors (order lowered by lower_in_v) for cell-centered quantities
+            amrex::Real AMREX_RESTRICT sx0[depos_order + 1 - lower_in_v];
+            // j0: leftmost grid point (cell-centered) that particle touches
+            const int j0 = compute_shape_factor<depos_order - lower_in_v>(
+                sx0, x-stagger_shift);
+#if (AMREX_SPACEDIM == 3)
+            // y direction: same pair of shape factors (sy, sy0) as in x
+            const amrex::Real y = (yp[ip]-ymin)*dyi;
+            amrex::Real AMREX_RESTRICT sy [depos_order + 1];
+            const int k  = compute_shape_factor<depos_order>(sy, y);
+            amrex::Real AMREX_RESTRICT sy0[depos_order + 1 - lower_in_v];
+            const int k0 = compute_shape_factor<depos_order-lower_in_v>(
+                sy0, y-stagger_shift);
+#endif
+            // z direction: same pair of shape factors (sz, sz0) as in x
+            const amrex::Real z = (zp[ip]-zmin)*dzi;
+            amrex::Real AMREX_RESTRICT sz [depos_order + 1];
+            const int l  = compute_shape_factor<depos_order>(sz, z);
+            amrex::Real AMREX_RESTRICT sz0[depos_order + 1 - lower_in_v];
+            const int l0 = compute_shape_factor<depos_order - lower_in_v>(
+                sz0, z-stagger_shift);
+
+            // Set fields on particle to zero; the loops below accumulate into them
+            Exp[ip] = 0;
+            Eyp[ip] = 0;
+            Ezp[ip] = 0;
+            Bxp[ip] = 0;
+            Byp[ip] = 0;
+            Bzp[ip] = 0;
+            // Each field is gathered in a separate block of
+            // AMREX_SPACEDIM nested loops because the shape-factor
+            // order can differ for each component of each field
+            // when lower_in_v is set to 1
+#if (AMREX_SPACEDIM == 2)
+            // Gather Eyp[ip] from ey_arr (in 2D, the 2nd array index runs along z)
+            for (int iz=0; iz<=depos_order; iz++){
+                for (int ix=0; ix<=depos_order; ix++){
+                    Eyp[ip] += sx[ix]*sz[iz]*
+                        ey_arr(lo.x+j+ix, lo.y+l+iz, 0);
+                }
+            }
+            // Gather field on particle Exp[ip] from field on grid ex_arr
+            // Gather field on particle Bzp[ip] from field on grid bz_arr
+            for (int iz=0; iz<=depos_order; iz++){
+                for (int ix=0; ix<=depos_order-lower_in_v; ix++){
+                    Exp[ip] += sx0[ix]*sz[iz]*
+                        ex_arr(lo.x+j0+ix, lo.y+l +iz, 0);
+                    Bzp[ip] += sx0[ix]*sz[iz]*
+                        bz_arr(lo.x+j0+ix, lo.y+l +iz, 0);
+                }
+            }
+            // Gather field on particle Ezp[ip] from field on grid ez_arr
+            // Gather field on particle Bxp[ip] from field on grid bx_arr
+            for (int iz=0; iz<=depos_order-lower_in_v; iz++){
+                for (int ix=0; ix<=depos_order; ix++){
+                    Ezp[ip] += sx[ix]*sz0[iz]*
+                        ez_arr(lo.x+j+ix, lo.y+l0 +iz, 0);
+                    Bxp[ip] += sx[ix]*sz0[iz]*
+                        bx_arr(lo.x+j+ix, lo.y+l0 +iz, 0);
+                }
+            }
+            // Gather field on particle Byp[ip] from field on grid by_arr
+            for (int iz=0; iz<=depos_order-lower_in_v; iz++){
+                for (int ix=0; ix<=depos_order-lower_in_v; ix++){
+                    Byp[ip] += sx0[ix]*sz0[iz]*
+                        by_arr(lo.x+j0+ix, lo.y+l0+iz, 0);
+                }
+            }
+
+#ifdef WARPX_DIM_RZ
+            // Rotate (Exp, Eyp) and (Bxp, Byp), which hold the r and theta components, to x and y
+            amrex::Real costheta;
+            amrex::Real sintheta;
+            if (r > 0.) {
+                costheta = xp[ip]/r;
+                sintheta = yp[ip]/r;
+            } else {
+                costheta = 1.;
+                sintheta = 0.;
+            }
+            const amrex::Real Exp_save = Exp[ip];
+            Exp[ip] = costheta*Exp[ip] - sintheta*Eyp[ip];
+            Eyp[ip] = costheta*Eyp[ip] + sintheta*Exp_save;
+            const amrex::Real Bxp_save = Bxp[ip];
+            Bxp[ip] = costheta*Bxp[ip] - sintheta*Byp[ip];
+            Byp[ip] = costheta*Byp[ip] + sintheta*Bxp_save;
+#endif
+
+#else // (AMREX_SPACEDIM == 3)
+            // Gather Exp[ip] from ex_arr (shifted shape factor in x)
+            for (int iz=0; iz<=depos_order; iz++){
+                for (int iy=0; iy<=depos_order; iy++){
+                    for (int ix=0; ix<=depos_order-lower_in_v; ix++){
+                        Exp[ip] += sx0[ix]*sy[iy]*sz[iz]*
+                            ex_arr(lo.x+j0+ix, lo.y+k+iy, lo.z+l+iz);
+                    }
+                }
+            }
+            // Gather Eyp[ip] from ey_arr (shifted shape factor in y)
+            for (int iz=0; iz<=depos_order; iz++){
+                for (int iy=0; iy<=depos_order-lower_in_v; iy++){
+                    for (int ix=0; ix<=depos_order; ix++){
+                        Eyp[ip] += sx[ix]*sy0[iy]*sz[iz]*
+                            ey_arr(lo.x+j+ix, lo.y+k0+iy, lo.z+l+iz);
+                    }
+                }
+            }
+            // Gather Ezp[ip] from ez_arr (shifted shape factor in z)
+            for (int iz=0; iz<=depos_order-lower_in_v; iz++){
+                for (int iy=0; iy<=depos_order; iy++){
+                    for (int ix=0; ix<=depos_order; ix++){
+                        Ezp[ip] += sx[ix]*sy[iy]*sz0[iz]*
+                            ez_arr(lo.x+j+ix, lo.y+k+iy, lo.z+l0+iz);
+                    }
+                }
+            }
+            // Gather Bzp[ip] from bz_arr (shifted shape factors in x and y)
+            for (int iz=0; iz<=depos_order; iz++){
+                for (int iy=0; iy<=depos_order-lower_in_v; iy++){
+                    for (int ix=0; ix<=depos_order-lower_in_v; ix++){
+                        Bzp[ip] += sx0[ix]*sy0[iy]*sz[iz]*
+                            bz_arr(lo.x+j0+ix, lo.y+k0+iy, lo.z+l+iz);
+                    }
+                }
+            }
+            // Gather Byp[ip] from by_arr (shifted shape factors in x and z)
+            for (int iz=0; iz<=depos_order-lower_in_v; iz++){
+                for (int iy=0; iy<=depos_order; iy++){
+                    for (int ix=0; ix<=depos_order-lower_in_v; ix++){
+                        Byp[ip] += sx0[ix]*sy[iy]*sz0[iz]*
+                            by_arr(lo.x+j0+ix, lo.y+k+iy, lo.z+l0+iz);
+                    }
+                }
+            }
+            // Gather Bxp[ip] from bx_arr (shifted shape factors in y and z)
+            for (int iz=0; iz<=depos_order-lower_in_v; iz++){
+                for (int iy=0; iy<=depos_order-lower_in_v; iy++){
+                    for (int ix=0; ix<=depos_order; ix++){
+                        Bxp[ip] += sx[ix]*sy0[iy]*sz0[iz]*
+                            bx_arr(lo.x+j+ix, lo.y+k0+iy, lo.z+l0+iz);
+                    }
+                }
+            }
+#endif
+        }
+        );
+}
+
+#endif // FIELDGATHER_H_
diff --git a/Source/Particles/Gather/Make.package b/Source/Particles/Gather/Make.package
new file mode 100644
index 000000000..10abfcaaf
--- /dev/null
+++ b/Source/Particles/Gather/Make.package
@@ -0,0 +1,3 @@
+CEXE_headers += FieldGather.H
+INCLUDE_LOCATIONS += $(WARPX_HOME)/Source/Particles/Gather
+VPATH_LOCATIONS   += $(WARPX_HOME)/Source/Particles/Gather
diff --git a/Source/Particles/Make.package b/Source/Particles/Make.package
index 2038472a1..db90de1dc 100644
--- a/Source/Particles/Make.package
+++ b/Source/Particles/Make.package
@@ -9,9 +9,11 @@ CEXE_headers += MultiParticleContainer.H
 CEXE_headers += WarpXParticleContainer.H
 CEXE_headers += RigidInjectedParticleContainer.H
 CEXE_headers += PhysicalParticleContainer.H
+CEXE_headers += ShapeFactors.H
 
 include $(WARPX_HOME)/Source/Particles/Pusher/Make.package
 include $(WARPX_HOME)/Source/Particles/Deposition/Make.package
+include $(WARPX_HOME)/Source/Particles/Gather/Make.package
 
 INCLUDE_LOCATIONS += $(WARPX_HOME)/Source/Particles
 VPATH_LOCATIONS   += $(WARPX_HOME)/Source/Particles
diff --git a/Source/Particles/MultiParticleContainer.H b/Source/Particles/MultiParticleContainer.H
index 869126fef..7c9ede411 100644
--- a/Source/Particles/MultiParticleContainer.H
+++ b/Source/Particles/MultiParticleContainer.H
@@ -85,8 +85,9 @@ public:
     /// in the MultiParticleContainer. This is the electromagnetic version of the field gather.
     ///
     void FieldGather (int lev,
-                      const amrex::MultiFab& Ex, const amrex::MultiFab& Ey, const amrex::MultiFab& Ez,
-                      const amrex::MultiFab& Bx, const amrex::MultiFab& By, const amrex::MultiFab& Bz); 
+                      const amrex::MultiFab& Ex, const amrex::MultiFab& Ey,
+                      const amrex::MultiFab& Ez, const amrex::MultiFab& Bx,
+                      const amrex::MultiFab& By, const amrex::MultiFab& Bz); 
 
     ///
     /// This evolves all the particles by one PIC time step, including current deposition, the
diff --git a/Source/Particles/MultiParticleContainer.cpp b/Source/Particles/MultiParticleContainer.cpp
index 9d39ec2f9..982e04e39 100644
--- a/Source/Particles/MultiParticleContainer.cpp
+++ b/Source/Particles/MultiParticleContainer.cpp
@@ -172,30 +172,6 @@ MultiParticleContainer::EvolveES (const Vector<std::array<std::unique_ptr<MultiF
 }
 
 void
-MultiParticleContainer::Evolve (int lev,
-                                const MultiFab& Ex, const MultiFab& Ey, const MultiFab& Ez,
-                                const MultiFab& Bx, const MultiFab& By, const MultiFab& Bz,
-                                MultiFab& jx, MultiFab& jy, MultiFab& jz,
-                                MultiFab* cjx,  MultiFab* cjy, MultiFab* cjz, 
-                                MultiFab* rho,
-                                const MultiFab* cEx, const MultiFab* cEy, const MultiFab* cEz,
-                                const MultiFab* cBx, const MultiFab* cBy, const MultiFab* cBz,
-                                Real t, Real dt)
-{
-    jx.setVal(0.0);
-    jy.setVal(0.0);
-    jz.setVal(0.0);
-    if (cjx) cjx->setVal(0.0);
-    if (cjy) cjy->setVal(0.0);
-    if (cjz) cjz->setVal(0.0);
-    if (rho) rho->setVal(0.0);
-    for (auto& pc : allcontainers) {
-	pc->Evolve(lev, Ex, Ey, Ez, Bx, By, Bz, jx, jy, jz, cjx, cjy, cjz,
-               rho, cEx, cEy, cEz, cBx, cBy, cBz, t, dt);
-    }    
-}
-
-void
 MultiParticleContainer::PushXES (Real dt)
 {
     for (auto& pc : allcontainers) {
@@ -240,8 +216,9 @@ MultiParticleContainer::sumParticleCharge (bool local)
 
 void
 MultiParticleContainer::FieldGather (int lev,
-                                     const MultiFab& Ex, const MultiFab& Ey, const MultiFab& Ez,
-                                     const MultiFab& Bx, const MultiFab& By, const MultiFab& Bz)
+                                     const MultiFab& Ex, const MultiFab& Ey,
+                                     const MultiFab& Ez, const MultiFab& Bx,
+                                     const MultiFab& By, const MultiFab& Bz)
 {
     for (auto& pc : allcontainers) {
         pc->FieldGather(lev, Ex, Ey, Ez, Bx, By, Bz);
@@ -331,7 +308,7 @@ MultiParticleContainer::RedistributeLocal (const int num_ghost)
 }
 
 Vector<long>
-MultiParticleContainer::NumberOfParticlesInGrid(int lev) const
+MultiParticleContainer::NumberOfParticlesInGrid (int lev) const
 {
     const bool only_valid=true, only_local=true;
     Vector<long> r = allcontainers[0]->NumberOfParticlesInGrid(lev,only_valid,only_local);
diff --git a/Source/Particles/PhysicalParticleContainer.H b/Source/Particles/PhysicalParticleContainer.H
index d55764682..b80619733 100644
--- a/Source/Particles/PhysicalParticleContainer.H
+++ b/Source/Particles/PhysicalParticleContainer.H
@@ -27,17 +27,37 @@ public:
                                const amrex::Vector<std::unique_ptr<amrex::FabArray<amrex::BaseFab<int> > > >& masks) override;
 
     virtual void EvolveES (const amrex::Vector<std::array<std::unique_ptr<amrex::MultiFab>, 3> >& E,
-                                 amrex::Vector<std::unique_ptr<amrex::MultiFab> >& rho,
+                           amrex::Vector<std::unique_ptr<amrex::MultiFab> >& rho,
                            amrex::Real t, amrex::Real dt) override;
 #endif // WARPX_DO_ELECTROSTATIC
     
-    virtual void FieldGather(int lev,
-                             const amrex::MultiFab& Ex,
-                             const amrex::MultiFab& Ey,
-                             const amrex::MultiFab& Ez,
-                             const amrex::MultiFab& Bx,
-                             const amrex::MultiFab& By,
-                             const amrex::MultiFab& Bz) final;
+    virtual void FieldGather (int lev,
+                              const amrex::MultiFab& Ex,
+                              const amrex::MultiFab& Ey,
+                              const amrex::MultiFab& Ez,
+                              const amrex::MultiFab& Bx,
+                              const amrex::MultiFab& By,
+                              const amrex::MultiFab& Bz) final;
+
+    void FieldGather (WarpXParIter& pti,
+                      RealVector& Exp,
+                      RealVector& Eyp,
+                      RealVector& Ezp,
+                      RealVector& Bxp,
+                      RealVector& Byp,
+                      RealVector& Bzp,
+                      amrex::FArrayBox const * exfab,
+                      amrex::FArrayBox const * eyfab,
+                      amrex::FArrayBox const * ezfab,
+                      amrex::FArrayBox const * bxfab,
+                      amrex::FArrayBox const * byfab,
+                      amrex::FArrayBox const * bzfab,
+                      const int ngE, const int e_is_nodal,
+                      const long offset,
+                      const long np_to_gather,
+                      int thread_num,
+                      int lev,
+                      int depos_lev);
 
     virtual void Evolve (int lev,
 			 const amrex::MultiFab& Ex,
@@ -87,11 +107,8 @@ public:
 
     // Inject particles in Box 'part_box'
     virtual void AddParticles (int lev);
+
     void AddPlasma(int lev, amrex::RealBox part_realbox = amrex::RealBox());
-    void AddPlasmaCPU (int lev, amrex::RealBox part_realbox);
-#ifdef AMREX_USE_GPU
-    void AddPlasmaGPU (int lev, amrex::RealBox part_realbox);
-#endif
 
     void MapParticletoBoostedFrame(amrex::Real& x, amrex::Real& y, amrex::Real& z, std::array<amrex::Real, 3>& u);
 
@@ -120,16 +137,8 @@ protected:
     bool boost_adjust_transverse_positions = false;
     bool do_backward_propagation = false;
 
-    long NumParticlesToAdd (const amrex::Box& overlap_box,
-			    const amrex::RealBox& overlap_realbox,
-			    const amrex::RealBox& tile_real_box,
-			    const amrex::RealBox& particle_real_box);
-  
-    int GetRefineFac(const amrex::Real x, const amrex::Real y, const amrex::Real z);
-    std::unique_ptr<amrex::IArrayBox> m_refined_injection_mask = nullptr;
-
     // Inject particles during the whole simulation
-    void ContinuousInjection(const amrex::RealBox& injection_box) override;
+    void ContinuousInjection (const amrex::RealBox& injection_box) override;
 
 };
 
diff --git a/Source/Particles/PhysicalParticleContainer.cpp b/Source/Particles/PhysicalParticleContainer.cpp
index d47a7b220..d10390204 100644
--- a/Source/Particles/PhysicalParticleContainer.cpp
+++ b/Source/Particles/PhysicalParticleContainer.cpp
@@ -6,65 +6,16 @@
 #include <WarpX.H>
 #include <WarpXConst.H>
 #include <WarpXWrappers.h>
+#include <FieldGather.H>
 
+#include <WarpXAlgorithmSelection.H>
 
-using namespace amrex;
-
-long PhysicalParticleContainer::
-NumParticlesToAdd(const Box& overlap_box, const RealBox& overlap_realbox,
-                  const RealBox& tile_realbox, const RealBox& particle_real_box)
-{
-    const int lev = 0;
-    const Geometry& geom = Geom(lev);
-    int num_ppc = plasma_injector->num_particles_per_cell;
-    const Real* dx = geom.CellSize();
+// Import low-level single-particle kernels
+#include <UpdatePosition.H>
+#include <UpdateMomentumBoris.H>
+#include <UpdateMomentumVay.H>
 
-    long np = 0;
-    const auto& overlap_corner = overlap_realbox.lo();
-    for (IntVect iv = overlap_box.smallEnd(); iv <= overlap_box.bigEnd(); overlap_box.next(iv))
-    {
-        int fac;
-        if (do_continuous_injection) {
-#if ( AMREX_SPACEDIM == 3 )
-            Real x = overlap_corner[0] + (iv[0] + 0.5)*dx[0];
-            Real y = overlap_corner[1] + (iv[1] + 0.5)*dx[1];
-            Real z = overlap_corner[2] + (iv[2] + 0.5)*dx[2];
-#elif ( AMREX_SPACEDIM == 2 )
-            Real x = overlap_corner[0] + (iv[0] + 0.5)*dx[0];
-            Real y = 0;
-            Real z = overlap_corner[1] + (iv[1] + 0.5)*dx[1];
-#endif
-            fac = GetRefineFac(x, y, z);
-        } else {
-            fac = 1.0;
-        }
-	
-        int ref_num_ppc = num_ppc * AMREX_D_TERM(fac, *fac, *fac);
-        for (int i_part=0; i_part<ref_num_ppc;i_part++) {
-            std::array<Real, 3> r;
-            plasma_injector->getPositionUnitBox(r, i_part, fac);
-#if ( AMREX_SPACEDIM == 3 )
-            Real x = overlap_corner[0] + (iv[0] + r[0])*dx[0];
-            Real y = overlap_corner[1] + (iv[1] + r[1])*dx[1];
-            Real z = overlap_corner[2] + (iv[2] + r[2])*dx[2];
-#elif ( AMREX_SPACEDIM == 2 )
-            Real x = overlap_corner[0] + (iv[0] + r[0])*dx[0];
-            Real y = 0;
-            Real z = overlap_corner[1] + (iv[1] + r[1])*dx[1];
-#endif
-            // If the new particle is not inside the tile box,
-            // go to the next generated particle.
-#if ( AMREX_SPACEDIM == 3 )
-            if(!tile_realbox.contains( RealVect{x, y, z} )) continue;
-#elif ( AMREX_SPACEDIM == 2 )
-            if(!tile_realbox.contains( RealVect{x, z} )) continue;
-#endif
-            ++np;
-        }
-    }
-    
-    return np;
-}
+using namespace amrex;
 
 PhysicalParticleContainer::PhysicalParticleContainer (AmrCore* amr_core, int ispecies,
                                                       const std::string& name)
@@ -127,9 +78,7 @@ PhysicalParticleContainer::PhysicalParticleContainer (AmrCore* amr_core)
 void PhysicalParticleContainer::InitData()
 {
     AddParticles(0); // Note - add on level 0
-    if (maxLevel() > 0) {
-        Redistribute();  // We then redistribute
-    }
+    Redistribute();  // We then redistribute
 }
 
 void PhysicalParticleContainer::MapParticletoBoostedFrame(Real& x, Real& y, Real& z, std::array<Real, 3>& u)
@@ -193,45 +142,36 @@ PhysicalParticleContainer::AddGaussianBeam(Real x_m, Real y_m, Real z_m,
     std::normal_distribution<double> distz(z_m, z_rms);
 
     if (ParallelDescriptor::IOProcessor()) {
-        std::array<Real, 3> u;
-        Real weight;
         // If do_symmetrize, create 4x fewer particles, and 
         // Replicate each particle 4 times (x,y) (-x,y) (x,-y) (-x,-y)
         if (do_symmetrize){
             npart /= 4;
         }
         for (long i = 0; i < npart; ++i) {
-#if ( AMREX_SPACEDIM == 3 | WARPX_RZ)
-            weight = q_tot/npart/charge;
+#if ( AMREX_SPACEDIM == 3 | WARPX_DIM_RZ)
+            Real weight = q_tot/npart/charge;
             Real x = distx(mt);
             Real y = disty(mt);
             Real z = distz(mt);
 #elif ( AMREX_SPACEDIM == 2 )
-            weight = q_tot/npart/charge/y_rms;
+            Real weight = q_tot/npart/charge/y_rms;
             Real x = distx(mt);
             Real y = 0.;
             Real z = distz(mt);
 #endif
             if (plasma_injector->insideBounds(x, y, z)) {
-                plasma_injector->getMomentum(u, x, y, z);
+                XDim3 u = plasma_injector->getMomentum(x, y, z);
+                u.x *= PhysConst::c;
+                u.y *= PhysConst::c;
+                u.z *= PhysConst::c;
                 if (do_symmetrize){
-                    std::array<Real, 3> u_tmp;
-                    Real x_tmp, y_tmp;
                     // Add four particles to the beam:
-                    // (x,ux,y,uy) (-x,-ux,y,uy) (x,ux,-y,-uy) (-x,-ux,-y,-uy)
-                    for (int ix=0; ix<2; ix++){
-                        for (int iy=0; iy<2; iy++){
-                            u_tmp = u;
-                            x_tmp     = x*std::pow(-1,ix);
-                            u_tmp[0] *= std::pow(-1,ix);
-                            y_tmp     = y*std::pow(-1,iy);
-                            u_tmp[1] *= std::pow(-1,iy);
-                            CheckAndAddParticle(x_tmp, y_tmp, z, 
-                                                u_tmp, weight/4);
-                        }
-                    }
+                    CheckAndAddParticle( x, y, z, { u.x, u.y, u.z}, weight/4. );
+                    CheckAndAddParticle( x,-y, z, { u.x,-u.y, u.z}, weight/4. );
+                    CheckAndAddParticle(-x, y, z, {-u.x, u.y, u.z}, weight/4. );
+                    CheckAndAddParticle(-x,-y, z, {-u.x,-u.y, u.z}, weight/4. );
                 } else {
-                    CheckAndAddParticle(x, y, z, u, weight);
+                    CheckAndAddParticle(x, y, z, {u.x,u.y,u.z}, weight);
                 }
             }
         }
@@ -322,28 +262,19 @@ PhysicalParticleContainer::AddParticles (int lev)
 void
 PhysicalParticleContainer::AddPlasma (int lev, RealBox part_realbox)
 {
-#ifdef AMREX_USE_GPU
-    AddPlasmaGPU(lev, part_realbox);
-#else
-    AddPlasmaCPU(lev, part_realbox);
-#endif
-}
-
-void
-PhysicalParticleContainer::AddPlasmaCPU (int lev, RealBox part_realbox)
-{
-    BL_PROFILE("PhysicalParticleContainer::AddPlasmaCPU");
+    BL_PROFILE("PhysicalParticleContainer::AddPlasma");
 
     // If no part_realbox is provided, initialize particles in the whole domain
     const Geometry& geom = Geom(lev);
     if (!part_realbox.ok()) part_realbox = geom.ProbDomain();
 
     int num_ppc = plasma_injector->num_particles_per_cell;
-#ifdef WARPX_RZ
+#ifdef WARPX_DIM_RZ
     Real rmax = std::min(plasma_injector->xmax, part_realbox.hi(0));
 #endif
 
-    const Real* dx = geom.CellSize();
+    const auto dx = geom.CellSizeArray();
+    const auto problo = geom.ProbLoArray();
 
     Real scale_fac;
 #if AMREX_SPACEDIM==3
@@ -358,490 +289,341 @@ PhysicalParticleContainer::AddPlasmaCPU (int lev, RealBox part_realbox)
         const int grid_id = mfi.index();
         const int tile_id = mfi.LocalTileIndex();
         GetParticles(lev)[std::make_pair(grid_id, tile_id)];
+        if (WarpX::do_boosted_frame_diagnostic && do_boosted_frame_diags) {
+            DefineAndReturnParticleTile(lev, grid_id, tile_id);
+        }
     }
 #endif
 
     MultiFab* cost = WarpX::getCosts(lev);
 
-    if ( (not m_refined_injection_mask) and WarpX::do_moving_window)
+    const int nlevs = numLevels();
+    static bool refine_injection = false;
+    static Box fine_injection_box;
+    static int rrfac = 1;
+    // This does not work if the mesh is dynamic.  But in that case, we should
+    // not use refined injection either.  We also assume there is only one fine level.
+    if (WarpX::do_moving_window and WarpX::refine_plasma
+        and do_continuous_injection and nlevs == 2)
     {
-        Box mask_box = geom.Domain();
-        mask_box.setSmall(WarpX::moving_window_dir, 0);
-        mask_box.setBig(WarpX::moving_window_dir, 0);
-        m_refined_injection_mask.reset( new IArrayBox(mask_box));
-        m_refined_injection_mask->setVal(-1);
+        refine_injection = true;
+        fine_injection_box = ParticleBoxArray(1).minimalBox();
+        fine_injection_box.setSmall(WarpX::moving_window_dir, std::numeric_limits<int>::lowest());
+        fine_injection_box.setBig(WarpX::moving_window_dir, std::numeric_limits<int>::max());
+        rrfac = m_gdb->refRatio(0)[0];
+        fine_injection_box.coarsen(rrfac);
     }
 
+    InjectorPosition* inj_pos = plasma_injector->getInjectorPosition();
+    InjectorDensity*  inj_rho = plasma_injector->getInjectorDensity();
+    InjectorMomentum* inj_mom = plasma_injector->getInjectorMomentum();
+    Real gamma_boost = WarpX::gamma_boost;
+    Real beta_boost = WarpX::beta_boost;
+    Real t = WarpX::GetInstance().gett_new(lev);
+    Real density_min = plasma_injector->density_min;
+    Real density_max = plasma_injector->density_max;
+
+#ifdef WARPX_DIM_RZ
+    bool radially_weighted = plasma_injector->radially_weighted;
+#endif
+
     MFItInfo info;
-    if (do_tiling) {
+    if (do_tiling && Gpu::notInLaunchRegion()) {
         info.EnableTiling(tile_size);
     }
-    info.SetDynamic(true);
-
 #ifdef _OPENMP
+    info.SetDynamic(true);
 #pragma omp parallel if (not WarpX::serialize_ics)
 #endif
+    for (MFIter mfi = MakeMFIter(lev, info); mfi.isValid(); ++mfi)
     {
-        std::array<Real,PIdx::nattribs> attribs;
-        attribs.fill(0.0);
-
-        // Loop through the tiles
-        for (MFIter mfi = MakeMFIter(lev, info); mfi.isValid(); ++mfi) {
-
-            Real wt = amrex::second();
-
-            const Box& tile_box = mfi.tilebox();
-            const RealBox tile_realbox = WarpX::getRealBox(tile_box, lev);
-
-            // Find the cells of part_box that overlap with tile_realbox
-            // If there is no overlap, just go to the next tile in the loop
-            RealBox overlap_realbox;
-            Box overlap_box;
-            Real ncells_adjust;
-            bool no_overlap = 0;
-
-            for (int dir=0; dir<AMREX_SPACEDIM; dir++) {
-                if ( tile_realbox.lo(dir) <= part_realbox.hi(dir) ) {
-                    ncells_adjust = std::floor( (tile_realbox.lo(dir) - part_realbox.lo(dir))/dx[dir] );
-                    overlap_realbox.setLo( dir, part_realbox.lo(dir) + std::max(ncells_adjust, 0.) * dx[dir]);
-                } else {
-                    no_overlap = 1; break;
-                }
-                if ( tile_realbox.hi(dir) >= part_realbox.lo(dir) ) {
-                    ncells_adjust = std::floor( (part_realbox.hi(dir) - tile_realbox.hi(dir))/dx[dir] );
-                    overlap_realbox.setHi( dir, part_realbox.hi(dir) - std::max(ncells_adjust, 0.) * dx[dir]);
-                } else {
-                    no_overlap = 1; break;
-                }
-                // Count the number of cells in this direction in overlap_realbox
-                overlap_box.setSmall( dir, 0 );
-                overlap_box.setBig( dir,
-                                    int( round((overlap_realbox.hi(dir)-overlap_realbox.lo(dir))/dx[dir] )) - 1);
+        Real wt = amrex::second();
+
+        const Box& tile_box = mfi.tilebox();
+        const RealBox tile_realbox = WarpX::getRealBox(tile_box, lev);
+
+        // Find the cells of part_box that overlap with tile_realbox
+        // If there is no overlap, just go to the next tile in the loop
+        RealBox overlap_realbox;
+        Box overlap_box;
+        IntVect shifted;
+        bool no_overlap = false;
+
+        for (int dir=0; dir<AMREX_SPACEDIM; dir++) {
+            if ( tile_realbox.lo(dir) <= part_realbox.hi(dir) ) {
+                Real ncells_adjust = std::floor( (tile_realbox.lo(dir) - part_realbox.lo(dir))/dx[dir] );
+                overlap_realbox.setLo( dir, part_realbox.lo(dir) + std::max(ncells_adjust, 0.) * dx[dir]);
+            } else {
+                no_overlap = true; break;
             }
-            if (no_overlap == 1) {
-                continue; // Go to the next tile
+            if ( tile_realbox.hi(dir) >= part_realbox.lo(dir) ) {
+                Real ncells_adjust = std::floor( (part_realbox.hi(dir) - tile_realbox.hi(dir))/dx[dir] );
+                overlap_realbox.setHi( dir, part_realbox.hi(dir) - std::max(ncells_adjust, 0.) * dx[dir]);
+            } else {
+                no_overlap = true; break;
             }
+            // Count the number of cells in this direction in overlap_realbox
+            overlap_box.setSmall( dir, 0 );
+            overlap_box.setBig( dir,
+                int( std::round((overlap_realbox.hi(dir)-overlap_realbox.lo(dir))
+                                /dx[dir] )) - 1);
+            shifted[dir] = std::round((overlap_realbox.lo(dir)-problo[dir])/dx[dir]);
+            // shifted is exact in non-moving-window direction.  That's all we care about.
+        }
+        if (no_overlap == 1) {
+            continue; // Go to the next tile
+        }
 
-            const int grid_id = mfi.index();
-            const int tile_id = mfi.LocalTileIndex();
-
-            // Loop through the cells of overlap_box and inject
-            // the corresponding particles
-            const auto& overlap_corner = overlap_realbox.lo();
-            for (IntVect iv = overlap_box.smallEnd(); iv <= overlap_box.bigEnd(); overlap_box.next(iv))
-            {
-                int fac;
-                if (do_continuous_injection) {
-#if ( AMREX_SPACEDIM == 3 )
-                    Real x = overlap_corner[0] + (iv[0] + 0.5)*dx[0];
-                    Real y = overlap_corner[1] + (iv[1] + 0.5)*dx[1];
-                    Real z = overlap_corner[2] + (iv[2] + 0.5)*dx[2];
-#elif ( AMREX_SPACEDIM == 2 )
-                    Real x = overlap_corner[0] + (iv[0] + 0.5)*dx[0];
-                    Real y = 0;
-                    Real z = overlap_corner[1] + (iv[1] + 0.5)*dx[1];
-#endif
-                    fac = GetRefineFac(x, y, z);
-                } else {
-                    fac = 1.0;
-                }
-
-                int ref_num_ppc = num_ppc * AMREX_D_TERM(fac, *fac, *fac);
-                for (int i_part=0; i_part<ref_num_ppc;i_part++) {
-                    std::array<Real, 3> r;
-                    plasma_injector->getPositionUnitBox(r, i_part, fac);
-#if ( AMREX_SPACEDIM == 3 )
-                    Real x = overlap_corner[0] + (iv[0] + r[0])*dx[0];
-                    Real y = overlap_corner[1] + (iv[1] + r[1])*dx[1];
-                    Real z = overlap_corner[2] + (iv[2] + r[2])*dx[2];
-#elif ( AMREX_SPACEDIM == 2 )
-                    Real x = overlap_corner[0] + (iv[0] + r[0])*dx[0];
-                    Real y = 0;
-                    Real z = overlap_corner[1] + (iv[1] + r[1])*dx[1];
-#endif
-                    // If the new particle is not inside the tile box,
-                    // go to the next generated particle.
-#if ( AMREX_SPACEDIM == 3 )
-                    if(!tile_realbox.contains( RealVect{x, y, z} )) continue;
-#elif ( AMREX_SPACEDIM == 2 )
-                    if(!tile_realbox.contains( RealVect{x, z} )) continue;
-#endif
+        const int grid_id = mfi.index();
+        const int tile_id = mfi.LocalTileIndex();
 
-                    // Save the x and y values to use in the insideBounds checks.
-                    // This is needed with WARPX_RZ since x and y are modified.
-                    Real xb = x;
-                    Real yb = y;
-
-#ifdef WARPX_RZ
-                    // Replace the x and y, choosing the angle randomly.
-                    // These x and y are used to get the momentum and density
-                    Real theta = 2.*MathConst::pi*amrex::Random();
-                    y = x*std::sin(theta);
-                    x = x*std::cos(theta);
-#endif
+        // Max number of new particles, if particles are created in the whole
+        // overlap_box. All of them are created, and invalid ones are then 
+        // discarded.
+        int max_new_particles = overlap_box.numPts() * num_ppc;
 
-                    Real dens;
-                    std::array<Real, 3> u;
-                    if (WarpX::gamma_boost == 1.){
-                        // Lab-frame simulation
-                        // If the particle is not within the species's
-                        // xmin, xmax, ymin, ymax, zmin, zmax, go to
-                        // the next generated particle.
-                        if (!plasma_injector->insideBounds(xb, yb, z)) continue;
-                        plasma_injector->getMomentum(u, x, y, z);
-                        dens = plasma_injector->getDensity(x, y, z);
-                    } else {
-                        // Boosted-frame simulation
-                        Real c = PhysConst::c;
-                        Real gamma_boost = WarpX::gamma_boost;
-                        Real beta_boost = WarpX::beta_boost;
-                        // Since the user provides the density distribution
-                        // at t_lab=0 and in the lab-frame coordinates,
-                        // we need to find the lab-frame position of this
-                        // particle at t_lab=0, from its boosted-frame coordinates
-                        // Assuming ballistic motion, this is given by:
-                        // z0_lab = gamma*( z_boost*(1-beta*betaz_lab) - ct_boost*(betaz_lab-beta) )
-                        // where betaz_lab is the speed of the particle in the lab frame
-                        //
-                        // In order for this equation to be solvable, betaz_lab
-                        // is explicitly assumed to have no dependency on z0_lab
-                        plasma_injector->getMomentum(u, x, y, 0.); // No z0_lab dependency
-                        // At this point u is the lab-frame momentum
-                        // => Apply the above formula for z0_lab
-                        Real gamma_lab = std::sqrt( 1 + (u[0]*u[0] + u[1]*u[1] + u[2]*u[2])/(c*c) );
-                        Real betaz_lab = u[2]/gamma_lab/c;
-                        Real t = WarpX::GetInstance().gett_new(lev);
-                        Real z0_lab = gamma_boost * ( z*(1-beta_boost*betaz_lab) - c*t*(betaz_lab-beta_boost) );
-                        // If the particle is not within the lab-frame zmin, zmax, etc.
-                        // go to the next generated particle.
-                        if (!plasma_injector->insideBounds(xb, yb, z0_lab)) continue;
-                        // call `getDensity` with lab-frame parameters
-                        dens = plasma_injector->getDensity(x, y, z0_lab);
-                        // At this point u and dens are the lab-frame quantities
-                        // => Perform Lorentz transform
-                        dens = gamma_boost * dens * ( 1 - beta_boost*betaz_lab );
-                        u[2] = gamma_boost * ( u[2] -beta_boost*c*gamma_lab );
-                    }
-                    Real weight = dens * scale_fac / (AMREX_D_TERM(fac, *fac, *fac));
-#ifdef WARPX_RZ
-                    if (plasma_injector->radially_weighted) {
-                        weight *= 2*MathConst::pi*xb;
-                    } else {
-                        // This is not correct since it might shift the particle
-                        // out of the local grid
-                        x = std::sqrt(xb*rmax);
-                        weight *= dx[0];
-                    }
-#endif
-                    attribs[PIdx::w ] = weight;
-                    attribs[PIdx::ux] = u[0];
-                    attribs[PIdx::uy] = u[1];
-                    attribs[PIdx::uz] = u[2];
-                    
-                    if (WarpX::do_boosted_frame_diagnostic && do_boosted_frame_diags)
-                    {
-                        auto& particle_tile = DefineAndReturnParticleTile(lev, grid_id, tile_id);
-                        particle_tile.push_back_real(particle_comps["xold"], x);
-                        particle_tile.push_back_real(particle_comps["yold"], y);
-                        particle_tile.push_back_real(particle_comps["zold"], z);
-
-                        particle_tile.push_back_real(particle_comps["uxold"], u[0]);
-                        particle_tile.push_back_real(particle_comps["uyold"], u[1]);
-                        particle_tile.push_back_real(particle_comps["uzold"], u[2]);
-                    }
-
-                    AddOneParticle(lev, grid_id, tile_id, x, y, z, attribs);
+        // If refined injection is enabled, build dp_cellid, a pointer to an
+        // array of (cell index, particle index) pairs for the new particles.
+        Vector<int> cellid_v;
+        if (refine_injection and lev == 0)
+        {
+            // then how many new particles will be injected is not that simple
+            // We have to shift fine_injection_box because overlap_box has been shifted.
+            Box fine_overlap_box = overlap_box & amrex::shift(fine_injection_box,shifted);
+            max_new_particles += fine_overlap_box.numPts() * num_ppc
+                * (AMREX_D_TERM(rrfac,*rrfac,*rrfac)-1);
+            for (int icell = 0, ncells = overlap_box.numPts(); icell < ncells; ++icell) {
+                IntVect iv = overlap_box.atOffset(icell);
+                int r = (fine_overlap_box.contains(iv)) ? AMREX_D_TERM(rrfac,*rrfac,*rrfac) : 1;
+                for (int ipart = 0; ipart < r; ++ipart) {
+                    cellid_v.push_back(icell);
+                    cellid_v.push_back(ipart);
                 }
             }
+        }
+        int const* hp_cellid = (cellid_v.empty()) ? nullptr : cellid_v.data();
+        amrex::AsyncArray<int> cellid_aa(hp_cellid, cellid_v.size());
+        int const* dp_cellid = cellid_aa.data();
 
-            if (cost) {
-                wt = (amrex::second() - wt) / tile_box.d_numPts();
-                Array4<Real> const& costarr = cost->array(mfi);
-                amrex::ParallelFor(tile_box,
-                                   [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept
-                                   {
-                                       costarr(i,j,k) += wt;
-                                   });
-            }
+        // Update NextID to include particles created in this function
+        int pid;
+#pragma omp critical (add_plasma_nextid)
+        {
+            pid = ParticleType::NextID();
+            ParticleType::NextID(pid+max_new_particles);
         }
-    }
-}
+        const int cpuid = ParallelDescriptor::MyProc();
 
-#ifdef AMREX_USE_GPU
-void
-PhysicalParticleContainer::AddPlasmaGPU (int lev, RealBox part_realbox)
-{
-    BL_PROFILE("PhysicalParticleContainer::AddPlasmaGPU");
+        auto& particle_tile = GetParticles(lev)[std::make_pair(grid_id,tile_id)];
+        bool do_boosted = false;
+        if (WarpX::do_boosted_frame_diagnostic && do_boosted_frame_diags) {
+            do_boosted = true;
+            DefineAndReturnParticleTile(lev, grid_id, tile_id);
+        }
+        auto old_size = particle_tile.GetArrayOfStructs().size();
+        auto new_size = old_size + max_new_particles;
+        particle_tile.resize(new_size);
+
+        ParticleType* pp = particle_tile.GetArrayOfStructs()().data() + old_size;
+        auto& soa = particle_tile.GetStructOfArrays();
+        GpuArray<Real*,PIdx::nattribs> pa;
+        for (int ia = 0; ia < PIdx::nattribs; ++ia) {
+            pa[ia] = soa.GetRealData(ia).data() + old_size;
+        }
+        GpuArray<Real*,6> pb;
+        if (do_boosted) {
+            pb[0] = soa.GetRealData(particle_comps[ "xold"]).data() + old_size;
+            pb[1] = soa.GetRealData(particle_comps[ "yold"]).data() + old_size;
+            pb[2] = soa.GetRealData(particle_comps[ "zold"]).data() + old_size;
+            pb[3] = soa.GetRealData(particle_comps["uxold"]).data() + old_size;
+            pb[4] = soa.GetRealData(particle_comps["uyold"]).data() + old_size;
+            pb[5] = soa.GetRealData(particle_comps["uzold"]).data() + old_size;
+        }
 
-    // If no part_realbox is provided, initialize particles in the whole domain
-    const Geometry& geom = Geom(lev);
-    if (!part_realbox.ok()) part_realbox = geom.ProbDomain();
+        const GpuArray<Real,AMREX_SPACEDIM> overlap_corner
+            {AMREX_D_DECL(overlap_realbox.lo(0),
+                          overlap_realbox.lo(1),
+                          overlap_realbox.lo(2))};
 
-    int num_ppc = plasma_injector->num_particles_per_cell;
-#ifdef WARPX_RZ
-    Real rmax = std::min(plasma_injector->xmax, part_realbox.hi(0));
-#endif
+        std::size_t shared_mem_bytes = plasma_injector->sharedMemoryNeeded();
+        int lrrfac = rrfac;
 
-    const Real* dx = geom.CellSize();
+        // Loop over all new particles and inject them (creates too many 
+        // particles, in particular does not consider xmin, xmax etc.).
+        // The invalid ones are given negative ID and are deleted during the 
+        // next redistribute.
+        amrex::For(max_new_particles, [=] AMREX_GPU_DEVICE (int ip) noexcept
+        {
+            ParticleType& p = pp[ip];
+            p.id() = pid+ip;
+            p.cpu() = cpuid;
+
+            int cellid, i_part;
+            Real fac;
+            if (dp_cellid == nullptr) {
+                cellid = ip/num_ppc;
+                i_part = ip - cellid*num_ppc;
+                fac = 1.0;
+            } else {
+                cellid = dp_cellid[2*ip];
+                i_part = dp_cellid[2*ip+1];
+                fac = lrrfac;
+            }
 
-    Real scale_fac;
-#if AMREX_SPACEDIM==3
-    scale_fac = dx[0]*dx[1]*dx[2]/num_ppc;
-#elif AMREX_SPACEDIM==2
-    scale_fac = dx[0]*dx[1]/num_ppc;
-#endif
+            IntVect iv = overlap_box.atOffset(cellid);
 
-#ifdef _OPENMP
-    // First touch all tiles in the map in serial
-    for (MFIter mfi = MakeMFIter(lev); mfi.isValid(); ++mfi) {
-        const int grid_id = mfi.index();
-        const int tile_id = mfi.LocalTileIndex();
-        GetParticles(lev)[std::make_pair(grid_id, tile_id)];
-    }
+            const XDim3 r = inj_pos->getPositionUnitBox(i_part, fac);
+#if (AMREX_SPACEDIM == 3)
+            Real x = overlap_corner[0] + (iv[0]+r.x)*dx[0];
+            Real y = overlap_corner[1] + (iv[1]+r.y)*dx[1];
+            Real z = overlap_corner[2] + (iv[2]+r.z)*dx[2];
+#else
+            Real x = overlap_corner[0] + (iv[0]+r.x)*dx[0];
+            Real y = 0.0;
+            Real z = overlap_corner[1] + (iv[1]+r.y)*dx[1];
 #endif
 
-    MultiFab* cost = WarpX::getCosts(lev);
-
-    if ( (not m_refined_injection_mask) and WarpX::do_moving_window)
-    {
-        Box mask_box = geom.Domain();
-        mask_box.setSmall(WarpX::moving_window_dir, 0);
-        mask_box.setBig(WarpX::moving_window_dir, 0);
-        m_refined_injection_mask.reset( new IArrayBox(mask_box));
-        m_refined_injection_mask->setVal(-1);
-    }
-
-    MFItInfo info;
-    if (do_tiling) {
-        info.EnableTiling(tile_size);
-    }
-    info.SetDynamic(true);
-
-#ifdef _OPENMP
-#pragma omp parallel if (not WarpX::serialize_ics)
+#if (AMREX_SPACEDIM == 3)
+            if (!tile_realbox.contains(XDim3{x,y,z})) {
+                p.id() = -1;
+                return;
+            }
+#else
+            if (!tile_realbox.contains(XDim3{x,z,0.0})) {
+                p.id() = -1;
+                return;
+            }
 #endif
-    {
-        std::array<Real,PIdx::nattribs> attribs;
-        attribs.fill(0.0);
-
-        // Loop through the tiles
-        for (MFIter mfi = MakeMFIter(lev, info); mfi.isValid(); ++mfi) {
 
-            Real wt = amrex::second();
-
-            const Box& tile_box = mfi.tilebox();
-            const RealBox tile_realbox = WarpX::getRealBox(tile_box, lev);
-
-            // Find the cells of part_box that overlap with tile_realbox
-            // If there is no overlap, just go to the next tile in the loop
-            RealBox overlap_realbox;
-            Box overlap_box;
-            Real ncells_adjust;
-            bool no_overlap = 0;
+            // Save the x and y values to use in the insideBounds checks.
+            // This is needed with WARPX_DIM_RZ since x and y are modified.
+            Real xb = x;
+            Real yb = y;
+
+#ifdef WARPX_DIM_RZ
+            // Replace the x and y, choosing the angle randomly.
+            // These x and y are used to get the momentum and density
+            Real theta = 2.*MathConst::pi*amrex::Random();
+            x = xb*std::cos(theta);
+            y = xb*std::sin(theta);
+#endif
 
-            for (int dir=0; dir<AMREX_SPACEDIM; dir++) {
-                if ( tile_realbox.lo(dir) <= part_realbox.hi(dir) ) {
-                    ncells_adjust = std::floor( (tile_realbox.lo(dir) - part_realbox.lo(dir))/dx[dir] );
-                    overlap_realbox.setLo( dir, part_realbox.lo(dir) + std::max(ncells_adjust, 0.) * dx[dir]);
-                } else {
-                    no_overlap = 1; break;
+            Real dens;
+            XDim3 u;
+            if (gamma_boost == 1.) {
+                // Lab-frame simulation
+                // If the particle is not within the species's
+                // xmin, xmax, ymin, ymax, zmin, zmax, mark it as
+                // invalid (id = -1) and skip it.
+                if (!inj_pos->insideBounds(xb, yb, z)) {
+                    p.id() = -1;
+                    return;
                 }
-                if ( tile_realbox.hi(dir) >= part_realbox.lo(dir) ) {
-                    ncells_adjust = std::floor( (part_realbox.hi(dir) - tile_realbox.hi(dir))/dx[dir] );
-                    overlap_realbox.setHi( dir, part_realbox.hi(dir) - std::max(ncells_adjust, 0.) * dx[dir]);
-                } else {
-                    no_overlap = 1; break;
+                u = inj_mom->getMomentum(x, y, z);
+                dens = inj_rho->getDensity(x, y, z);
+                // Remove particle if density below threshold
+                if ( dens < density_min ){
+                    p.id() = -1;
+                    return;
                 }
-                // Count the number of cells in this direction in overlap_realbox
-                overlap_box.setSmall( dir, 0 );
-                overlap_box.setBig( dir,
-                                    int( round((overlap_realbox.hi(dir)-overlap_realbox.lo(dir))/dx[dir] )) - 1);
-            }
-            if (no_overlap == 1) {
-                continue; // Go to the next tile
-            }
-
-            const int grid_id = mfi.index();
-            const int tile_id = mfi.LocalTileIndex();
-
-            Cuda::HostVector<ParticleType> host_particles;
-            std::array<Cuda::HostVector<Real>, PIdx::nattribs> host_attribs;
-	    
-            // Loop through the cells of overlap_box and inject
-            // the corresponding particles
-            const auto& overlap_corner = overlap_realbox.lo();
-            for (IntVect iv = overlap_box.smallEnd(); iv <= overlap_box.bigEnd(); overlap_box.next(iv))
-            {
-                int fac;
-                if (do_continuous_injection) {
-#if ( AMREX_SPACEDIM == 3 )
-                    Real x = overlap_corner[0] + (iv[0] + 0.5)*dx[0];
-                    Real y = overlap_corner[1] + (iv[1] + 0.5)*dx[1];
-                    Real z = overlap_corner[2] + (iv[2] + 0.5)*dx[2];
-#elif ( AMREX_SPACEDIM == 2 )
-                    Real x = overlap_corner[0] + (iv[0] + 0.5)*dx[0];
-                    Real y = 0;
-                    Real z = overlap_corner[1] + (iv[1] + 0.5)*dx[1];
-#endif
-                    fac = GetRefineFac(x, y, z);
-                } else {
-                    fac = 1.0;
+                // Cut density if above threshold
+                dens = amrex::min(dens, density_max);
+            } else {
+                // Boosted-frame simulation
+                // Since the user provides the density distribution
+                // at t_lab=0 and in the lab-frame coordinates,
+                // we need to find the lab-frame position of this
+                // particle at t_lab=0, from its boosted-frame coordinates
+                // Assuming ballistic motion, this is given by:
+                // z0_lab = gamma*( z_boost*(1-beta*betaz_lab) - ct_boost*(betaz_lab-beta) )
+                // where betaz_lab is the speed of the particle in the lab frame
+                //
+                // In order for this equation to be solvable, betaz_lab
+                // is explicitly assumed to have no dependency on z0_lab
+                u = inj_mom->getMomentum(x, y, 0.); // No z0_lab dependency
+                // At this point u is the lab-frame momentum
+                // => Apply the above formula for z0_lab
+                Real gamma_lab = std::sqrt( 1.+(u.x*u.x+u.y*u.y+u.z*u.z) );
+                Real betaz_lab = u.z/(gamma_lab);
+                Real z0_lab = gamma_boost * ( z*(1-beta_boost*betaz_lab)
+                                              - PhysConst::c*t*(betaz_lab-beta_boost) );
+                // If the particle is not within the lab-frame zmin, zmax, etc.
+                // mark it as invalid (id = -1) and skip it.
+                if (!inj_pos->insideBounds(xb, yb, z0_lab)) {
+                    p.id() = -1;
+                    return;
                 }
+                // call `getDensity` with lab-frame parameters
+                dens = inj_rho->getDensity(x, y, z0_lab);
+                // Remove particle if density below threshold
+                if ( dens < density_min ){
+                    p.id() = -1;
+                    return;
+                }
+                // Cut density if above threshold
+                dens = amrex::min(dens, density_max);
+                // At this point u and dens are the lab-frame quantities
+                // => Perform Lorentz transform
+                dens = gamma_boost * dens * ( 1.0 - beta_boost*betaz_lab );
+                u.z = gamma_boost * ( u.z -beta_boost*gamma_lab );
+            }
 
-                int ref_num_ppc = num_ppc * AMREX_D_TERM(fac, *fac, *fac);
-                for (int i_part=0; i_part<ref_num_ppc;i_part++) {
-                    std::array<Real, 3> r;
-                    plasma_injector->getPositionUnitBox(r, i_part, fac);
-#if ( AMREX_SPACEDIM == 3 )
-                    Real x = overlap_corner[0] + (iv[0] + r[0])*dx[0];
-                    Real y = overlap_corner[1] + (iv[1] + r[1])*dx[1];
-                    Real z = overlap_corner[2] + (iv[2] + r[2])*dx[2];
-#elif ( AMREX_SPACEDIM == 2 )
-                    Real x = overlap_corner[0] + (iv[0] + r[0])*dx[0];
-                    Real y = 0;
-                    Real z = overlap_corner[1] + (iv[1] + r[1])*dx[1];
-#endif
-                    // If the new particle is not inside the tile box,
-                    // go to the next generated particle.
-#if ( AMREX_SPACEDIM == 3 )
-                    if(!tile_realbox.contains( RealVect{x, y, z} )) continue;
-#elif ( AMREX_SPACEDIM == 2 )
-                    if(!tile_realbox.contains( RealVect{x, z} )) continue;
-#endif
-
-                    // Save the x and y values to use in the insideBounds checks.
-                    // This is needed with WARPX_RZ since x and y are modified.
-                    Real xb = x;
-                    Real yb = y;
-
-#ifdef WARPX_RZ
-                    // Replace the x and y, choosing the angle randomly.
-                    // These x and y are used to get the momentum and density
-                    Real theta = 2.*MathConst::pi*amrex::Random();
-                    x = xb*std::cos(theta);
-                    y = xb*std::sin(theta);
-#endif
+            u.x *= PhysConst::c;
+            u.y *= PhysConst::c;
+            u.z *= PhysConst::c;
 
-                    Real dens;
-                    std::array<Real, 3> u;
-                    if (WarpX::gamma_boost == 1.){
-                        // Lab-frame simulation
-                        // If the particle is not within the species's
-                        // xmin, xmax, ymin, ymax, zmin, zmax, go to
-                        // the next generated particle.
-                        if (!plasma_injector->insideBounds(xb, yb, z)) continue;
-                        plasma_injector->getMomentum(u, x, y, z);
-                        dens = plasma_injector->getDensity(x, y, z);
-                    } else {
-                        // Boosted-frame simulation
-                        Real c = PhysConst::c;
-                        Real gamma_boost = WarpX::gamma_boost;
-                        Real beta_boost = WarpX::beta_boost;
-                        // Since the user provides the density distribution
-                        // at t_lab=0 and in the lab-frame coordinates,
-                        // we need to find the lab-frame position of this
-                        // particle at t_lab=0, from its boosted-frame coordinates
-                        // Assuming ballistic motion, this is given by:
-                        // z0_lab = gamma*( z_boost*(1-beta*betaz_lab) - ct_boost*(betaz_lab-beta) )
-                        // where betaz_lab is the speed of the particle in the lab frame
-                        //
-                        // In order for this equation to be solvable, betaz_lab
-                        // is explicitly assumed to have no dependency on z0_lab
-                        plasma_injector->getMomentum(u, x, y, 0.); // No z0_lab dependency
-                        // At this point u is the lab-frame momentum
-                        // => Apply the above formula for z0_lab
-                        Real gamma_lab = std::sqrt( 1 + (u[0]*u[0] + u[1]*u[1] + u[2]*u[2])/(c*c) );
-                        Real betaz_lab = u[2]/gamma_lab/c;
-                        Real t = WarpX::GetInstance().gett_new(lev);
-                        Real z0_lab = gamma_boost * ( z*(1-beta_boost*betaz_lab) - c*t*(betaz_lab-beta_boost) );
-                        // If the particle is not within the lab-frame zmin, zmax, etc.
-                        // go to the next generated particle.
-                        if (!plasma_injector->insideBounds(xb, yb, z0_lab)) continue;
-                        // call `getDensity` with lab-frame parameters
-                        dens = plasma_injector->getDensity(x, y, z0_lab);
-                        // At this point u and dens are the lab-frame quantities
-                        // => Perform Lorentz transform
-                        dens = gamma_boost * dens * ( 1 - beta_boost*betaz_lab );
-                        u[2] = gamma_boost * ( u[2] -beta_boost*c*gamma_lab );
-                    }
-                    Real weight = dens * scale_fac / (AMREX_D_TERM(fac, *fac, *fac));
-#ifdef WARPX_RZ
-                    if (plasma_injector->radially_weighted) {
-                        weight *= 2*MathConst::pi*xb;
-                    } else {
-                        // This is not correct since it might shift the particle
-                        // out of the local grid
-                        x = std::sqrt(xb*rmax);
-                        weight *= dx[0];
-                    }
+            // Real weight = dens * scale_fac / (AMREX_D_TERM(fac, *fac, *fac));
+            Real weight = dens * scale_fac;
+#ifdef WARPX_DIM_RZ
+            if (radially_weighted) {
+                weight *= 2.*MathConst::pi*xb;
+            } else {
+                // This is not correct since it might shift the particle
+                // out of the local grid
+                x = std::sqrt(xb*rmax);
+                weight *= dx[0];
+            }
 #endif
-                    attribs[PIdx::w ] = weight;
-                    attribs[PIdx::ux] = u[0];
-                    attribs[PIdx::uy] = u[1];
-                    attribs[PIdx::uz] = u[2];
-
-                    // note - this will be slow on the GPU, need to revisit
-                    if (WarpX::do_boosted_frame_diagnostic && do_boosted_frame_diags)
-                    {
-                        auto& particle_tile = DefineAndReturnParticleTile(lev, grid_id, tile_id);
-                        particle_tile.push_back_real(particle_comps["xold"], x);
-                        particle_tile.push_back_real(particle_comps["yold"], y);
-                        particle_tile.push_back_real(particle_comps["zold"], z);
-
-                        particle_tile.push_back_real(particle_comps["uxold"], u[0]);
-                        particle_tile.push_back_real(particle_comps["uyold"], u[1]);
-                        particle_tile.push_back_real(particle_comps["uzold"], u[2]);
-                    }
+            pa[PIdx::w ][ip] = weight;
+            pa[PIdx::ux][ip] = u.x;
+            pa[PIdx::uy][ip] = u.y;
+            pa[PIdx::uz][ip] = u.z;
+
+            if (do_boosted) {
+                pb[0][ip] = x;
+                pb[1][ip] = y;
+                pb[2][ip] = z;
+                pb[3][ip] = u.x;
+                pb[4][ip] = u.y;
+                pb[5][ip] = u.z;
+            }
 
-                    ParticleType p;
-                    p.id()  = ParticleType::NextID();
-                    p.cpu() = ParallelDescriptor::MyProc();
 #if (AMREX_SPACEDIM == 3)
-                    p.pos(0) = x;
-                    p.pos(1) = y;
-                    p.pos(2) = z;
+            p.pos(0) = x;
+            p.pos(1) = y;
+            p.pos(2) = z;
 #elif (AMREX_SPACEDIM == 2)
-#ifdef WARPX_RZ
-                    attribs[PIdx::theta] = theta;
+#ifdef WARPX_DIM_RZ
+            pa[PIdx::theta][ip] = theta;
 #endif
-                    p.pos(0) = xb;
-                    p.pos(1) = z;
+            p.pos(0) = xb;
+            p.pos(1) = z;
 #endif
-
-                    host_particles.push_back(p);
-                    for (int kk = 0; kk < PIdx::nattribs; ++kk)
-                        host_attribs[kk].push_back(attribs[kk]);
-                }
-            }
-
-            auto& particle_tile = GetParticles(lev)[std::make_pair(grid_id,tile_id)];
-            auto old_size = particle_tile.GetArrayOfStructs().size();
-            auto new_size = old_size + host_particles.size();
-            particle_tile.resize(new_size);
-
-            Cuda::thrust_copy(host_particles.begin(),
-                              host_particles.end(),
-                              particle_tile.GetArrayOfStructs().begin() + old_size);
-
-            for (int kk = 0; kk < PIdx::nattribs; ++kk) {
-                Cuda::thrust_copy(host_attribs[kk].begin(),
-                                  host_attribs[kk].end(),
-                                  particle_tile.GetStructOfArrays().GetRealData(kk).begin() + old_size);
-            }
-	    			 
-            if (cost) {
-                wt = (amrex::second() - wt) / tile_box.d_numPts();
-                Array4<Real> const& costarr = cost->array(mfi);
-                amrex::ParallelFor(tile_box,
-                                   [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept
-                                   {
-                                       costarr(i,j,k) += wt;
-                                   });
-            }
-        }		
+        }, shared_mem_bytes);
+    			 
+        if (cost) {
+            wt = (amrex::second() - wt) / tile_box.d_numPts();
+            Array4<Real> const& costarr = cost->array(mfi);
+            amrex::ParallelFor(tile_box,
+            [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept
+            {
+                costarr(i,j,k) += wt;
+            });
+        }
     }
+
+    // The function that calls this is responsible for redistributing particles.
 }
-#endif
 
 #ifdef WARPX_DO_ELECTROSTATIC
 void
@@ -1066,11 +848,14 @@ PhysicalParticleContainer::FieldGather (int lev,
     MultiFab* cost = WarpX::getCosts(lev);
 
 #ifdef _OPENMP
-#pragma omp parallel
+#pragma omp parallel 
 #endif
     {
-        Cuda::ManagedDeviceVector<Real> xp, yp, zp;
-
+#ifdef _OPENMP
+        int thread_num = omp_get_thread_num();
+#else
+        int thread_num = 0;
+#endif
         for (WarpXParIter pti(*this, lev); pti.isValid(); ++pti)
         {
             Real wt = amrex::second();
@@ -1106,35 +891,15 @@ PhysicalParticleContainer::FieldGather (int lev,
             //
             // copy data from particle container to temp arrays
             //
-            pti.GetPosition(xp, yp, zp);
-
-            const std::array<Real,3>& xyzmin = WarpX::LowerCorner(box, lev);
-            const int* ixyzmin = box.loVect();
+            pti.GetPosition(m_xp[thread_num], m_yp[thread_num], m_zp[thread_num]);
 
             //
             // Field Gather
             //
-            const int ll4symtry          = false;
-            long lvect_fieldgathe = 64;
-            warpx_geteb_energy_conserving(
-                &np,
-                xp.dataPtr(),
-                yp.dataPtr(),
-                zp.dataPtr(),
-                Exp.dataPtr(),Eyp.dataPtr(),Ezp.dataPtr(),
-                Bxp.dataPtr(),Byp.dataPtr(),Bzp.dataPtr(),
-                ixyzmin,
-                &xyzmin[0], &xyzmin[1], &xyzmin[2],
-                &dx[0], &dx[1], &dx[2],
-                &WarpX::nox, &WarpX::noy, &WarpX::noz,
-                BL_TO_FORTRAN_ANYD(exfab),
-                BL_TO_FORTRAN_ANYD(eyfab),
-                BL_TO_FORTRAN_ANYD(ezfab),
-                BL_TO_FORTRAN_ANYD(bxfab),
-                BL_TO_FORTRAN_ANYD(byfab),
-                BL_TO_FORTRAN_ANYD(bzfab),
-                &ll4symtry, &WarpX::l_lower_order_in_v, &WarpX::do_nodal,
-                &lvect_fieldgathe, &WarpX::field_gathering_algo);
+            int e_is_nodal = Ex.is_nodal() and Ey.is_nodal() and Ez.is_nodal();
+            FieldGather(pti, Exp, Eyp, Ezp, Bxp, Byp, Bzp,
+                        &exfab, &eyfab, &ezfab, &bxfab, &byfab, &bzfab, 
+                        Ex.nGrow(), e_is_nodal, 0, np, thread_num, lev, lev);
 
             if (cost) {
                 const Box& tbx = pti.tilebox();
@@ -1164,7 +929,7 @@ PhysicalParticleContainer::Evolve (int lev,
     BL_PROFILE("PPC::Evolve()");
     BL_PROFILE_VAR_NS("PPC::Evolve::Copy", blp_copy);
     BL_PROFILE_VAR_NS("PICSAR::FieldGather", blp_pxr_fg);
-    BL_PROFILE_VAR_NS("PICSAR::ParticlePush", blp_pxr_pp);
+    BL_PROFILE_VAR_NS("PPC::ParticlePush", blp_ppc_pp);
     BL_PROFILE_VAR_NS("PPC::Evolve::partition", blp_partition);
     
     const std::array<Real,3>& dx = WarpX::CellSize(lev);
@@ -1391,57 +1156,40 @@ PhysicalParticleContainer::Evolve (int lev,
             pti.GetPosition(m_xp[thread_num], m_yp[thread_num], m_zp[thread_num]);
             BL_PROFILE_VAR_STOP(blp_copy);
 
-            if (rho) DepositCharge(pti, wp, rho, crho, 0, np_current, np, thread_num, lev);
+            if (rho) {
+                DepositCharge(pti, wp, rho, 0, 0, np_current, thread_num, lev, lev);
+                if (has_buffer){
+                    DepositCharge(pti, wp, crho, 0, np_current, np-np_current, thread_num, lev, lev-1);
+                }
+            }
             
             if (! do_not_push)
             {
+                const long np_gather = (cEx) ? nfine_gather : np;
+
+                int e_is_nodal = Ex.is_nodal() and Ey.is_nodal() and Ez.is_nodal();
+
                 //
                 // Field Gather of Aux Data (i.e., the full solution)
                 //
-                const int ll4symtry          = false;
-                long lvect_fieldgathe = 64;
-
-                const std::array<Real,3>& xyzmin_grid = WarpX::LowerCorner(box, lev);
-                const int* ixyzmin_grid = box.loVect();
-                
-                const long np_gather = (cEx) ? nfine_gather : np;
-
                 BL_PROFILE_VAR_START(blp_pxr_fg);
-
-                warpx_geteb_energy_conserving(
-                    &np_gather,
-                    m_xp[thread_num].dataPtr(),
-                    m_yp[thread_num].dataPtr(),
-                    m_zp[thread_num].dataPtr(),
-                    Exp.dataPtr(),Eyp.dataPtr(),Ezp.dataPtr(),
-                    Bxp.dataPtr(),Byp.dataPtr(),Bzp.dataPtr(),
-                    ixyzmin_grid,
-                    &xyzmin_grid[0], &xyzmin_grid[1], &xyzmin_grid[2],
-                    &dx[0], &dx[1], &dx[2],
-                    &WarpX::nox, &WarpX::noy, &WarpX::noz,
-                    BL_TO_FORTRAN_ANYD(*exfab),
-                    BL_TO_FORTRAN_ANYD(*eyfab),
-                    BL_TO_FORTRAN_ANYD(*ezfab),
-                    BL_TO_FORTRAN_ANYD(*bxfab),
-                    BL_TO_FORTRAN_ANYD(*byfab),
-                    BL_TO_FORTRAN_ANYD(*bzfab),
-                    &ll4symtry, &WarpX::l_lower_order_in_v, &WarpX::do_nodal,
-                    &lvect_fieldgathe, &WarpX::field_gathering_algo);
+                FieldGather(pti, Exp, Eyp, Ezp, Bxp, Byp, Bzp,
+                            exfab, eyfab, ezfab, bxfab, byfab, bzfab, 
+                            Ex.nGrow(), e_is_nodal, 0, np_gather, thread_num, lev, lev);
 
                 if (np_gather < np)
                 {
                     const IntVect& ref_ratio = WarpX::RefRatio(lev-1);
                     const Box& cbox = amrex::coarsen(box,ref_ratio);
-                    const std::array<Real,3>& cxyzmin_grid = WarpX::LowerCorner(cbox, lev-1);
-                    const int* cixyzmin_grid = cbox.loVect();
-
-                    const FArrayBox* cexfab = &(*cEx)[pti];
-                    const FArrayBox* ceyfab = &(*cEy)[pti];
-                    const FArrayBox* cezfab = &(*cEz)[pti];
-                    const FArrayBox* cbxfab = &(*cBx)[pti];
-                    const FArrayBox* cbyfab = &(*cBy)[pti];
-                    const FArrayBox* cbzfab = &(*cBz)[pti];
 
+                    // Data on the grid
+                    FArrayBox const* cexfab = &(*cEx)[pti];
+                    FArrayBox const* ceyfab = &(*cEy)[pti];
+                    FArrayBox const* cezfab = &(*cEz)[pti];
+                    FArrayBox const* cbxfab = &(*cBx)[pti];
+                    FArrayBox const* cbyfab = &(*cBy)[pti];
+                    FArrayBox const* cbzfab = &(*cBz)[pti];
+                    
                     if (WarpX::use_fdtd_nci_corr)
                     {
 #if (AMREX_SPACEDIM == 2)
@@ -1494,26 +1242,14 @@ PhysicalParticleContainer::Evolve (int lev,
 #endif
                     }
                     
-                    long ncrse = np - nfine_gather;
-                    warpx_geteb_energy_conserving(
-                        &ncrse,
-                        m_xp[thread_num].dataPtr()+nfine_gather,
-                        m_yp[thread_num].dataPtr()+nfine_gather,
-                        m_zp[thread_num].dataPtr()+nfine_gather,
-                        Exp.dataPtr()+nfine_gather, Eyp.dataPtr()+nfine_gather, Ezp.dataPtr()+nfine_gather,
-                        Bxp.dataPtr()+nfine_gather, Byp.dataPtr()+nfine_gather, Bzp.dataPtr()+nfine_gather,
-                        cixyzmin_grid,
-                        &cxyzmin_grid[0], &cxyzmin_grid[1], &cxyzmin_grid[2],
-                        &cdx[0], &cdx[1], &cdx[2],
-                        &WarpX::nox, &WarpX::noy, &WarpX::noz,
-                        BL_TO_FORTRAN_ANYD(*cexfab),
-                        BL_TO_FORTRAN_ANYD(*ceyfab),
-                        BL_TO_FORTRAN_ANYD(*cezfab),
-                        BL_TO_FORTRAN_ANYD(*cbxfab),
-                        BL_TO_FORTRAN_ANYD(*cbyfab),
-                        BL_TO_FORTRAN_ANYD(*cbzfab),
-                        &ll4symtry, &WarpX::l_lower_order_in_v, &WarpX::do_nodal,
-                        &lvect_fieldgathe, &WarpX::field_gathering_algo);
+                    // Field gather for particles in gather buffers
+                    e_is_nodal = cEx->is_nodal() and cEy->is_nodal() and cEz->is_nodal();
+                    FieldGather(pti, Exp, Eyp, Ezp, Bxp, Byp, Bzp, 
+                                cexfab, ceyfab, cezfab,
+                                cbxfab, cbyfab, cbzfab,
+                                cEx->nGrow(), e_is_nodal, 
+                                nfine_gather, np-nfine_gather, 
+                                thread_num, lev, lev-1);
                 }
 
                 BL_PROFILE_VAR_STOP(blp_pxr_fg);
@@ -1521,10 +1257,10 @@ PhysicalParticleContainer::Evolve (int lev,
                 //
                 // Particle Push
                 //
-                BL_PROFILE_VAR_START(blp_pxr_pp);
+                BL_PROFILE_VAR_START(blp_ppc_pp);
                 PushPX(pti, m_xp[thread_num], m_yp[thread_num], m_zp[thread_num], 
                        m_giv[thread_num], dt);
-                BL_PROFILE_VAR_STOP(blp_pxr_pp);
+                BL_PROFILE_VAR_STOP(blp_ppc_pp);
 
                 //
                 // Current Deposition
@@ -1561,7 +1297,12 @@ PhysicalParticleContainer::Evolve (int lev,
                 BL_PROFILE_VAR_STOP(blp_copy);
             }
             
-            if (rho) DepositCharge(pti, wp, rho, crho, 1, np_current, np, thread_num, lev);
+            if (rho) {
+                DepositCharge(pti, wp, rho, 1, 0, np_current, thread_num, lev, lev);
+                if (has_buffer){
+                    DepositCharge(pti, wp, crho, 1, np_current, np-np_current, thread_num, lev, lev-1);
+                }
+            }
 
             if (cost) {
                 const Box& tbx = pti.tilebox();
@@ -1742,36 +1483,52 @@ PhysicalParticleContainer::PushPX(WarpXParIter& pti,
                                   Real dt)
 {
 
+    // This wraps the momentum and position advance so that inheritors can modify the call.
+    auto& attribs = pti.GetAttribs();
+    // Extract pointers to the different particle quantities
+    Real* const AMREX_RESTRICT x = xp.dataPtr();
+    Real* const AMREX_RESTRICT y = yp.dataPtr();
+    Real* const AMREX_RESTRICT z = zp.dataPtr();
+    Real* const AMREX_RESTRICT gi = giv.dataPtr();
+    Real* const AMREX_RESTRICT ux = attribs[PIdx::ux].dataPtr();
+    Real* const AMREX_RESTRICT uy = attribs[PIdx::uy].dataPtr();
+    Real* const AMREX_RESTRICT uz = attribs[PIdx::uz].dataPtr();
+    const Real* const AMREX_RESTRICT Ex = attribs[PIdx::Ex].dataPtr();
+    const Real* const AMREX_RESTRICT Ey = attribs[PIdx::Ey].dataPtr();
+    const Real* const AMREX_RESTRICT Ez = attribs[PIdx::Ez].dataPtr();
+    const Real* const AMREX_RESTRICT Bx = attribs[PIdx::Bx].dataPtr();
+    const Real* const AMREX_RESTRICT By = attribs[PIdx::By].dataPtr();
+    const Real* const AMREX_RESTRICT Bz = attribs[PIdx::Bz].dataPtr();
+
     if (WarpX::do_boosted_frame_diagnostic && do_boosted_frame_diags)
     {
-        copy_attribs(pti, xp.dataPtr(), yp.dataPtr(), zp.dataPtr());
+        copy_attribs(pti, x, y, z);
     }
 
-    // The following attributes should be included in CPP version of warpx_particle_pusher
-	// This wraps the call to warpx_particle_pusher so that inheritors can modify the call.
-    auto& attribs = pti.GetAttribs();
-    auto& uxp = attribs[PIdx::ux];
-    auto& uyp = attribs[PIdx::uy];
-    auto& uzp = attribs[PIdx::uz];
-    auto& Exp = attribs[PIdx::Ex];
-    auto& Eyp = attribs[PIdx::Ey];
-    auto& Ezp = attribs[PIdx::Ez];
-    auto& Bxp = attribs[PIdx::Bx];
-    auto& Byp = attribs[PIdx::By];
-    auto& Bzp = attribs[PIdx::Bz];
-    const long np  = pti.numParticles();
-    
-    warpx_particle_pusher(&np,
-                          xp.dataPtr(),
-                          yp.dataPtr(),
-                          zp.dataPtr(),
-                          uxp.dataPtr(), uyp.dataPtr(), uzp.dataPtr(),
-                          giv.dataPtr(),
-                          Exp.dataPtr(), Eyp.dataPtr(), Ezp.dataPtr(),
-                          Bxp.dataPtr(), Byp.dataPtr(), Bzp.dataPtr(),
-                          &this->charge, &this->mass, &dt,
-                          &WarpX::particle_pusher_algo);
-
+    // Loop over the particles and update their momentum and position
+    const Real q = this->charge;
+    const Real m = this-> mass;
+    if (WarpX::particle_pusher_algo == ParticlePusherAlgo::Boris){
+        amrex::ParallelFor( pti.numParticles(),
+            [=] AMREX_GPU_DEVICE (long i) {
+                UpdateMomentumBoris( ux[i], uy[i], uz[i], gi[i],
+                      Ex[i], Ey[i], Ez[i], Bx[i], By[i], Bz[i], q, m, dt);
+                UpdatePosition( x[i], y[i], z[i],
+                      ux[i], uy[i], uz[i], dt );
+            }
+        );
+    } else if (WarpX::particle_pusher_algo == ParticlePusherAlgo::Vay) {
+        amrex::ParallelFor( pti.numParticles(),
+            [=] AMREX_GPU_DEVICE (long i) {
+                UpdateMomentumVay( ux[i], uy[i], uz[i], gi[i],
+                      Ex[i], Ey[i], Ez[i], Bx[i], By[i], Bz[i], q, m, dt);
+                UpdatePosition( x[i], y[i], z[i],
+                      ux[i], uy[i], uz[i], dt );
+            }
+        );
+    } else {
+      amrex::Abort("Unknown particle pusher");
+    };
 }
 
 void
@@ -1800,9 +1557,6 @@ PhysicalParticleContainer::PushP (int lev, Real dt,
 
             auto& attribs = pti.GetAttribs();
 
-            auto& uxp = attribs[PIdx::ux];
-            auto& uyp = attribs[PIdx::uy];
-            auto& uzp = attribs[PIdx::uz];
             auto& Exp = attribs[PIdx::Ex];
             auto& Eyp = attribs[PIdx::Ey];
             auto& Ezp = attribs[PIdx::Ez];
@@ -1834,42 +1588,44 @@ PhysicalParticleContainer::PushP (int lev, Real dt,
             //
             pti.GetPosition(m_xp[thread_num], m_yp[thread_num], m_zp[thread_num]);
 
-            const std::array<Real,3>& xyzmin_grid = WarpX::LowerCorner(box, lev);
-            const int* ixyzmin_grid = box.loVect();
-
-            const int ll4symtry          = false;
-            long lvect_fieldgathe = 64;
-
-            warpx_geteb_energy_conserving(
-                &np,
-                m_xp[thread_num].dataPtr(),
-                m_yp[thread_num].dataPtr(),
-                m_zp[thread_num].dataPtr(),
-                Exp.dataPtr(),Eyp.dataPtr(),Ezp.dataPtr(),
-                Bxp.dataPtr(),Byp.dataPtr(),Bzp.dataPtr(),
-                ixyzmin_grid,
-                &xyzmin_grid[0], &xyzmin_grid[1], &xyzmin_grid[2],
-                &dx[0], &dx[1], &dx[2],
-                &WarpX::nox, &WarpX::noy, &WarpX::noz,
-                BL_TO_FORTRAN_ANYD(exfab),
-                BL_TO_FORTRAN_ANYD(eyfab),
-                BL_TO_FORTRAN_ANYD(ezfab),
-                BL_TO_FORTRAN_ANYD(bxfab),
-                BL_TO_FORTRAN_ANYD(byfab),
-                BL_TO_FORTRAN_ANYD(bzfab),
-                &ll4symtry, &WarpX::l_lower_order_in_v, &WarpX::do_nodal,
-                &lvect_fieldgathe, &WarpX::field_gathering_algo);
-
-            warpx_particle_pusher_momenta(&np,
-                                          m_xp[thread_num].dataPtr(),
-                                          m_yp[thread_num].dataPtr(),
-                                          m_zp[thread_num].dataPtr(),
-                                          uxp.dataPtr(), uyp.dataPtr(), uzp.dataPtr(),
-                                          m_giv[thread_num].dataPtr(),
-                                          Exp.dataPtr(), Eyp.dataPtr(), Ezp.dataPtr(),
-                                          Bxp.dataPtr(), Byp.dataPtr(), Bzp.dataPtr(),
-                                          &this->charge, &this->mass, &dt,
-                                          &WarpX::particle_pusher_algo);
+            int e_is_nodal = Ex.is_nodal() and Ey.is_nodal() and Ez.is_nodal();
+            FieldGather(pti, Exp, Eyp, Ezp, Bxp, Byp, Bzp,
+                        &exfab, &eyfab, &ezfab, &bxfab, &byfab, &bzfab, 
+                        Ex.nGrow(), e_is_nodal, 0, np, thread_num, lev, lev);
+
+            // This wraps the momentum advance so that inheritors can modify the call.
+            // Extract pointers to the different particle quantities
+            Real* const AMREX_RESTRICT gi = m_giv[thread_num].dataPtr();
+            Real* const AMREX_RESTRICT ux = attribs[PIdx::ux].dataPtr();
+            Real* const AMREX_RESTRICT uy = attribs[PIdx::uy].dataPtr();
+            Real* const AMREX_RESTRICT uz = attribs[PIdx::uz].dataPtr();
+            const Real* const AMREX_RESTRICT Expp = Exp.dataPtr();
+            const Real* const AMREX_RESTRICT Eypp = Eyp.dataPtr();
+            const Real* const AMREX_RESTRICT Ezpp = Ezp.dataPtr();
+            const Real* const AMREX_RESTRICT Bxpp = Bxp.dataPtr();
+            const Real* const AMREX_RESTRICT Bypp = Byp.dataPtr();
+            const Real* const AMREX_RESTRICT Bzpp = Bzp.dataPtr();
+
+            // Loop over the particles and update their momentum
+            const Real q = this->charge;
+            const Real m = this-> mass;
+            if (WarpX::particle_pusher_algo == ParticlePusherAlgo::Boris){
+                amrex::ParallelFor( pti.numParticles(),
+                    [=] AMREX_GPU_DEVICE (long i) {
+                        UpdateMomentumBoris( ux[i], uy[i], uz[i], gi[i],
+                              Expp[i], Eypp[i], Ezpp[i], Bxpp[i], Bypp[i], Bzpp[i], q, m, dt);
+                    }
+                );
+            } else if (WarpX::particle_pusher_algo == ParticlePusherAlgo::Vay) {
+                amrex::ParallelFor( pti.numParticles(),
+                    [=] AMREX_GPU_DEVICE (long i) {
+                        UpdateMomentumVay( ux[i], uy[i], uz[i], gi[i],
+                              Expp[i], Eypp[i], Ezpp[i], Bxpp[i], Bypp[i], Bzpp[i], q, m, dt);
+                    }
+                );
+            } else {
+              amrex::Abort("Unknown particle pusher");
+            };
         }
     }
 }
@@ -2034,74 +1790,6 @@ void PhysicalParticleContainer::GetParticleSlice(const int direction, const Real
     }
 }
 
-int PhysicalParticleContainer::GetRefineFac(const Real x, const Real y, const Real z)
-{
-    if (finestLevel() == 0) return 1;
-    if (not WarpX::refine_plasma) return 1;
-
-    IntVect iv;
-    const Geometry& geom = Geom(0);
-
-    std::array<Real, 3> offset;
-
-#if ( AMREX_SPACEDIM == 3)
-    offset[0] = geom.ProbLo(0);
-    offset[1] = geom.ProbLo(1);
-    offset[2] = geom.ProbLo(2);
-#elif ( AMREX_SPACEDIM == 2 )
-    offset[0] = geom.ProbLo(0);
-    offset[1] = 0.0;
-    offset[2] = geom.ProbLo(1);
-#endif
-
-    AMREX_D_TERM(iv[0]=static_cast<int>(floor((x-offset[0])*geom.InvCellSize(0)));,
-                 iv[1]=static_cast<int>(floor((y-offset[1])*geom.InvCellSize(1)));,
-                 iv[2]=static_cast<int>(floor((z-offset[2])*geom.InvCellSize(2))););
-
-    iv += geom.Domain().smallEnd();
-
-    const int dir = WarpX::moving_window_dir;
-
-    IntVect iv2 = iv;
-    iv2[dir] = 0;
-
-    if ( (*m_refined_injection_mask)(iv2) != -1) return (*m_refined_injection_mask)(iv2);
-
-    int ref_fac = 1;
-    for (int lev = 0; lev < finestLevel(); ++lev)
-    {
-        const IntVect rr = m_gdb->refRatio(lev);
-        const BoxArray& fine_ba = this->ParticleBoxArray(lev+1);
-        const int num_boxes = fine_ba.size();
-        Vector<Box> stretched_boxes;
-        const int safety_factor = 4;
-        for (int i = 0; i < num_boxes; ++i)
-        {
-            Box bx = fine_ba[i];
-            bx.coarsen(ref_fac*rr[dir]);
-            bx.setSmall(dir, std::numeric_limits<int>::min()/safety_factor);
-            bx.setBig(dir, std::numeric_limits<int>::max()/safety_factor);
-            stretched_boxes.push_back(bx);
-        }
-
-        BoxArray stretched_ba(stretched_boxes.dataPtr(), stretched_boxes.size());
-
-        const int num_ghost = 0;
-        if ( stretched_ba.intersects(Box(iv, iv), num_ghost) )
-        {
-            ref_fac *= rr[dir];
-        }
-        else
-        {
-            break;
-        }
-    }
-
-    (*m_refined_injection_mask)(iv2) = ref_fac;
-
-    return ref_fac;
-}
-
 /* \brief Inject particles during the simulation
  * \param injection_box: domain where particles should be injected.
  */
@@ -2112,3 +1800,134 @@ PhysicalParticleContainer::ContinuousInjection(const RealBox& injection_box)
     const int lev=0;
     AddPlasma(lev, injection_box);
 }
+
+/* \brief Gather fields from FArrayBox exfab, eyfab, ezfab, bxfab, byfab, 
+ * bzfab into arrays of fields on particles Exp, Eyp, Ezp, Bxp, Byp, Bzp.
+ * \param Exp-Bzp: fields on particles.
+ * \param exfab-bzfab: FAB of electric and magnetic fields for particles in pti
+ * \param ngE: number of guard cells for E
+ * \param e_is_nodal: 0 if E is staggered, 1 if E is nodal
+ * \param offset: index of first particle for which fields are gathered
+ * \param np_to_gather: number of particles onto which fields are gathered
+ * \param thread_num: if using OpenMP, thread number
+ * \param lev: level on which particles are located
+ * \param gather_lev: level from which fields are gathered: lev, or lev-1
+          for particles in gather buffers.
+ */
+void
+PhysicalParticleContainer::FieldGather (WarpXParIter& pti,
+                                        RealVector& Exp,
+                                        RealVector& Eyp,
+                                        RealVector& Ezp,
+                                        RealVector& Bxp,
+                                        RealVector& Byp,
+                                        RealVector& Bzp,
+                                        FArrayBox const * exfab,
+                                        FArrayBox const * eyfab,
+                                        FArrayBox const * ezfab,
+                                        FArrayBox const * bxfab,
+                                        FArrayBox const * byfab,
+                                        FArrayBox const * bzfab,
+                                        const int ngE, const int e_is_nodal,
+                                        const long offset,
+                                        const long np_to_gather,
+                                        int thread_num,
+                                        int lev,
+                                        int gather_lev)
+{
+    AMREX_ALWAYS_ASSERT_WITH_MESSAGE((gather_lev==(lev-1)) ||
+                                     (gather_lev==(lev  )),
+                                     "Gather buffers only work for lev-1");
+    
+    // If no particles, do not do anything
+    if (np_to_gather == 0) return;
+    // Get cell size on gather_lev
+    const std::array<Real,3>& dx = WarpX::CellSize(std::max(gather_lev,0));
+    // Set staggering shift depending on e_is_nodal
+    const Real stagger_shift = e_is_nodal ? 0.0 : 0.5;
+    
+    // Get box from which field is gathered.
+    // If gather_lev differs from lev, the tile box is coarsened to gather_lev.
+    Box box;
+    if (lev == gather_lev) {
+        box = pti.tilebox();
+    } else {
+        const IntVect& ref_ratio = WarpX::RefRatio(gather_lev);
+        box = amrex::coarsen(pti.tilebox(),ref_ratio);
+    }
+    
+    // Add guard cells to the box.
+    box.grow(ngE);
+    
+    const Array4<const Real>& ex_arr = exfab->array();
+    const Array4<const Real>& ey_arr = eyfab->array();
+    const Array4<const Real>& ez_arr = ezfab->array();
+    const Array4<const Real>& bx_arr = bxfab->array();
+    const Array4<const Real>& by_arr = byfab->array();
+    const Array4<const Real>& bz_arr = bzfab->array();
+    
+    const Real * const AMREX_RESTRICT xp = m_xp[thread_num].dataPtr() + offset;
+    const Real * const AMREX_RESTRICT zp = m_zp[thread_num].dataPtr() + offset;
+    const Real * const AMREX_RESTRICT yp = m_yp[thread_num].dataPtr() + offset;
+    
+    // Physical lower corner of the box (including the guard cells added above)
+    const std::array<Real, 3>& xyzmin = WarpX::LowerCorner(box, gather_lev);
+    
+    const Dim3 lo = lbound(box);
+    
+    // Depending on WarpX::l_lower_order_in_v and WarpX::nox, call
+    // different versions of template function doGatherShapeN
+    if (WarpX::l_lower_order_in_v){
+        if        (WarpX::nox == 1){
+            doGatherShapeN<1,1>(xp, yp, zp,
+                                Exp.dataPtr() + offset, Eyp.dataPtr() + offset,
+                                Ezp.dataPtr() + offset, Bxp.dataPtr() + offset,
+                                Byp.dataPtr() + offset, Bzp.dataPtr() + offset,
+                                ex_arr, ey_arr, ez_arr, bx_arr, by_arr, bz_arr,
+                                np_to_gather, dx,
+                                xyzmin, lo, stagger_shift);
+        } else if (WarpX::nox == 2){
+            doGatherShapeN<2,1>(xp, yp, zp,
+                                Exp.dataPtr() + offset, Eyp.dataPtr() + offset,
+                                Ezp.dataPtr() + offset, Bxp.dataPtr() + offset,
+                                Byp.dataPtr() + offset, Bzp.dataPtr() + offset,
+                                ex_arr, ey_arr, ez_arr, bx_arr, by_arr, bz_arr,
+                                np_to_gather, dx,
+                                xyzmin, lo, stagger_shift);
+        } else if (WarpX::nox == 3){
+            doGatherShapeN<3,1>(xp, yp, zp,
+                                Exp.dataPtr() + offset, Eyp.dataPtr() + offset,
+                                Ezp.dataPtr() + offset, Bxp.dataPtr() + offset,
+                                Byp.dataPtr() + offset, Bzp.dataPtr() + offset,
+                                ex_arr, ey_arr, ez_arr, bx_arr, by_arr, bz_arr,
+                                np_to_gather, dx,
+                                xyzmin, lo, stagger_shift);
+        }
+    } else {
+        if        (WarpX::nox == 1){
+            doGatherShapeN<1,0>(xp, yp, zp,
+                                Exp.dataPtr() + offset, Eyp.dataPtr() + offset,
+                                Ezp.dataPtr() + offset, Bxp.dataPtr() + offset,
+                                Byp.dataPtr() + offset, Bzp.dataPtr() + offset,
+                                ex_arr, ey_arr, ez_arr, bx_arr, by_arr, bz_arr,
+                                np_to_gather, dx,
+                                xyzmin, lo, stagger_shift);
+        } else if (WarpX::nox == 2){
+            doGatherShapeN<2,0>(xp, yp, zp,
+                                Exp.dataPtr() + offset, Eyp.dataPtr() + offset,
+                                Ezp.dataPtr() + offset, Bxp.dataPtr() + offset,
+                                Byp.dataPtr() + offset, Bzp.dataPtr() + offset,
+                                ex_arr, ey_arr, ez_arr, bx_arr, by_arr, bz_arr,
+                                np_to_gather, dx,
+                                xyzmin, lo, stagger_shift);
+        } else if (WarpX::nox == 3){
+            doGatherShapeN<3,0>(xp, yp, zp,
+                                Exp.dataPtr() + offset, Eyp.dataPtr() + offset,
+                                Ezp.dataPtr() + offset, Bxp.dataPtr() + offset,
+                                Byp.dataPtr() + offset, Bzp.dataPtr() + offset,
+                                ex_arr, ey_arr, ez_arr, bx_arr, by_arr, bz_arr,
+                                np_to_gather, dx,
+                                xyzmin, lo, stagger_shift);
+        }
+    }
+}
diff --git a/Source/Particles/Pusher/GetAndSetPosition.H b/Source/Particles/Pusher/GetAndSetPosition.H
index 42c61343e..3c74baeb2 100644
--- a/Source/Particles/Pusher/GetAndSetPosition.H
+++ b/Source/Particles/Pusher/GetAndSetPosition.H
@@ -5,7 +5,7 @@
 #include <WarpXParticleContainer.H>
 #include <AMReX_REAL.H>
 
-#ifndef WARPX_RZ
+#ifndef WARPX_DIM_RZ
 
 /* \brief Extract the particle's coordinates from the ParticleType struct `p`,
  *        and stores them in the variables `x`, `y`, `z`. */
@@ -42,7 +42,7 @@ void SetPosition(
 #endif
 }
 
-# else // if WARPX_RZ is True
+# elif defined WARPX_DIM_RZ
 
 /* \brief Extract the particle's coordinates from `theta` and the attributes
  *         of the ParticleType struct `p` (which contains the radius),
@@ -71,6 +71,6 @@ void SetCylindricalPositionFromCartesian(
     p.pos(1) = z;
 }
 
-#endif // WARPX_RZ
+#endif // WARPX_DIM_RZ
 
 #endif // WARPX_PARTICLES_PUSHER_GETANDSETPOSITION_H_
diff --git a/Source/Particles/Pusher/Make.package b/Source/Particles/Pusher/Make.package
index 8c8e77905..95a38fa2d 100644
--- a/Source/Particles/Pusher/Make.package
+++ b/Source/Particles/Pusher/Make.package
@@ -1,4 +1,6 @@
 CEXE_headers += GetAndSetPosition.H
 CEXE_headers += UpdatePosition.H
+CEXE_headers += UpdateMomentumBoris.H
+CEXE_headers += UpdateMomentumVay.H
 INCLUDE_LOCATIONS += $(WARPX_HOME)/Source/Particles/Pusher
 VPATH_LOCATIONS   += $(WARPX_HOME)/Source/Particles/Pusher
diff --git a/Source/Particles/Pusher/UpdateMomentumBoris.H b/Source/Particles/Pusher/UpdateMomentumBoris.H
new file mode 100644
index 000000000..71e9a8ed1
--- /dev/null
+++ b/Source/Particles/Pusher/UpdateMomentumBoris.H
@@ -0,0 +1,56 @@
+#ifndef WARPX_PARTICLES_PUSHER_UPDATEMOMENTUM_BORIS_H_
+#define WARPX_PARTICLES_PUSHER_UPDATEMOMENTUM_BORIS_H_
+
+#include <WarpXConst.H>
+#include <AMReX_REAL.H>
+
+#include <cmath>
+
+/* \brief Advance the particle's momenta `ux`, `uy`, `uz` over one timestep
+ *    `dt` with the Boris scheme, given the electric field `Ex`, `Ey`, `Ez`
+ *    and the magnetic field `Bx`, `By`, `Bz` seen by the particle.
+ *
+ *    The update is a half electric push, a magnetic rotation and a second
+ *    half electric push, all scaled by `q`/`m`. On return `gaminv` holds
+ *    1/sqrt(1 + (ux*ux + uy*uy + uz*uz)/c^2) for the updated momenta; its
+ *    incoming value is not read. */
+AMREX_GPU_HOST_DEVICE AMREX_INLINE
+void UpdateMomentumBoris(
+    amrex::Real& ux, amrex::Real& uy, amrex::Real& uz, amrex::Real& gaminv,
+    const amrex::Real Ex, const amrex::Real Ey, const amrex::Real Ez,
+    const amrex::Real Bx, const amrex::Real By, const amrex::Real Bz,
+    const amrex::Real q, const amrex::Real m, const amrex::Real dt )
+{
+    const amrex::Real econst = 0.5*q*dt/m;
+
+    // First half-push for E
+    ux += econst*Ex;
+    uy += econst*Ey;
+    uz += econst*Ez;
+    // Compute temporary gamma factor
+    constexpr amrex::Real inv_c2 = 1./(PhysConst::c*PhysConst::c);
+    const amrex::Real inv_gamma = 1./std::sqrt(1. + (ux*ux + uy*uy + uz*uz)*inv_c2);
+    // Magnetic rotation
+    // - Compute temporary variables
+    const amrex::Real tx = econst*inv_gamma*Bx;
+    const amrex::Real ty = econst*inv_gamma*By;
+    const amrex::Real tz = econst*inv_gamma*Bz;
+    const amrex::Real tsqi = 2./(1. + tx*tx + ty*ty + tz*tz);
+    const amrex::Real sx = tx*tsqi;
+    const amrex::Real sy = ty*tsqi;
+    const amrex::Real sz = tz*tsqi;
+    const amrex::Real ux_p = ux + uy*tz - uz*ty;
+    const amrex::Real uy_p = uy + uz*tx - ux*tz;
+    const amrex::Real uz_p = uz + ux*ty - uy*tx;
+    // - Update momentum
+    ux += uy_p*sz - uz_p*sy;
+    uy += uz_p*sx - ux_p*sz;
+    uz += ux_p*sy - uy_p*sx;
+    // Second half-push for E
+    ux += econst*Ex;
+    uy += econst*Ey;
+    uz += econst*Ez;
+    gaminv = 1./std::sqrt(1. + (ux*ux + uy*uy + uz*uz)*inv_c2);
+}
+
+#endif // WARPX_PARTICLES_PUSHER_UPDATEMOMENTUM_BORIS_H_
diff --git a/Source/Particles/Pusher/UpdateMomentumVay.H b/Source/Particles/Pusher/UpdateMomentumVay.H
new file mode 100644
index 000000000..044297e22
--- /dev/null
+++ b/Source/Particles/Pusher/UpdateMomentumVay.H
@@ -0,0 +1,61 @@
+#ifndef WARPX_PARTICLES_PUSHER_UPDATEMOMENTUM_VAY_H_
+#define WARPX_PARTICLES_PUSHER_UPDATEMOMENTUM_VAY_H_
+
+#include <AMReX_FArrayBox.H>
+#include <WarpXConst.H>
+#include <AMReX_REAL.H>
+
+#include <cmath>
+
+/* \brief Advance the particle's momenta `ux`, `uy`, `uz` over one timestep
+ *    `dt` with the Vay scheme, given the electric field `Ex`, `Ey`, `Ez`
+ *    and the magnetic field `Bx`, `By`, `Bz` seen by the particle.
+ *
+ *    `q` and `m` only enter through the ratio `q`/`m`. On return `gaminv`
+ *    holds 1/sqrt(1 + (ux*ux + uy*uy + uz*uz)/c^2) for the updated momenta;
+ *    its incoming value is not read. */
+AMREX_GPU_HOST_DEVICE AMREX_INLINE
+void UpdateMomentumVay(
+    amrex::Real& ux, amrex::Real& uy, amrex::Real& uz, amrex::Real& gaminv,
+    const amrex::Real Ex, const amrex::Real Ey, const amrex::Real Ez,
+    const amrex::Real Bx, const amrex::Real By, const amrex::Real Bz,
+    const amrex::Real q, const amrex::Real m, const amrex::Real dt )
+{
+    // Constants
+    const amrex::Real econst = q*dt/m;
+    const amrex::Real bconst = 0.5*q*dt/m;
+    constexpr amrex::Real invclight = 1./PhysConst::c;
+    constexpr amrex::Real invclightsq = 1./(PhysConst::c*PhysConst::c);
+    // Compute initial gamma
+    const amrex::Real inv_gamma = 1./std::sqrt(1. + (ux*ux + uy*uy + uz*uz)*invclightsq);
+    // Get tau
+    const amrex::Real taux = bconst*Bx;
+    const amrex::Real tauy = bconst*By;
+    const amrex::Real tauz = bconst*Bz;
+    const amrex::Real tausq = taux*taux+tauy*tauy+tauz*tauz;
+    // Get U', gamma'^2
+    const amrex::Real uxpr = ux + econst*Ex + (uy*tauz-uz*tauy)*inv_gamma;
+    const amrex::Real uypr = uy + econst*Ey + (uz*taux-ux*tauz)*inv_gamma;
+    const amrex::Real uzpr = uz + econst*Ez + (ux*tauy-uy*taux)*inv_gamma;
+    const amrex::Real gprsq = (1. + (uxpr*uxpr + uypr*uypr + uzpr*uzpr)*invclightsq);
+    // Get u*
+    const amrex::Real ust = (uxpr*taux + uypr*tauy + uzpr*tauz)*invclight;
+    // Get new gamma
+    const amrex::Real sigma = gprsq-tausq;
+    const amrex::Real gisq = 2./(sigma + std::sqrt(sigma*sigma + 4.*(tausq + ust*ust)) );
+    // Get t, s
+    const amrex::Real bg = bconst*std::sqrt(gisq);
+    const amrex::Real tx = bg*Bx;
+    const amrex::Real ty = bg*By;
+    const amrex::Real tz = bg*Bz;
+    const amrex::Real s = 1./(1.+tausq*gisq);
+    // Get t.u'
+    const amrex::Real tu = tx*uxpr + ty*uypr + tz*uzpr;
+    // Get new U
+    ux = s*(uxpr+tx*tu+uypr*tz-uzpr*ty);
+    uy = s*(uypr+ty*tu+uzpr*tx-uxpr*tz);
+    uz = s*(uzpr+tz*tu+uxpr*ty-uypr*tx);
+    gaminv = 1./std::sqrt(1. + (ux*ux + uy*uy + uz*uz)*invclightsq);
+}
+
+#endif // WARPX_PARTICLES_PUSHER_UPDATEMOMENTUM_VAY_H_
diff --git a/Source/Particles/Pusher/UpdatePosition.H b/Source/Particles/Pusher/UpdatePosition.H
index 0a4f579f4..a9df63a30 100644
--- a/Source/Particles/Pusher/UpdatePosition.H
+++ b/Source/Particles/Pusher/UpdatePosition.H
@@ -20,7 +20,7 @@ void UpdatePosition(
     const amrex::Real inv_gamma = 1./std::sqrt(1. + (ux*ux + uy*uy + uz*uz)*inv_c2);
     // Update positions over one time step
     x += ux * inv_gamma * dt;
-#if (AMREX_SPACEDIM == 3) || (defined WARPX_RZ) // RZ pushes particles in 3D
+#if (AMREX_SPACEDIM == 3) || (defined WARPX_DIM_RZ) // RZ pushes particles in 3D
     y += uy * inv_gamma * dt;
 #endif
     z += uz * inv_gamma * dt;
diff --git a/Source/Particles/RigidInjectedParticleContainer.H b/Source/Particles/RigidInjectedParticleContainer.H
index 0b27a2f2f..b920ece0a 100644
--- a/Source/Particles/RigidInjectedParticleContainer.H
+++ b/Source/Particles/RigidInjectedParticleContainer.H
@@ -43,7 +43,7 @@ public:
                          amrex::Real dt) override;
 
     virtual void PushPX(WarpXParIter& pti,
-	                amrex::Cuda::ManagedDeviceVector<amrex::Real>& xp,
+                        amrex::Cuda::ManagedDeviceVector<amrex::Real>& xp,
                         amrex::Cuda::ManagedDeviceVector<amrex::Real>& yp,
                         amrex::Cuda::ManagedDeviceVector<amrex::Real>& zp,
                         amrex::Cuda::ManagedDeviceVector<amrex::Real>& giv,
@@ -77,7 +77,6 @@ private:
     // Temporary quantites
     amrex::Real zinject_plane_lev;
     amrex::Real zinject_plane_lev_previous;
-    amrex::Vector<int> done_injecting_temp;
     bool done_injecting_lev;
 
 };
diff --git a/Source/Particles/RigidInjectedParticleContainer.cpp b/Source/Particles/RigidInjectedParticleContainer.cpp
index 9bd4cb4fc..36cb9d224 100644
--- a/Source/Particles/RigidInjectedParticleContainer.cpp
+++ b/Source/Particles/RigidInjectedParticleContainer.cpp
@@ -10,6 +10,9 @@
 #include <WarpX_f.H>
 #include <WarpX.H>
 #include <WarpXConst.H>
+#include <WarpXAlgorithmSelection.H>
+#include <UpdateMomentumBoris.H>
+#include <UpdateMomentumVay.H>
 
 using namespace amrex;
 
@@ -204,48 +207,58 @@ RigidInjectedParticleContainer::BoostandRemapParticles()
 
 void
 RigidInjectedParticleContainer::PushPX(WarpXParIter& pti,
-	                               Cuda::ManagedDeviceVector<Real>& xp,
+                                       Cuda::ManagedDeviceVector<Real>& xp,
                                        Cuda::ManagedDeviceVector<Real>& yp,
                                        Cuda::ManagedDeviceVector<Real>& zp,
                                        Cuda::ManagedDeviceVector<Real>& giv,
                                        Real dt)
 {
 
-    if (WarpX::do_boosted_frame_diagnostic && do_boosted_frame_diags)
-    {
-        copy_attribs(pti, xp.dataPtr(), yp.dataPtr(), zp.dataPtr());
-    }
-    
-    // This wraps the call to warpx_particle_pusher so that inheritors can modify the call.
+    // This wraps the momentum and position advance so that inheritors can modify the call.
     auto& attribs = pti.GetAttribs();
     auto& uxp = attribs[PIdx::ux];
     auto& uyp = attribs[PIdx::uy];
     auto& uzp = attribs[PIdx::uz];
-    auto& Exp = attribs[PIdx::Ex];
-    auto& Eyp = attribs[PIdx::Ey];
-    auto& Ezp = attribs[PIdx::Ez];
-    auto& Bxp = attribs[PIdx::Bx];
-    auto& Byp = attribs[PIdx::By];
-    auto& Bzp = attribs[PIdx::Bz];
-    const long np  = pti.numParticles();
 
     // Save the position and momenta, making copies
     Cuda::ManagedDeviceVector<Real> xp_save, yp_save, zp_save;
     RealVector uxp_save, uyp_save, uzp_save;
 
+    Real* const AMREX_RESTRICT x = xp.dataPtr();
+    Real* const AMREX_RESTRICT y = yp.dataPtr();
+    Real* const AMREX_RESTRICT z = zp.dataPtr();
+    Real* const AMREX_RESTRICT gi = giv.dataPtr();
+    Real* const AMREX_RESTRICT ux = uxp.dataPtr();
+    Real* const AMREX_RESTRICT uy = uyp.dataPtr();
+    Real* const AMREX_RESTRICT uz = uzp.dataPtr();
+    Real* const AMREX_RESTRICT Exp = attribs[PIdx::Ex].dataPtr();
+    Real* const AMREX_RESTRICT Eyp = attribs[PIdx::Ey].dataPtr();
+    Real* const AMREX_RESTRICT Ezp = attribs[PIdx::Ez].dataPtr();
+    Real* const AMREX_RESTRICT Bxp = attribs[PIdx::Bx].dataPtr();
+    Real* const AMREX_RESTRICT Byp = attribs[PIdx::By].dataPtr();
+    Real* const AMREX_RESTRICT Bzp = attribs[PIdx::Bz].dataPtr();
+
     if (!done_injecting_lev) {
-        xp_save = xp;
-        yp_save = yp;
-        zp_save = zp;
-        uxp_save = uxp;
-        uyp_save = uyp;
-        uzp_save = uzp;
+        if (!(WarpX::do_boosted_frame_diagnostic && do_boosted_frame_diags)) {
+            // If the old values are not already saved, create copies here.
+            xp_save = xp;
+            yp_save = yp;
+            zp_save = zp;
+            uxp_save = uxp;
+            uyp_save = uyp;
+            uzp_save = uzp;
+        }
+
         // Scale the fields of particles about to cross the injection plane.
         // This only approximates what should be happening. The particles
         // should by advanced a fraction of a time step instead.
         // Scaling the fields is much easier and may be good enough.
-        for (int i=0 ; i < zp.size() ; i++) {
-            const Real dtscale = dt - (zinject_plane_lev_previous - zp[i])/(vzbeam_ave_boosted + WarpX::beta_boost*PhysConst::c);
+        const Real v_boost = WarpX::beta_boost*PhysConst::c;
+        const Real z_plane_previous = zinject_plane_lev_previous;
+        const Real vz_ave_boosted = vzbeam_ave_boosted;
+        amrex::ParallelFor( pti.numParticles(),
+            [=] AMREX_GPU_DEVICE (long i) {
+            const Real dtscale = dt - (z_plane_previous - z[i])/(vz_ave_boosted + v_boost);
             if (0. < dtscale && dtscale < dt) {
                 Exp[i] *= dtscale;
                 Eyp[i] *= dtscale;
@@ -255,46 +268,60 @@ RigidInjectedParticleContainer::PushPX(WarpXParIter& pti,
                 Bzp[i] *= dtscale;
             }
         }
+        );
     }
 
-    warpx_particle_pusher(&np,
-                          xp.dataPtr(),
-                          yp.dataPtr(),
-                          zp.dataPtr(),
-                          uxp.dataPtr(), uyp.dataPtr(), uzp.dataPtr(),
-                          giv.dataPtr(),
-                          Exp.dataPtr(), Eyp.dataPtr(), Ezp.dataPtr(),
-                          Bxp.dataPtr(), Byp.dataPtr(), Bzp.dataPtr(),
-                          &this->charge, &this->mass, &dt,
-                          &WarpX::particle_pusher_algo);
+    PhysicalParticleContainer::PushPX(pti, xp, yp, zp, giv, dt);
 
     if (!done_injecting_lev) {
-#ifdef _OPENMP
-        const int tid = omp_get_thread_num();
-#else
-        const int tid = 0;
-#endif
+
+        Real* AMREX_RESTRICT x_save;
+        Real* AMREX_RESTRICT y_save;
+        Real* AMREX_RESTRICT z_save;
+        Real* AMREX_RESTRICT ux_save;
+        Real* AMREX_RESTRICT uy_save;
+        Real* AMREX_RESTRICT uz_save;
+        if (!(WarpX::do_boosted_frame_diagnostic && do_boosted_frame_diags)) {
+            x_save = xp_save.dataPtr();
+            y_save = yp_save.dataPtr();
+            z_save = zp_save.dataPtr();
+            ux_save = uxp_save.dataPtr();
+            uy_save = uyp_save.dataPtr();
+            uz_save = uzp_save.dataPtr();
+        } else {
+            x_save = pti.GetAttribs(particle_comps["xold"]).dataPtr();
+            y_save = pti.GetAttribs(particle_comps["yold"]).dataPtr();
+            z_save = pti.GetAttribs(particle_comps["zold"]).dataPtr();
+            ux_save = pti.GetAttribs(particle_comps["uxold"]).dataPtr();
+            uy_save = pti.GetAttribs(particle_comps["uyold"]).dataPtr();
+            uz_save = pti.GetAttribs(particle_comps["uzold"]).dataPtr();
+        }
+
         // Undo the push for particles not injected yet.
         // The zp are advanced a fixed amount.
-        for (int i=0 ; i < zp.size() ; i++) {
-            if (zp[i] <= zinject_plane_lev) {
-                uxp[i] = uxp_save[i];
-                uyp[i] = uyp_save[i];
-                uzp[i] = uzp_save[i];
-                giv[i] = 1./std::sqrt(1. + (uxp[i]*uxp[i] + uyp[i]*uyp[i] + uzp[i]*uzp[i])/(PhysConst::c*PhysConst::c));
-                xp[i] = xp_save[i];
-                yp[i] = yp_save[i];
-                if (rigid_advance) {
-                    zp[i] = zp_save[i] + dt*vzbeam_ave_boosted;
+        const Real z_plane_lev = zinject_plane_lev;
+        const Real vz_ave_boosted = vzbeam_ave_boosted;
+        const bool rigid = rigid_advance;
+        const Real inv_csq = 1./(PhysConst::c*PhysConst::c);
+        amrex::ParallelFor( pti.numParticles(),
+            [=] AMREX_GPU_DEVICE (long i) {
+            if (z[i] <= z_plane_lev) {
+                ux[i] = ux_save[i];
+                uy[i] = uy_save[i];
+                uz[i] = uz_save[i];
+                gi[i] = 1./std::sqrt(1. + (ux[i]*ux[i] + uy[i]*uy[i] + uz[i]*uz[i])*inv_csq);
+                x[i] = x_save[i];
+                y[i] = y_save[i];
+                if (rigid) {
+                    z[i] = z_save[i] + dt*vz_ave_boosted;
                 }
                 else {
-                    zp[i] = zp_save[i] + dt*uzp[i]*giv[i];
+                    z[i] = z_save[i] + dt*uz[i]*gi[i];
                 }
-                done_injecting_temp[tid] = 0;
             }
         }
+        );
     }
-
 }
 
 void
@@ -314,28 +341,26 @@ RigidInjectedParticleContainer::Evolve (int lev,
     zinject_plane_levels[lev] -= dt*WarpX::beta_boost*PhysConst::c;
     zinject_plane_lev = zinject_plane_levels[lev];
 
-    // Setup check of whether more particles need to be injected
-#ifdef _OPENMP
-    const int nthreads = omp_get_max_threads();
-#else
-    const int nthreads = 1;
-#endif
-    done_injecting_temp.assign(nthreads, 1); // We do not use bool because vector<bool> is special.
+    // Set the done injecting flag when the inject plane moves out of the
+    // simulation domain.
+    // It is much easier to do this check, rather than checking if all of the
+    // particles have crossed the inject plane.
+    const Real* plo = Geom(lev).ProbLo();
+    const Real* phi = Geom(lev).ProbHi();
+    const int zdir = AMREX_SPACEDIM-1;
+    done_injecting[lev] = ((zinject_plane_levels[lev] < plo[zdir] && WarpX::moving_window_v + WarpX::beta_boost*PhysConst::c >= 0.) ||
+                           (zinject_plane_levels[lev] > phi[zdir] && WarpX::moving_window_v + WarpX::beta_boost*PhysConst::c <= 0.));
     done_injecting_lev = done_injecting[lev];
 
     PhysicalParticleContainer::Evolve (lev,
-				       Ex, Ey, Ez,
-				       Bx, By, Bz,
-				       jx, jy, jz,
+                                       Ex, Ey, Ez,
+                                       Bx, By, Bz,
+                                       jx, jy, jz,
                                        cjx, cjy, cjz,
                                        rho, crho,
                                        cEx, cEy, cEz,
                                        cBx, cBy, cBz,
                                        t, dt);
-
-    // Check if all done_injecting_temp are still true.
-    done_injecting[lev] = std::all_of(done_injecting_temp.begin(), done_injecting_temp.end(),
-                                      [](int i) -> bool { return i; });
 }
 
 void
@@ -343,6 +368,8 @@ RigidInjectedParticleContainer::PushP (int lev, Real dt,
                                        const MultiFab& Ex, const MultiFab& Ey, const MultiFab& Ez,
                                        const MultiFab& Bx, const MultiFab& By, const MultiFab& Bz)
 {
+    BL_PROFILE("RigidInjectedParticleContainer::PushP");
+
     if (do_not_push) return;
 
     const std::array<Real,3>& dx = WarpX::CellSize(lev);
@@ -351,8 +378,11 @@ RigidInjectedParticleContainer::PushP (int lev, Real dt,
 #pragma omp parallel
 #endif
     {
-        Cuda::ManagedDeviceVector<Real> xp, yp, zp, giv;
-
+#ifdef _OPENMP
+        int thread_num = omp_get_thread_num();
+#else
+        int thread_num = 0;
+#endif
         for (WarpXParIter pti(*this, lev); pti.isValid(); ++pti)
         {
             const Box& box = pti.validbox();
@@ -386,65 +416,74 @@ RigidInjectedParticleContainer::PushP (int lev, Real dt,
             Byp.assign(np,WarpX::B_external[1]);
             Bzp.assign(np,WarpX::B_external[2]);
 
-            giv.resize(np);
+            m_giv[thread_num].resize(np);
 
             //
             // copy data from particle container to temp arrays
             //
-            pti.GetPosition(xp, yp, zp);
+            pti.GetPosition(m_xp[thread_num], m_yp[thread_num], m_zp[thread_num]);
 
-            const std::array<Real,3>& xyzmin_grid = WarpX::LowerCorner(box, lev);
-            const int* ixyzmin_grid = box.loVect();
-
-            const int ll4symtry          = false;
-            const int l_lower_order_in_v = true;
-            long lvect_fieldgathe = 64;
-            warpx_geteb_energy_conserving(
-                &np,
-                xp.dataPtr(),
-                yp.dataPtr(),
-                zp.dataPtr(),
-                Exp.dataPtr(),Eyp.dataPtr(),Ezp.dataPtr(),
-                Bxp.dataPtr(),Byp.dataPtr(),Bzp.dataPtr(),
-                ixyzmin_grid,
-                &xyzmin_grid[0], &xyzmin_grid[1], &xyzmin_grid[2],
-                &dx[0], &dx[1], &dx[2],
-                &WarpX::nox, &WarpX::noy, &WarpX::noz,
-                BL_TO_FORTRAN_ANYD(exfab),
-                BL_TO_FORTRAN_ANYD(eyfab),
-                BL_TO_FORTRAN_ANYD(ezfab),
-                BL_TO_FORTRAN_ANYD(bxfab),
-                BL_TO_FORTRAN_ANYD(byfab),
-                BL_TO_FORTRAN_ANYD(bzfab),
-                &ll4symtry, &l_lower_order_in_v, &WarpX::do_nodal,
-                &lvect_fieldgathe, &WarpX::field_gathering_algo);
+            int e_is_nodal = Ex.is_nodal() and Ey.is_nodal() and Ez.is_nodal();
+            FieldGather(pti, Exp, Eyp, Ezp, Bxp, Byp, Bzp,
+                        &exfab, &eyfab, &ezfab, &bxfab, &byfab, &bzfab,
+                        Ex.nGrow(), e_is_nodal, 0, np, thread_num, lev, lev);
 
             // Save the position and momenta, making copies
             auto uxp_save = uxp;
             auto uyp_save = uyp;
             auto uzp_save = uzp;
 
-            warpx_particle_pusher_momenta(&np,
-                                          xp.dataPtr(),
-                                          yp.dataPtr(),
-                                          zp.dataPtr(),
-                                          uxp.dataPtr(), uyp.dataPtr(), uzp.dataPtr(),
-                                          giv.dataPtr(),
-                                          Exp.dataPtr(), Eyp.dataPtr(), Ezp.dataPtr(),
-                                          Bxp.dataPtr(), Byp.dataPtr(), Bzp.dataPtr(),
-                                          &this->charge, &this->mass, &dt,
-                                          &WarpX::particle_pusher_algo);
+            // This wraps the momentum advance so that inheritors can modify the call.
+            // Extract pointers to the different particle quantities
+            const Real* const AMREX_RESTRICT zp = m_zp[thread_num].dataPtr();
+            Real* const AMREX_RESTRICT gi = m_giv[thread_num].dataPtr();
+            Real* const AMREX_RESTRICT uxpp = uxp.dataPtr();
+            Real* const AMREX_RESTRICT uypp = uyp.dataPtr();
+            Real* const AMREX_RESTRICT uzpp = uzp.dataPtr();
+            const Real* const AMREX_RESTRICT Expp = Exp.dataPtr();
+            const Real* const AMREX_RESTRICT Eypp = Eyp.dataPtr();
+            const Real* const AMREX_RESTRICT Ezpp = Ezp.dataPtr();
+            const Real* const AMREX_RESTRICT Bxpp = Bxp.dataPtr();
+            const Real* const AMREX_RESTRICT Bypp = Byp.dataPtr();
+            const Real* const AMREX_RESTRICT Bzpp = Bzp.dataPtr();
+
+            // Loop over the particles and update their momentum
+            const Real q = this->charge;
+            const Real m = this->mass;
+            if (WarpX::particle_pusher_algo == ParticlePusherAlgo::Boris){
+                amrex::ParallelFor( pti.numParticles(),
+                    [=] AMREX_GPU_DEVICE (long i) {
+                        UpdateMomentumBoris( uxpp[i], uypp[i], uzpp[i], gi[i],
+                              Expp[i], Eypp[i], Ezpp[i], Bxpp[i], Bypp[i], Bzpp[i], q, m, dt);
+                    }
+                );
+            } else if (WarpX::particle_pusher_algo == ParticlePusherAlgo::Vay) {
+                amrex::ParallelFor( pti.numParticles(),
+                    [=] AMREX_GPU_DEVICE (long i) {
+                        UpdateMomentumVay( uxpp[i], uypp[i], uzpp[i], gi[i],
+                              Expp[i], Eypp[i], Ezpp[i], Bxpp[i], Bypp[i], Bzpp[i], q, m, dt);
+                    }
+                );
+            } else {
+              amrex::Abort("Unknown particle pusher");
+            };
 
             // Undo the push for particles not injected yet.
             // It is assumed that PushP will only be called on the first and last steps
             // and that no particles will cross zinject_plane.
-            for (int i=0 ; i < zp.size() ; i++) {
-                if (zp[i] <= zinject_plane_levels[lev]) {
-                    uxp[i] = uxp_save[i];
-                    uyp[i] = uyp_save[i];
-                    uzp[i] = uzp_save[i];
+            const Real* const AMREX_RESTRICT ux_save = uxp_save.dataPtr();
+            const Real* const AMREX_RESTRICT uy_save = uyp_save.dataPtr();
+            const Real* const AMREX_RESTRICT uz_save = uzp_save.dataPtr();
+            const Real zz = zinject_plane_levels[lev];
+            amrex::ParallelFor( pti.numParticles(),
+                [=] AMREX_GPU_DEVICE (long i) {
+                if (zp[i] <= zz) {
+                    uxpp[i] = ux_save[i];
+                    uypp[i] = uy_save[i];
+                    uzpp[i] = uz_save[i];
                 }
             }
+            );
 
         }
     }
diff --git a/Source/Particles/ShapeFactors.H b/Source/Particles/ShapeFactors.H
new file mode 100644
index 000000000..9d185714a
--- /dev/null
+++ b/Source/Particles/ShapeFactors.H
@@ -0,0 +1,130 @@
+#ifndef SHAPEFACTORS_H_
+#define SHAPEFACTORS_H_
+
+#include <AMReX_Extension.H>
+#include <AMReX_GpuQualifiers.H>
+#include <AMReX_REAL.H>
+
+// Compute shape factor and return index of leftmost cell where
+// particle writes.
+// `xmid` is a coordinate expressed in cell-index units; the weights
+// are written to `sx[0..depos_order]`.
+// NOTE(review): the int conversion truncates toward zero; confirm callers
+// only pass non-negative coordinates.
+// Specialized templates are defined below for orders 0 to 3.
+// This generic version is only selected when no specialization exists:
+// it leaves `sx` untouched and returns 0.
+template <int depos_order>
+AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
+int compute_shape_factor(amrex::Real* const sx, amrex::Real xmid)
+{
+    return 0;
+}
+
+// Compute shape factor for order 0.
+template <>
+AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
+int compute_shape_factor <0> (amrex::Real* const sx, amrex::Real xmid){
+    const int j = static_cast<int>(xmid+0.5);
+    sx[0] = 1.0;
+    return j;
+}
+
+// Compute shape factor for order 1.
+template <>
+AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
+int compute_shape_factor <1> (amrex::Real* const sx, amrex::Real xmid){
+    const int j = static_cast<int>(xmid);
+    const amrex::Real xint = xmid-j;
+    sx[0] = 1.0 - xint;
+    sx[1] = xint;
+    return j;
+}
+
+// Compute shape factor for order 2.
+template <>
+AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
+int compute_shape_factor <2> (amrex::Real* const sx, amrex::Real xmid){
+    const int j = static_cast<int>(xmid+0.5);
+    const amrex::Real xint = xmid-j;
+    sx[0] = 0.5*(0.5-xint)*(0.5-xint);
+    sx[1] = 0.75-xint*xint;
+    sx[2] = 0.5*(0.5+xint)*(0.5+xint);
+    // index of the leftmost cell where particle deposits
+    return j-1;
+}
+
+// Compute shape factor for order 3.
+template <>
+AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
+int compute_shape_factor <3> (amrex::Real* const sx, amrex::Real xmid){
+    const int j = static_cast<int>(xmid);
+    const amrex::Real xint = xmid-j;
+    sx[0] = 1.0/6.0*(1.0-xint)*(1.0-xint)*(1.0-xint);
+    sx[1] = 2.0/3.0-xint*xint*(1-xint/2.0);
+    sx[2] = 2.0/3.0-(1-xint)*(1-xint)*(1.0-0.5*(1-xint));
+    sx[3] = 1.0/6.0*xint*xint*xint;
+    // index of the leftmost cell where particle deposits
+    return j-1;
+}
+
+// Compute shifted shape factor and return index of leftmost cell where
+// particle writes, for Esirkepov algorithm.
+// The weights for the position `x_old` are stored in `sx` at an offset
+// derived from the cell index of `x_old` and from `i_new`; entries of
+// `sx` outside that window are not written here.
+// Specialized templates are defined below for orders 1, 2 and 3.
+template <int depos_order>
+AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
+int compute_shifted_shape_factor (amrex::Real* const sx,
+                                  const amrex::Real x_old,
+                                  const int i_new);
+
+// Compute shifted shape factor for order 1.
+template <>
+AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
+int compute_shifted_shape_factor <1> (amrex::Real* const sx,
+                                      const amrex::Real x_old,
+                                      const int i_new){
+    const int i = static_cast<int>(x_old);
+    const int i_shift = i - i_new;
+    const amrex::Real xint = x_old - i;
+    sx[1+i_shift] = 1.0 - xint;
+    sx[2+i_shift] = xint;
+    return i;
+}
+
+// Compute shifted shape factor for order 2.
+template <>
+AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
+int compute_shifted_shape_factor <2> (amrex::Real* const sx,
+                                      const amrex::Real x_old,
+                                      const int i_new){
+    const int i = static_cast<int>(x_old+0.5);
+    const int i_shift = i - (i_new + 1);
+    const amrex::Real xint = x_old - i;
+    sx[1+i_shift] = 0.5*(0.5-xint)*(0.5-xint);
+    sx[2+i_shift] = 0.75-xint*xint;
+    sx[3+i_shift] = 0.5*(0.5+xint)*(0.5+xint);
+    // index of the leftmost cell where particle deposits
+    return i-1;
+}
+
+// Compute shifted shape factor for order 3.
+template <>
+AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
+int compute_shifted_shape_factor <3> (amrex::Real* const sx,
+                                      const amrex::Real x_old,
+                                      const int i_new){
+    const int i = static_cast<int>(x_old);
+    const int i_shift = i - (i_new + 1);
+    const amrex::Real xint = x_old - i;
+    sx[1+i_shift] = 1.0/6.0*(1.0-xint)*(1.0-xint)*(1.0-xint);
+    sx[2+i_shift] = 2.0/3.0-xint*xint*(1-xint/2.0);
+    sx[3+i_shift] = 2.0/3.0-(1-xint)*(1-xint)*(1.0-0.5*(1-xint));
+    sx[4+i_shift] = 1.0/6.0*xint*xint*xint;
+    // index of the leftmost cell where particle deposits
+    return i-1;
+}
+
+#endif // SHAPEFACTORS_H_
diff --git a/Source/Particles/WarpXParticleContainer.H b/Source/Particles/WarpXParticleContainer.H
index 662b2e1b8..ac5b47ada 100644
--- a/Source/Particles/WarpXParticleContainer.H
+++ b/Source/Particles/WarpXParticleContainer.H
@@ -13,7 +13,7 @@ struct PIdx
     enum { // Particle Attributes stored in amrex::ParticleContainer's struct of array
 	w = 0,  // weight
 	ux, uy, uz, Ex, Ey, Ez, Bx, By, Bz,
-#ifdef WARPX_RZ
+#ifdef WARPX_DIM_RZ
         theta, // RZ needs all three position components
 #endif
 	nattribs
@@ -104,8 +104,9 @@ public:
                                 const amrex::Vector<std::unique_ptr<amrex::FabArray<amrex::BaseFab<int> > > >& masks) {}
 
     virtual void FieldGather (int lev,
-                              const amrex::MultiFab& Ex, const amrex::MultiFab& Ey, const amrex::MultiFab& Ez,
-                              const amrex::MultiFab& Bx, const amrex::MultiFab& By, const amrex::MultiFab& Bz) {}
+                              const amrex::MultiFab& Ex, const amrex::MultiFab& Ey,
+                              const amrex::MultiFab& Ez, const amrex::MultiFab& Bx,
+                              const amrex::MultiFab& By, const amrex::MultiFab& Bz) {}
 
 #ifdef WARPX_DO_ELECTROSTATIC    
     virtual void EvolveES (const amrex::Vector<std::array<std::unique_ptr<amrex::MultiFab>, 3> >& E,
@@ -166,13 +167,13 @@ public:
 
     virtual void DepositCharge(WarpXParIter& pti,
                                RealVector& wp,
-                               amrex::MultiFab* rhomf,
-                               amrex::MultiFab* crhomf,
+                               amrex::MultiFab* rho,
                                int icomp,
-                               const long np_current,
-                               const long np,
+                               const long offset,
+                               const long np_to_depose,
                                int thread_num,
-                               int lev );
+                               int lev,
+                               int depos_lev);
 
     virtual void DepositCurrent(WarpXParIter& pti,
                                 RealVector& wp,
diff --git a/Source/Particles/WarpXParticleContainer.cpp b/Source/Particles/WarpXParticleContainer.cpp
index a20f0035e..befa5cfed 100644
--- a/Source/Particles/WarpXParticleContainer.cpp
+++ b/Source/Particles/WarpXParticleContainer.cpp
@@ -12,6 +12,7 @@
 #include <GetAndSetPosition.H>
 #include <UpdatePosition.H>
 #include <CurrentDeposition.H>
+#include <ChargeDeposition.H>
 
 using namespace amrex;
 
@@ -27,7 +28,7 @@ void
 WarpXParIter::GetPosition (Cuda::ManagedDeviceVector<Real>& x, Cuda::ManagedDeviceVector<Real>& y, Cuda::ManagedDeviceVector<Real>& z) const
 {
     amrex::ParIter<0,0,PIdx::nattribs>::GetPosition(x, z);
-#ifdef WARPX_RZ
+#ifdef WARPX_DIM_RZ
     const auto& attribs = GetAttribs();
     const auto& theta = attribs[PIdx::theta];
     y.resize(x.size());
@@ -44,10 +45,10 @@ WarpXParIter::GetPosition (Cuda::ManagedDeviceVector<Real>& x, Cuda::ManagedDevi
 void
 WarpXParIter::SetPosition (const Cuda::ManagedDeviceVector<Real>& x, const Cuda::ManagedDeviceVector<Real>& y, const Cuda::ManagedDeviceVector<Real>& z)
 {
-#ifdef WARPX_RZ
+#ifdef WARPX_DIM_RZ
     auto& attribs = GetAttribs();
     auto& theta = attribs[PIdx::theta];
-    Cuda::DeviceVector<Real> r(x.size());
+    Cuda::ManagedDeviceVector<Real> r(x.size());
     for (unsigned int i=0 ; i < x.size() ; i++) {
         theta[i] = std::atan2(y[i], x[i]);
         r[i] = std::sqrt(x[i]*x[i] + y[i]*y[i]);
@@ -80,7 +81,7 @@ WarpXParticleContainer::WarpXParticleContainer (AmrCore* amr_core, int ispecies)
     particle_comps["Bx"] = PIdx::Bx;
     particle_comps["By"] = PIdx::By;
     particle_comps["Bz"] = PIdx::Bz;
-#ifdef WARPX_RZ
+#ifdef WARPX_DIM_RZ
     particle_comps["theta"] = PIdx::theta;
 #endif
 
@@ -163,7 +164,7 @@ WarpXParticleContainer::AddOneParticle (ParticleTileType& particle_tile,
     p.pos(1) = y;
     p.pos(2) = z;
 #elif (AMREX_SPACEDIM == 2)
-#ifdef WARPX_RZ
+#ifdef WARPX_DIM_RZ
     attribs[PIdx::theta] = std::atan2(y, x);
     x = std::sqrt(x*x + y*y);
 #endif
@@ -209,7 +210,7 @@ WarpXParticleContainer::AddNParticles (int lev,
 
     std::size_t np = iend-ibegin;
 
-#ifdef WARPX_RZ
+#ifdef WARPX_DIM_RZ
     Vector<Real> theta(np);
 #endif
 
@@ -228,7 +229,7 @@ WarpXParticleContainer::AddNParticles (int lev,
         p.pos(1) = y[i];
         p.pos(2) = z[i];
 #elif (AMREX_SPACEDIM == 2)
-#ifdef WARPX_RZ
+#ifdef WARPX_DIM_RZ
         theta[i-ibegin] = std::atan2(y[i], x[i]);
         p.pos(0) = std::sqrt(x[i]*x[i] + y[i]*y[i]);
 #else
@@ -265,7 +266,7 @@ WarpXParticleContainer::AddNParticles (int lev,
 
         for (int comp = PIdx::uz+1; comp < PIdx::nattribs; ++comp)
         {
-#ifdef WARPX_RZ
+#ifdef WARPX_DIM_RZ
             if (comp == PIdx::theta) {
                 particle_tile.push_back_real(comp, theta.front(), theta.back());
             }
@@ -394,14 +395,6 @@ WarpXParticleContainer::DepositCurrentFortran(WarpXParIter& pti,
         &WarpX::nox,&WarpX::noy,&WarpX::noz, &j_is_nodal,
         &lvect,&WarpX::current_deposition_algo);
 
-#ifdef WARPX_RZ
-    // Rescale current in r-z mode
-    warpx_current_deposition_rz_volume_scaling(
-        jx_ptr, &ngJ, jxntot.getVect(),
-        jy_ptr, &ngJ, jyntot.getVect(),
-        jz_ptr, &ngJ, jzntot.getVect(),
-        &xyzmin[0], &dx[0]);
-#endif
     BL_PROFILE_VAR_STOP(blp_pxr_cd);
 
 #ifndef AMREX_USE_GPU
@@ -503,7 +496,8 @@ WarpXParticleContainer::DepositCurrent(WarpXParIter& pti,
     Real* AMREX_RESTRICT yp = m_yp[thread_num].dataPtr() + offset;
 
     // Lower corner of tile box physical domain
-    const std::array<Real, 3>& xyzmin = WarpX::LowerCorner(tilebox, depos_lev);;
+    // Note that this includes guard cells since it is after tilebox.grow
+    const std::array<Real, 3>& xyzmin = WarpX::LowerCorner(tilebox, depos_lev);
     // xyzmin is built on pti.tilebox(), so it does 
     // not include staggering, so the stagger_shift has to be done by hand.
     // Alternatively, we could define xyzminx from tbx (and the same for 3 
@@ -513,36 +507,36 @@ WarpXParticleContainer::DepositCurrent(WarpXParIter& pti,
 
     if (WarpX::current_deposition_algo == CurrentDepositionAlgo::Esirkepov) {
         if        (WarpX::nox == 1){
-            doEsirkepovDepositionShapeN<1>(xp, yp, zp, wp.dataPtr(), uxp.dataPtr(), 
-                                           uyp.dataPtr(), uzp.dataPtr(), jx_arr, jy_arr, 
+            doEsirkepovDepositionShapeN<1>(xp, yp, zp, wp.dataPtr() + offset, uxp.dataPtr() + offset, 
+                                           uyp.dataPtr() + offset, uzp.dataPtr() + offset, jx_arr, jy_arr, 
                                            jz_arr, np_to_depose, dt, dx,
                                            xyzmin, lo, q);
         } else if (WarpX::nox == 2){
-            doEsirkepovDepositionShapeN<2>(xp, yp, zp, wp.dataPtr(), uxp.dataPtr(), 
-                                           uyp.dataPtr(), uzp.dataPtr(), jx_arr, jy_arr, 
+            doEsirkepovDepositionShapeN<2>(xp, yp, zp, wp.dataPtr() + offset, uxp.dataPtr() + offset, 
+                                           uyp.dataPtr() + offset, uzp.dataPtr() + offset, jx_arr, jy_arr, 
                                            jz_arr, np_to_depose, dt, dx,
                                            xyzmin, lo, q);
         } else if (WarpX::nox == 3){
-            doEsirkepovDepositionShapeN<3>(xp, yp, zp, wp.dataPtr(), uxp.dataPtr(), 
-                                           uyp.dataPtr(), uzp.dataPtr(), jx_arr, jy_arr, 
+            doEsirkepovDepositionShapeN<3>(xp, yp, zp, wp.dataPtr() + offset, uxp.dataPtr() + offset, 
+                                           uyp.dataPtr() + offset, uzp.dataPtr() + offset, jx_arr, jy_arr, 
                                            jz_arr, np_to_depose, dt, dx,
                                            xyzmin, lo, q);
         }
     } else {
         if        (WarpX::nox == 1){
-            doDepositionShapeN<1>(xp, yp, zp, wp.dataPtr(), uxp.dataPtr(), 
-                                  uyp.dataPtr(), uzp.dataPtr(), jx_arr, jy_arr, 
-                                  jz_arr, offset, np_to_depose, dt, dx,
+            doDepositionShapeN<1>(xp, yp, zp, wp.dataPtr() + offset, uxp.dataPtr() + offset, 
+                                  uyp.dataPtr() + offset, uzp.dataPtr() + offset, jx_arr, jy_arr, 
+                                  jz_arr, np_to_depose, dt, dx,
                                   xyzmin, lo, stagger_shift, q);
         } else if (WarpX::nox == 2){
-            doDepositionShapeN<2>(xp, yp, zp, wp.dataPtr(), uxp.dataPtr(), 
-                                  uyp.dataPtr(), uzp.dataPtr(), jx_arr, jy_arr, 
-                                  jz_arr, offset, np_to_depose, dt, dx,
+            doDepositionShapeN<2>(xp, yp, zp, wp.dataPtr() + offset, uxp.dataPtr() + offset, 
+                                  uyp.dataPtr() + offset, uzp.dataPtr() + offset, jx_arr, jy_arr, 
+                                  jz_arr, np_to_depose, dt, dx,
                                   xyzmin, lo, stagger_shift, q);
         } else if (WarpX::nox == 3){
-            doDepositionShapeN<3>(xp, yp, zp, wp.dataPtr(), uxp.dataPtr(), 
-                                  uyp.dataPtr(), uzp.dataPtr(), jx_arr, jy_arr, 
-                                  jz_arr, offset, np_to_depose, dt, dx,
+            doDepositionShapeN<3>(xp, yp, zp, wp.dataPtr() + offset, uxp.dataPtr() + offset, 
+                                  uyp.dataPtr() + offset, uzp.dataPtr() + offset, jx_arr, jy_arr, 
+                                  jz_arr, np_to_depose, dt, dx,
                                   xyzmin, lo, stagger_shift, q);
         }
     }
@@ -559,140 +553,87 @@ WarpXParticleContainer::DepositCurrent(WarpXParIter& pti,
 }
 
 void
-WarpXParticleContainer::DepositCharge ( WarpXParIter& pti, RealVector& wp,
-                                        MultiFab* rhomf, MultiFab* crhomf, int icomp,
-                                        const long np_current,
-                                        const long np, int thread_num, int lev )
+WarpXParticleContainer::DepositCharge (WarpXParIter& pti, RealVector& wp,
+                                       MultiFab* rho, int icomp,
+                                       const long offset, const long np_to_depose,
+                                       int thread_num, int lev, int depos_lev)
 {
+    AMREX_ALWAYS_ASSERT_WITH_MESSAGE((depos_lev==(lev-1)) ||
+                                     (depos_lev==(lev  )),
+                                     "Deposition buffers only work for lev-1");
 
-  BL_PROFILE_VAR_NS("PICSAR::ChargeDeposition", blp_pxr_chd);
-  BL_PROFILE_VAR_NS("PPC::Evolve::Accumulate", blp_accumulate);
-
-  const std::array<Real,3>& xyzmin_tile = WarpX::LowerCorner(pti.tilebox(), lev);
-  const long lvect = 8;
+    // If no particles, do not do anything
+    if (np_to_depose == 0) return;
 
-  long ngRho = rhomf->nGrow();
-  Real* data_ptr;
-  Box tile_box = convert(pti.tilebox(), IntVect::TheUnitVector());
+    const long ngRho = rho->nGrow();
+    const std::array<Real,3>& dx = WarpX::CellSize(std::max(depos_lev,0));
+    const Real q = this->charge;
 
-  const std::array<Real,3>& dx = WarpX::CellSize(lev);
-  const std::array<Real,3>& cdx = WarpX::CellSize(std::max(lev-1,0));
+    BL_PROFILE_VAR_NS("PPC::ChargeDeposition", blp_ppc_chd);
+    BL_PROFILE_VAR_NS("PPC::Evolve::Accumulate", blp_accumulate);
 
-  // Deposit charge for particles that are not in the current buffers
-  if (np_current > 0)
-  {
-      const std::array<Real, 3>& xyzmin = xyzmin_tile;
+    // Get tile box where charge is deposited.
+    // The tile box is different when depositing in the buffers (depos_lev<lev)
+    // or when depositing inside the level (depos_lev=lev)
+    Box tilebox;
+    if (lev == depos_lev) {
+        tilebox = pti.tilebox();
+    } else {
+        const IntVect& ref_ratio = WarpX::RefRatio(depos_lev);
+        tilebox = amrex::coarsen(pti.tilebox(),ref_ratio);
+    }
+    
+    tilebox.grow(ngRho);
 
 #ifdef AMREX_USE_GPU
-      data_ptr = (*rhomf)[pti].dataPtr(icomp);
-      auto rholen = (*rhomf)[pti].length();
+    // No tiling on GPU: rho_arr points to the full rho array.
+    MultiFab rhoi(*rho, amrex::make_alias, icomp, 1);
+    Array4<Real> const& rho_arr = rhoi.array(pti);
 #else
-      tile_box.grow(ngRho);
-      local_rho[thread_num].resize(tile_box);
+    // Tiling is on: rho_arr points to local_rho[thread_num]
+    const Box tb = amrex::convert(tilebox, IntVect::TheUnitVector());
 
-      data_ptr = local_rho[thread_num].dataPtr();
-      auto rholen = local_rho[thread_num].length();
+    local_rho[thread_num].resize(tb);
 
-      local_rho[thread_num].setVal(0.0);
-#endif
+    // local_rho[thread_num] is set to zero
+    local_rho[thread_num].setVal(0.0);
 
-#if (AMREX_SPACEDIM == 3)
-      const long nx = rholen[0]-1-2*ngRho;
-      const long ny = rholen[1]-1-2*ngRho;
-      const long nz = rholen[2]-1-2*ngRho;
-#else
-      const long nx = rholen[0]-1-2*ngRho;
-      const long ny = 0;
-      const long nz = rholen[1]-1-2*ngRho;
+    Array4<Real> const& rho_arr = local_rho[thread_num].array();
 #endif
-      BL_PROFILE_VAR_START(blp_pxr_chd);
-      warpx_charge_deposition(data_ptr, &np_current,
-                              m_xp[thread_num].dataPtr(),
-                              m_yp[thread_num].dataPtr(),
-                              m_zp[thread_num].dataPtr(),
-                              wp.dataPtr(),
-                              &this->charge,
-                              &xyzmin[0], &xyzmin[1], &xyzmin[2],
-                              &dx[0], &dx[1], &dx[2], &nx, &ny, &nz,
-                              &ngRho, &ngRho, &ngRho,
-                              &WarpX::nox,&WarpX::noy,&WarpX::noz,
-                              &lvect, &WarpX::charge_deposition_algo);
-#ifdef WARPX_RZ
-      warpx_charge_deposition_rz_volume_scaling(
-                               data_ptr, &ngRho, rholen.getVect(),
-                               &xyzmin[0], &dx[0]);
-#endif
-      BL_PROFILE_VAR_STOP(blp_pxr_chd);
-
-#ifndef AMREX_USE_GPU
-      BL_PROFILE_VAR_START(blp_accumulate);
-
-      (*rhomf)[pti].atomicAdd(local_rho[thread_num], tile_box, tile_box, 0, icomp, 1);
-
-      BL_PROFILE_VAR_STOP(blp_accumulate);
-#endif
-  }
-
-  // Deposit charge for particles that are in the current buffers
-  if (np_current < np)
-  {
-      const IntVect& ref_ratio = WarpX::RefRatio(lev-1);
-      const Box& ctilebox = amrex::coarsen(pti.tilebox(), ref_ratio);
-      const std::array<Real,3>& cxyzmin_tile = WarpX::LowerCorner(ctilebox, lev-1);
-
-#ifdef AMREX_USE_GPU
-      data_ptr = (*crhomf)[pti].dataPtr(icomp);
-      auto rholen = (*crhomf)[pti].length();
-#else
-      tile_box = amrex::convert(ctilebox, IntVect::TheUnitVector());
-      tile_box.grow(ngRho);
-      local_rho[thread_num].resize(tile_box);
-
-      data_ptr = local_rho[thread_num].dataPtr();
-      auto rholen = local_rho[thread_num].length();
+    // GPU, no tiling: deposit directly in rho
+    // CPU, tiling: deposit into local_rho
 
-      local_rho[thread_num].setVal(0.0);
-#endif
+    Real* AMREX_RESTRICT xp = m_xp[thread_num].dataPtr() + offset;
+    Real* AMREX_RESTRICT zp = m_zp[thread_num].dataPtr() + offset;
+    Real* AMREX_RESTRICT yp = m_yp[thread_num].dataPtr() + offset;
 
-#if (AMREX_SPACEDIM == 3)
-      const long nx = rholen[0]-1-2*ngRho;
-      const long ny = rholen[1]-1-2*ngRho;
-      const long nz = rholen[2]-1-2*ngRho;
-#else
-      const long nx = rholen[0]-1-2*ngRho;
-      const long ny = 0;
-      const long nz = rholen[1]-1-2*ngRho;
-#endif
+    // Lower corner of tile box physical domain
+    // Note that this includes guard cells since it is after tilebox.grow(ngRho)
+    const std::array<Real, 3>& xyzmin = WarpX::LowerCorner(tilebox, depos_lev);
+    // Indices of the lower bound
+    const Dim3 lo = lbound(tilebox);
 
-      long ncrse = np - np_current;
-      BL_PROFILE_VAR_START(blp_pxr_chd);
-      warpx_charge_deposition(data_ptr, &ncrse,
-                              m_xp[thread_num].dataPtr() + np_current,
-                              m_yp[thread_num].dataPtr() + np_current,
-                              m_zp[thread_num].dataPtr() + np_current,
-                              wp.dataPtr() + np_current,
-                              &this->charge,
-                              &cxyzmin_tile[0], &cxyzmin_tile[1], &cxyzmin_tile[2],
-                              &cdx[0], &cdx[1], &cdx[2], &nx, &ny, &nz,
-                              &ngRho, &ngRho, &ngRho,
-                              &WarpX::nox,&WarpX::noy,&WarpX::noz,
-                              &lvect, &WarpX::charge_deposition_algo);
-#ifdef WARPX_RZ
-      warpx_charge_deposition_rz_volume_scaling(
-                               data_ptr, &ngRho, rholen.getVect(),
-                               &cxyzmin_tile[0], &cdx[0]);
-#endif
-      BL_PROFILE_VAR_STOP(blp_pxr_chd);
+    BL_PROFILE_VAR_START(blp_ppc_chd);
+    if        (WarpX::nox == 1){
+        doChargeDepositionShapeN<1>(xp, yp, zp, wp.dataPtr()+offset, rho_arr,
+                                    np_to_depose, dx, xyzmin, lo, q);
+    } else if (WarpX::nox == 2){
+        doChargeDepositionShapeN<2>(xp, yp, zp, wp.dataPtr()+offset, rho_arr,
+                                    np_to_depose, dx, xyzmin, lo, q);
+    } else if (WarpX::nox == 3){
+        doChargeDepositionShapeN<3>(xp, yp, zp, wp.dataPtr()+offset, rho_arr,
+                                    np_to_depose, dx, xyzmin, lo, q);
+    }
+    BL_PROFILE_VAR_STOP(blp_ppc_chd);
 
 #ifndef AMREX_USE_GPU
-      BL_PROFILE_VAR_START(blp_accumulate);
+    BL_PROFILE_VAR_START(blp_accumulate);
 
-      (*crhomf)[pti].atomicAdd(local_rho[thread_num], tile_box, tile_box, 0, icomp, 1);
+    (*rho)[pti].atomicAdd(local_rho[thread_num], tb, tb, 0, icomp, 1);
 
-      BL_PROFILE_VAR_STOP(blp_accumulate);
+    BL_PROFILE_VAR_STOP(blp_accumulate);
 #endif
-    }
-};
+}
 
 void
 WarpXParticleContainer::DepositCharge (Vector<std::unique_ptr<MultiFab> >& rho, bool local)
@@ -769,8 +710,6 @@ WarpXParticleContainer::GetChargeDensity (int lev, bool local)
     BoxArray nba = ba;
     nba.surroundingNodes();
 
-    const std::array<Real,3>& dx = WarpX::CellSize(lev);
-
     const int ng = WarpX::nox;
 
     auto rho = std::unique_ptr<MultiFab>(new MultiFab(nba,dm,1,ng));
@@ -780,75 +719,28 @@ WarpXParticleContainer::GetChargeDensity (int lev, bool local)
 #pragma omp parallel
     {
 #endif
-        Cuda::ManagedDeviceVector<Real> xp, yp, zp;
 #ifdef _OPENMP
-        FArrayBox rho_loc;
+        int thread_num = omp_get_thread_num();
+#else
+        int thread_num = 0;
 #endif
 
         for (WarpXParIter pti(*this, lev); pti.isValid(); ++pti)
         {
+            const long np = pti.numParticles();
             auto& wp = pti.GetAttribs(PIdx::w);
 
-            const long np  = pti.numParticles();
-
-            pti.GetPosition(xp, yp, zp);
+            pti.GetPosition(m_xp[thread_num], m_yp[thread_num], m_zp[thread_num]);
 
-            // Data on the grid
-            Real* data_ptr;
-            FArrayBox& rhofab = (*rho)[pti];
+            DepositCharge(pti, wp, rho.get(), 0, 0, np, thread_num, lev, lev);
+        }
 #ifdef _OPENMP
-            const std::array<Real,3>& xyzmin_tile = WarpX::LowerCorner(pti.tilebox(), lev);
-            Box tile_box = convert(pti.tilebox(), IntVect::TheUnitVector());
-            const std::array<Real, 3>& xyzmin = xyzmin_tile;
-            tile_box.grow(ng);
-            rho_loc.resize(tile_box);
-            rho_loc = 0.0;
-            data_ptr = rho_loc.dataPtr();
-            auto rholen = rho_loc.length();
-#else
-            const Box& box = pti.validbox();
-            const std::array<Real,3>& xyzmin_grid = WarpX::LowerCorner(box, lev);
-            const std::array<Real, 3>& xyzmin = xyzmin_grid;
-            data_ptr = rhofab.dataPtr();
-            auto rholen = rhofab.length();
-#endif
-
-#if (AMREX_SPACEDIM == 3)
-            const long nx = rholen[0]-1-2*ng;
-            const long ny = rholen[1]-1-2*ng;
-            const long nz = rholen[2]-1-2*ng;
-#else
-            const long nx = rholen[0]-1-2*ng;
-            const long ny = 0;
-            const long nz = rholen[1]-1-2*ng;
-#endif
-
-            long nxg = ng;
-            long nyg = ng;
-            long nzg = ng;
-            long lvect = 8;
-
-            warpx_charge_deposition(data_ptr,
-                                    &np,
-                                    xp.dataPtr(),
-                                    yp.dataPtr(),
-                                    zp.dataPtr(), wp.dataPtr(),
-                                    &this->charge, &xyzmin[0], &xyzmin[1], &xyzmin[2],
-                                    &dx[0], &dx[1], &dx[2], &nx, &ny, &nz,
-                                    &nxg, &nyg, &nzg, &WarpX::nox,&WarpX::noy,&WarpX::noz,
-                                    &lvect, &WarpX::charge_deposition_algo);
-#ifdef WARPX_RZ
-            long ngRho = WarpX::nox;
-            warpx_charge_deposition_rz_volume_scaling(
-                                     data_ptr, &ngRho, rholen.getVect(),
-                                     &xyzmin[0], &dx[0]);
+    }
 #endif
 
-#ifdef _OPENMP
-            rhofab.atomicAdd(rho_loc);
-        }
+#ifdef WARPX_DIM_RZ
+    WarpX::GetInstance().ApplyInverseVolumeScalingToChargeDensity(rho.get(), lev);
 #endif
-    }
 
     if (!local) rho->SumBoundary(gm.periodicity());
 
@@ -1022,7 +914,7 @@ WarpXParticleContainer::PushX (int lev, Real dt)
             Real* AMREX_RESTRICT ux = attribs[PIdx::ux].dataPtr();
             Real* AMREX_RESTRICT uy = attribs[PIdx::uy].dataPtr();
             Real* AMREX_RESTRICT uz = attribs[PIdx::uz].dataPtr();
-#ifdef WARPX_RZ
+#ifdef WARPX_DIM_RZ
             Real* AMREX_RESTRICT theta = attribs[PIdx::theta].dataPtr();
 #endif
             // Loop over the particles and update their position
@@ -1030,12 +922,12 @@ WarpXParticleContainer::PushX (int lev, Real dt)
                 [=] AMREX_GPU_DEVICE (long i) {
                     ParticleType& p = pstructs[i]; // Particle object that gets updated
                     Real x, y, z; // Temporary variables
-#ifndef WARPX_RZ
+#ifndef WARPX_DIM_RZ
                     GetPosition( x, y, z, p ); // Initialize x, y, z
                     UpdatePosition( x, y, z, ux[i], uy[i], uz[i], dt);
                     SetPosition( p, x, y, z ); // Update the object p
 #else
-                    // For WARPX_RZ, the particles are still pushed in 3D Cartesian
+                    // For WARPX_DIM_RZ, the particles are still pushed in 3D Cartesian
                     GetCartesianPositionFromCylindrical( x, y, z, p, theta[i] );
                     UpdatePosition( x, y, z, ux[i], uy[i], uz[i], dt);
                     SetCylindricalPositionFromCartesian( p, theta[i], x, y, z );
diff --git a/Source/Utils/WarpXAlgorithmSelection.H b/Source/Utils/WarpXAlgorithmSelection.H
index 3fb23698a..6a32513b7 100644
--- a/Source/Utils/WarpXAlgorithmSelection.H
+++ b/Source/Utils/WarpXAlgorithmSelection.H
@@ -34,11 +34,9 @@ struct CurrentDepositionAlgo {
 };
 
 struct ChargeDepositionAlgo {
-    // These numbers corresponds to the algorithm code in WarpX's
-    // `warpx_charge_deposition` function
+    // Only the Standard algorithm is implemented
     enum {
-         Vectorized = 0,
-         Standard = 1
+         Standard = 0
     };
 };
 
diff --git a/Source/Utils/WarpXAlgorithmSelection.cpp b/Source/Utils/WarpXAlgorithmSelection.cpp
index 2c8038ccd..842085a36 100644
--- a/Source/Utils/WarpXAlgorithmSelection.cpp
+++ b/Source/Utils/WarpXAlgorithmSelection.cpp
@@ -8,7 +8,7 @@
 
 const std::map<std::string, int> maxwell_solver_algo_to_int = {
     {"yee",     MaxwellSolverAlgo::Yee },
-#ifndef WARPX_RZ // Not available in RZ
+#ifndef WARPX_DIM_RZ // Not available in RZ
     {"ckc",     MaxwellSolverAlgo::CKC },
 #endif
     {"default", MaxwellSolverAlgo::Yee }
@@ -31,12 +31,7 @@ const std::map<std::string, int> current_deposition_algo_to_int = {
 
 const std::map<std::string, int> charge_deposition_algo_to_int = {
     {"standard",   ChargeDepositionAlgo::Standard },
-#if (!defined AMREX_USE_GPU)&&(AMREX_SPACEDIM == 3) // Only available on CPU and 3D
-    {"vectorized", ChargeDepositionAlgo::Vectorized },
-    {"default",    ChargeDepositionAlgo::Vectorized }
-#else
     {"default",    ChargeDepositionAlgo::Standard }
-#endif
 };
 
 const std::map<std::string, int> gathering_algo_to_int = {
diff --git a/Source/WarpX.H b/Source/WarpX.H
index a25eef9e4..927cc1f32 100644
--- a/Source/WarpX.H
+++ b/Source/WarpX.H
@@ -152,12 +152,12 @@ public:
     BilinearFilter bilinear_filter;
     amrex::Vector< std::unique_ptr<NCIGodfreyFilter> > nci_godfrey_filter_exeybz;
     amrex::Vector< std::unique_ptr<NCIGodfreyFilter> > nci_godfrey_filter_bxbyez;
-    
+
     static int num_mirrors;
     amrex::Vector<amrex::Real> mirror_z;
     amrex::Vector<amrex::Real> mirror_z_width;
     amrex::Vector<int> mirror_z_npoints;
-    
+
     void applyMirrors(amrex::Real time);
 
     void ComputeDt ();
@@ -178,6 +178,16 @@ public:
     void EvolveE (int lev, PatchType patch_type, amrex::Real dt);
     void EvolveF (int lev, PatchType patch_type, amrex::Real dt, DtType dt_type);
 
+#ifdef WARPX_DIM_RZ
+    void ApplyInverseVolumeScalingToCurrentDensity(amrex::MultiFab* Jx,
+                                                   amrex::MultiFab* Jy,
+                                                   amrex::MultiFab* Jz,
+                                                   int lev);
+
+    void ApplyInverseVolumeScalingToChargeDensity(amrex::MultiFab* Rho,
+                                                  int lev);
+#endif
+
     void DampPML ();
     void DampPML (int lev);
     void DampPML (int lev, PatchType patch_type);
@@ -247,6 +257,7 @@ public:
 
     static int do_moving_window;
     static int moving_window_dir;
+    static amrex::Real moving_window_v;
 
     // slice generation //
     void InitializeSliceMultiFabs ();
@@ -489,17 +500,18 @@ private:
     int do_pml = 1;
     int pml_ncell = 10;
     int pml_delta = 10;
+    amrex::IntVect do_pml_Lo = amrex::IntVect::TheUnitVector();
+    amrex::IntVect do_pml_Hi = amrex::IntVect::TheUnitVector();
     amrex::Vector<std::unique_ptr<PML> > pml;
 
     amrex::Real moving_window_x = std::numeric_limits<amrex::Real>::max();
-    amrex::Real moving_window_v = std::numeric_limits<amrex::Real>::max();
     amrex::Real current_injection_position = 0;
 
     // Plasma injection parameters
     int warpx_do_continuous_injection = 0;
     int num_injected_species = -1;
     amrex::Vector<int> injected_plasma_species;
-    
+
     int do_electrostatic = 0;
     int n_buffer = 4;
     amrex::Real const_dt = 0.5e-11;
diff --git a/Source/WarpX.cpp b/Source/WarpX.cpp
index 1f5ade13a..1b653fd7f 100644
--- a/Source/WarpX.cpp
+++ b/Source/WarpX.cpp
@@ -30,6 +30,7 @@ Vector<Real> WarpX::B_external(3, 0.0);
 
 int WarpX::do_moving_window = 0;
 int WarpX::moving_window_dir = -1;
+Real WarpX::moving_window_v = std::numeric_limits<amrex::Real>::max();
 
 Real WarpX::gamma_boost = 1.;
 Real WarpX::beta_boost = 0.;
@@ -334,7 +335,19 @@ WarpX::ReadParameters ()
                "The boosted frame diagnostic currently only works if the boost is in the z direction.");
 
         pp.get("num_snapshots_lab", num_snapshots_lab);
-        pp.get("dt_snapshots_lab", dt_snapshots_lab);
+
+        // Read either dz_snapshots_lab or dt_snapshots_lab
+        bool snapshot_interval_is_specified = 0;
+        Real dz_snapshots_lab = 0;
+        snapshot_interval_is_specified += pp.query("dt_snapshots_lab", dt_snapshots_lab);
+        if ( pp.query("dz_snapshots_lab", dz_snapshots_lab) ){
+            dt_snapshots_lab = dz_snapshots_lab/PhysConst::c;
+            snapshot_interval_is_specified = 1;
+        }
+        AMREX_ALWAYS_ASSERT_WITH_MESSAGE(
+            snapshot_interval_is_specified,
+            "When using back-transformed diagnostics, user should specify either dz_snapshots_lab or dt_snapshots_lab.");
+
         pp.get("gamma_boost", gamma_boost);
 
         pp.query("do_boosted_frame_fields", do_boosted_frame_fields);
@@ -383,6 +396,22 @@ WarpX::ReadParameters ()
         pp.query("pml_ncell", pml_ncell);
         pp.query("pml_delta", pml_delta);
 
+        Vector<int> parse_do_pml_Lo(AMREX_SPACEDIM,1);
+        pp.queryarr("do_pml_Lo", parse_do_pml_Lo);
+        do_pml_Lo[0] = parse_do_pml_Lo[0];
+        do_pml_Lo[1] = parse_do_pml_Lo[1];
+#if (AMREX_SPACEDIM == 3)
+        do_pml_Lo[2] = parse_do_pml_Lo[2];
+#endif
+        Vector<int> parse_do_pml_Hi(AMREX_SPACEDIM,1);
+        pp.queryarr("do_pml_Hi", parse_do_pml_Hi);
+        do_pml_Hi[0] = parse_do_pml_Hi[0];
+        do_pml_Hi[1] = parse_do_pml_Hi[1];
+#if (AMREX_SPACEDIM == 3)
+        do_pml_Hi[2] = parse_do_pml_Hi[2];
+#endif
+
+
         pp.query("dump_openpmd", dump_openpmd);
         pp.query("dump_plotfiles", dump_plotfiles);
         pp.query("plot_raw_fields", plot_raw_fields);
@@ -393,7 +422,7 @@ WarpX::ReadParameters ()
         if (not user_fields_to_plot){
             // If not specified, set default values
             fields_to_plot = {"Ex", "Ey", "Ez", "Bx", "By",
-                              "Bz", "jx", "jy", "jz", 
+                              "Bz", "jx", "jy", "jz",
                               "part_per_cell"};
         }
         // set plot_rho to true of the users requests it, so that
@@ -411,9 +440,9 @@ WarpX::ReadParameters ()
         // If user requests to plot proc_number for a serial run,
         // delete proc_number from fields_to_plot
         if (ParallelDescriptor::NProcs() == 1){
-            fields_to_plot.erase(std::remove(fields_to_plot.begin(), 
-                                             fields_to_plot.end(), 
-                                             "proc_number"), 
+            fields_to_plot.erase(std::remove(fields_to_plot.begin(),
+                                             fields_to_plot.end(),
+                                             "proc_number"),
                                  fields_to_plot.end());
         }
 
@@ -497,11 +526,9 @@ WarpX::ReadParameters ()
     {
         ParmParse pp("algo");
         // If not in RZ mode, read use_picsar_deposition
-        // In RZ mode, use_picsar_deposition is on, as the C++ version 
+        // In RZ mode, use_picsar_deposition is on, as the C++ version
         // of the deposition does not support RZ
-#ifndef WARPX_RZ
         pp.query("use_picsar_deposition", use_picsar_deposition);
-#endif
         current_deposition_algo = GetAlgorithmInteger(pp, "current_deposition");
         charge_deposition_algo = GetAlgorithmInteger(pp, "charge_deposition");
         field_gathering_algo = GetAlgorithmInteger(pp, "field_gathering");
@@ -876,6 +903,21 @@ WarpX::AllocLevelMFs (int lev, const BoxArray& ba, const DistributionMapping& dm
             rho_cp[lev].reset(new MultiFab(amrex::convert(cba,IntVect::TheUnitVector()),dm,2,ngRho));
             rho_cp_owner_masks[lev] = std::move(rho_cp[lev]->OwnerMask(cperiod));
         }
+        if (fft_hybrid_mpi_decomposition == false){
+            // Allocate and initialize the spectral solver
+            std::array<Real,3> cdx = CellSize(lev-1);
+    #if (AMREX_SPACEDIM == 3)
+            RealVect cdx_vect(cdx[0], cdx[1], cdx[2]);
+    #elif (AMREX_SPACEDIM == 2)
+            RealVect cdx_vect(cdx[0], cdx[2]);
+    #endif
+            // Get the cell-centered box, with guard cells
+            BoxArray realspace_ba = cba;  // Copy box
+            realspace_ba.enclosedCells().grow(ngE); // cell-centered + guard cells
+            // Define spectral solver
+            spectral_solver_cp[lev].reset( new SpectralSolver( realspace_ba, dm,
+                nox_fft, noy_fft, noz_fft, do_nodal, cdx_vect, dt[lev] ) );
+        }
 #endif
     }
 
@@ -907,7 +949,7 @@ WarpX::AllocLevelMFs (int lev, const BoxArray& ba, const DistributionMapping& dm
             current_buf[lev][0].reset( new MultiFab(amrex::convert(cba,jx_nodal_flag),dm,1,ngJ));
             current_buf[lev][1].reset( new MultiFab(amrex::convert(cba,jy_nodal_flag),dm,1,ngJ));
             current_buf[lev][2].reset( new MultiFab(amrex::convert(cba,jz_nodal_flag),dm,1,ngJ));
-            if (do_dive_cleaning || plot_rho) {
+            if (rho_cp[lev]) {
                 charge_buf[lev].reset( new MultiFab(amrex::convert(cba,IntVect::TheUnitVector()),dm,2,ngRho));
             }
             current_buffer_masks[lev].reset( new iMultiFab(ba, dm, 1, 1) );
@@ -995,7 +1037,7 @@ WarpX::ComputeDivB (MultiFab& divB, int dcomp,
 {
     Real dxinv = 1./dx[0], dyinv = 1./dx[1], dzinv = 1./dx[2];
 
-#ifdef WARPX_RZ
+#ifdef WARPX_DIM_RZ
     const Real rmin = GetInstance().Geom(0).ProbLo(0);
 #endif
 
@@ -1014,7 +1056,7 @@ WarpX::ComputeDivB (MultiFab& divB, int dcomp,
         [=] AMREX_GPU_DEVICE(int i, int j, int k) noexcept
         {
             warpx_computedivb(i, j, k, dcomp, divBfab, Bxfab, Byfab, Bzfab, dxinv, dyinv, dzinv
-#ifdef WARPX_RZ
+#ifdef WARPX_DIM_RZ
                               ,rmin
 #endif
                               );
@@ -1029,7 +1071,7 @@ WarpX::ComputeDivB (MultiFab& divB, int dcomp,
 {
     Real dxinv = 1./dx[0], dyinv = 1./dx[1], dzinv = 1./dx[2];
 
-#ifdef WARPX_RZ
+#ifdef WARPX_DIM_RZ
     const Real rmin = GetInstance().Geom(0).ProbLo(0);
 #endif
 
@@ -1048,7 +1090,7 @@ WarpX::ComputeDivB (MultiFab& divB, int dcomp,
         [=] AMREX_GPU_DEVICE(int i, int j, int k) noexcept
         {
             warpx_computedivb(i, j, k, dcomp, divBfab, Bxfab, Byfab, Bzfab, dxinv, dyinv, dzinv
-#ifdef WARPX_RZ
+#ifdef WARPX_DIM_RZ
                               ,rmin
 #endif
                               );
@@ -1063,7 +1105,7 @@ WarpX::ComputeDivE (MultiFab& divE, int dcomp,
 {
     Real dxinv = 1./dx[0], dyinv = 1./dx[1], dzinv = 1./dx[2];
 
-#ifdef WARPX_RZ
+#ifdef WARPX_DIM_RZ
     const Real rmin = GetInstance().Geom(0).ProbLo(0);
 #endif
 
@@ -1082,7 +1124,7 @@ WarpX::ComputeDivE (MultiFab& divE, int dcomp,
         [=] AMREX_GPU_DEVICE(int i, int j, int k) noexcept
         {
             warpx_computedive(i, j, k, dcomp, divEfab, Exfab, Eyfab, Ezfab, dxinv, dyinv, dzinv
-#ifdef WARPX_RZ
+#ifdef WARPX_DIM_RZ
                               ,rmin
 #endif
                               );
@@ -1097,7 +1139,7 @@ WarpX::ComputeDivE (MultiFab& divE, int dcomp,
 {
     Real dxinv = 1./dx[0], dyinv = 1./dx[1], dzinv = 1./dx[2];
 
-#ifdef WARPX_RZ
+#ifdef WARPX_DIM_RZ
     const Real rmin = GetInstance().Geom(0).ProbLo(0);
 #endif
 
@@ -1116,7 +1158,7 @@ WarpX::ComputeDivE (MultiFab& divE, int dcomp,
         [=] AMREX_GPU_DEVICE(int i, int j, int k) noexcept
         {
             warpx_computedive(i, j, k, dcomp, divEfab, Exfab, Eyfab, Ezfab, dxinv, dyinv, dzinv
-#ifdef WARPX_RZ
+#ifdef WARPX_DIM_RZ
                               ,rmin
 #endif
                               );