12 files changed, 5291 insertions, 0 deletions
diff --git a/Source/Particles/Make.package b/Source/Particles/Make.package
new file mode 100644
index 000000000..acbfe3390
--- /dev/null
+++ b/Source/Particles/Make.package
@@ -0,0 +1,14 @@
+F90EXE_sources += deposit_cic.F90
+F90EXE_sources += interpolate_cic.F90
+F90EXE_sources += push_particles_ES.F90
+CEXE_sources += PhysicalParticleContainer.cpp
+CEXE_sources += MultiParticleContainer.cpp
+CEXE_sources += WarpXParticleContainer.cpp
+CEXE_sources += RigidInjectedParticleContainer.cpp
+CEXE_headers += MultiParticleContainer.H
+CEXE_headers += WarpXParticleContainer.H
+CEXE_headers += RigidInjectedParticleContainer.H
+CEXE_headers += PhysicalParticleContainer.H
+
+INCLUDE_LOCATIONS += $(WARPX_HOME)/Source/Particles
+VPATH_LOCATIONS   += $(WARPX_HOME)/Source/Particles
diff --git a/Source/Particles/MultiParticleContainer.H b/Source/Particles/MultiParticleContainer.H
new file mode 100644
index 000000000..b21247ff6
--- /dev/null
+++ b/Source/Particles/MultiParticleContainer.H
@@ -0,0 +1,209 @@
+
+#ifndef WARPX_ParticleContainer_H_
+#define WARPX_ParticleContainer_H_
+
+#include <memory>
+#include <map>
+#include <string>
+
+#include <AMReX_Particles.H>
+
+#include <WarpXParticleContainer.H>
+#include <PhysicalParticleContainer.H>
+#include <RigidInjectedParticleContainer.H>
+#include <LaserParticleContainer.H>
+
+//
+// MultiParticleContainer holds multiple (nspecies or npsecies+1 when
+// using laser) WarpXParticleContainer.  WarpXParticleContainer is
+// derived from amrex::ParticleContainer, and it is
+// polymorphic. PhysicalParticleContainer and LaserParticleContainer are
+// concrete class dervied from WarpXParticlContainer.
+//
+
+class MultiParticleContainer
+{
+
+public:
+
+    MultiParticleContainer (amrex::AmrCore* amr_core);
+
+    ~MultiParticleContainer() {}
+
+    WarpXParticleContainer& GetParticleContainer (int ispecies) {
+	return *allcontainers[ispecies];
+    }
+
+    std::array<amrex::Real, 3> meanParticleVelocity(int ispecies) {
+        return allcontainers[ispecies]->meanParticleVelocity();
+    }
+
+    void AllocData ();
+
+    void InitData ();
+
+#ifdef WARPX_DO_ELECTROSTATIC
+    ///
+    /// Performs the field gather operation using the input field E, for all the species
+    /// in the MultiParticleContainer. This is the electrostatic version of the field gather.
+    ///
+    void FieldGatherES (const amrex::Vector<std::array<std::unique_ptr<amrex::MultiFab>, 3> >& E,
+                        const amrex::Vector<std::unique_ptr<amrex::FabArray<amrex::BaseFab<int> > > >& masks);
+
+    ///
+    /// This evolves all the particles by one PIC time step, including charge deposition, the 
+    /// field solve, and pushing the particles, for all the species in the MultiParticleContainer.
+    /// This is the electrostatic version.
+    ///
+    void EvolveES (const amrex::Vector<std::array<std::unique_ptr<amrex::MultiFab>, 3> >& E, 
+                         amrex::Vector<std::unique_ptr<amrex::MultiFab> >& rho, 
+                   amrex::Real t, amrex::Real dt);
+
+    ///
+    /// This pushes the particle positions by one half time step for all the species in the 
+    /// MultiParticleContainer. It is used to desynchronize the particles after initializaton
+    /// or when restarting from a checkpoint. This is the electrostatic version.
+    /// 
+    void PushXES (amrex::Real dt);
+
+    ///
+    /// This deposits the particle charge onto rho, accumulating the value for all the species
+    /// in the MultiParticleContainer. rho is assumed to contain node-centered multifabs. 
+    /// This version is hard-coded for CIC deposition.
+    ///
+    void DepositCharge(amrex::Vector<std::unique_ptr<amrex::MultiFab> >& rho, bool local = false);
+
+    ///
+    /// This returns the total particle charge for all the species in this MultiParticleContainer.
+    /// This is needed to subtract the offset for periodic boundary conditions.
+    ///
+    amrex::Real sumParticleCharge(bool local = false);
+#endif // WARPX_DO_ELECTROSTATIC
+    
+    ///
+    /// Performs the field gather operation using the input fields E and B, for all the species
+    /// in the MultiParticleContainer. This is the electromagnetic version of the field gather.
+    ///
+    void FieldGather (int lev,
+                      const amrex::MultiFab& Ex, const amrex::MultiFab& Ey, const amrex::MultiFab& Ez,
+                      const amrex::MultiFab& Bx, const amrex::MultiFab& By, const amrex::MultiFab& Bz); 
+
+    ///
+    /// This evolves all the particles by one PIC time step, including current deposition, the
+    /// field solve, and pushing the particles, for all the species in the MultiParticleContainer.
+    /// This is the electromagnetic version.
+    ///
+    void Evolve (int lev,
+		 const amrex::MultiFab& Ex, const amrex::MultiFab& Ey, const amrex::MultiFab& Ez,
+		 const amrex::MultiFab& Bx, const amrex::MultiFab& By, const amrex::MultiFab& Bz,
+		 amrex::MultiFab& jx,  amrex::MultiFab& jy, amrex::MultiFab& jz, 
+		 amrex::MultiFab* cjx,  amrex::MultiFab* cjy, amrex::MultiFab* cjz, 
+                 amrex::MultiFab* rho, amrex::MultiFab* crho,
+		 const amrex::MultiFab* cEx, const amrex::MultiFab* cEy, const amrex::MultiFab* cEz,
+		 const amrex::MultiFab* cBx, const amrex::MultiFab* cBy, const amrex::MultiFab* cBz,
+                 amrex::Real t, amrex::Real dt);
+
+    ///
+    /// This pushes the particle positions by one half time step for all the species in the
+    /// MultiParticleContainer. It is used to desynchronize the particles after initializaton
+    /// or when restarting from a checkpoint. This is the electromagnetic version.
+    /// 
+    void PushX (amrex::Real dt);
+
+    ///
+    /// This pushes the particle momenta by dt for all the species in the
+    /// MultiParticleContainer. It is used to desynchronize the particles after initializaton
+    /// or when restarting from a checkpoint.  It is also used to synchronize particles at the
+    /// the end of the run.  This is the electromagnetic version.
+    /// 
+    void PushP (int lev, amrex::Real dt,
+                const amrex::MultiFab& Ex, const amrex::MultiFab& Ey, const amrex::MultiFab& Ez,
+                const amrex::MultiFab& Bx, const amrex::MultiFab& By, const amrex::MultiFab& Bz);
+
+    ///
+    /// This deposits the particle charge onto a node-centered MultiFab and returns a unique ptr
+    /// to it. The charge density is accumulated over all the particles in the MultiParticleContainer
+    /// This version uses PICSAR routines to perform the deposition.
+    ///    
+    std::unique_ptr<amrex::MultiFab> GetChargeDensity(int lev, bool local = false);
+
+    void Checkpoint (const std::string& dir,
+		     bool is_checkpoint,
+                     const amrex::Vector<std::string>& varnames = amrex::Vector<std::string>()) const;
+
+    void Restart (const std::string& dir);
+
+    void PostRestart ();
+
+    void ReadHeader (std::istream& is);
+
+    void WriteHeader (std::ostream& os) const;
+
+    void SortParticlesByCell ();
+
+    void Redistribute ();
+
+    void RedistributeLocal (const int num_ghost);
+
+    amrex::Vector<long> NumberOfParticlesInGrid(int lev) const;
+
+    void Increment (amrex::MultiFab& mf, int lev);
+
+    void SetParticleBoxArray (int lev, amrex::BoxArray& new_ba);
+    void SetParticleDistributionMap (int lev, amrex::DistributionMapping& new_dm);
+
+    int nSpecies() const {return nspecies;}
+
+    int nSpeciesDepositOnMainGrid () const {
+        int r = 0;
+        for (int i : deposit_on_main_grid) {
+            if (i) ++r;
+        }
+        return r;
+    }
+
+    void GetLabFrameData(const std::string& snapshot_name,
+                         const int i_lab, const int direction,
+                         const amrex::Real z_old, const amrex::Real z_new,
+                         const amrex::Real t_boost, const amrex::Real t_lab, const amrex::Real dt,
+                         amrex::Vector<WarpXParticleContainer::DiagnosticParticleData>& parts) const;
+    
+    // 
+    // Parameters for the Cherenkov corrector in the FDTD solver.
+    // Both stencils are calculated ar runtime.
+    // 
+    // Number of coefficients for the stencil of the NCI corrector.
+    // The stencil is applied in the z direction only.
+    static constexpr int nstencilz_fdtd_nci_corr=5;
+
+    amrex::Vector<amrex::Array<amrex::Real, nstencilz_fdtd_nci_corr> > fdtd_nci_stencilz_ex;
+    amrex::Vector<amrex::Array<amrex::Real, nstencilz_fdtd_nci_corr> > fdtd_nci_stencilz_by;
+
+    std::vector<std::string> GetSpeciesNames() const { return species_names; }
+
+    PhysicalParticleContainer& GetPCtmp () { return *pc_tmp; }
+    
+protected:
+
+    // Particle container types
+    enum struct PCTypes {Physical, RigidInjected};
+
+    std::vector<std::string> species_names;
+
+    std::vector<int> deposit_on_main_grid;
+
+    std::vector<PCTypes> species_types;
+
+private:
+
+    // physical particles (+ laser)
+    amrex::Vector<std::unique_ptr<WarpXParticleContainer> > allcontainers;
+    // Temporary particle container, used e.g. for particle splitting.
+    std::unique_ptr<PhysicalParticleContainer> pc_tmp;
+
+    void ReadParameters ();
+
+    // runtime parameters
+    int nspecies = 1;   // physical particles only. If WarpX::use_laser, nspecies+1 == allcontainers.size().
+};
+#endif /*WARPX_ParticleContainer_H_*/
diff --git a/Source/Particles/MultiParticleContainer.cpp b/Source/Particles/MultiParticleContainer.cpp
new file mode 100644
index 000000000..1b644b543
--- /dev/null
+++ b/Source/Particles/MultiParticleContainer.cpp
@@ -0,0 +1,389 @@
+#include <limits>
+#include <algorithm>
+#include <string>
+
+#include <MultiParticleContainer.H>
+#include <WarpX_f.H>
+#include <WarpX.H>
+
+using namespace amrex;
+
+constexpr int MultiParticleContainer::nstencilz_fdtd_nci_corr;
+
+MultiParticleContainer::MultiParticleContainer (AmrCore* amr_core)
+{
+    ReadParameters();
+
+    int n = WarpX::use_laser ? nspecies+1 : nspecies;
+    allcontainers.resize(n);
+    for (int i = 0; i < nspecies; ++i) {
+        if (species_types[i] == PCTypes::Physical) {
+            allcontainers[i].reset(new PhysicalParticleContainer(amr_core, i, species_names[i]));
+        }
+        else if (species_types[i] == PCTypes::RigidInjected) {
+            allcontainers[i].reset(new RigidInjectedParticleContainer(amr_core, i, species_names[i]));
+        }
+        allcontainers[i]->deposit_on_main_grid = deposit_on_main_grid[i];
+    }
+    if (WarpX::use_laser) {
+	allcontainers[n-1].reset(new LaserParticleContainer(amr_core,n-1));
+    }
+    pc_tmp.reset(new PhysicalParticleContainer(amr_core));	
+}
+
+void
+MultiParticleContainer::ReadParameters ()
+{
+    static bool initialized = false;
+    if (!initialized)
+    {
+	ParmParse pp("particles");
+
+	pp.query("nspecies", nspecies);
+	BL_ASSERT(nspecies >= 0);
+
+        if (nspecies > 0) {
+            pp.getarr("species_names", species_names);
+            BL_ASSERT(species_names.size() == nspecies);
+
+            deposit_on_main_grid.resize(nspecies, 0);
+            std::vector<std::string> tmp;
+            pp.queryarr("deposit_on_main_grid", tmp);
+            for (auto const& name : tmp) {
+                auto it = std::find(species_names.begin(), species_names.end(), name);
+                AMREX_ALWAYS_ASSERT_WITH_MESSAGE(it != species_names.end(), "ERROR: species in particles.deposit_on_main_grid must be part of particles.species_names");
+                int i = std::distance(species_names.begin(), it);
+                deposit_on_main_grid[i] = 1;
+            }
+
+            species_types.resize(nspecies, PCTypes::Physical);
+
+            std::vector<std::string> rigid_injected_species;
+            pp.queryarr("rigid_injected_species", rigid_injected_species);
+
+            if (!rigid_injected_species.empty()) {
+                for (auto const& name : rigid_injected_species) {
+                    auto it = std::find(species_names.begin(), species_names.end(), name);
+                    AMREX_ALWAYS_ASSERT_WITH_MESSAGE(it != species_names.end(), "ERROR: species in particles.rigid_injected_species must be part of particles.species_names");
+                    int i = std::distance(species_names.begin(), it);
+                    species_types[i] = PCTypes::RigidInjected;
+                }
+            }
+        }
+	pp.query("use_fdtd_nci_corr", WarpX::use_fdtd_nci_corr);
+	pp.query("l_lower_order_in_v", WarpX::l_lower_order_in_v);
+	initialized = true;
+    }
+}
+
+void
+MultiParticleContainer::AllocData ()
+{
+    for (auto& pc : allcontainers) {
+	pc->AllocData();
+    }
+    pc_tmp->AllocData();
+}
+
+void
+MultiParticleContainer::InitData ()
+{
+    for (auto& pc : allcontainers) {
+	pc->InitData();
+    }
+    pc_tmp->InitData();
+}
+
+
+#ifdef WARPX_DO_ELECTROSTATIC
+void
+MultiParticleContainer::FieldGatherES (const Vector<std::array<std::unique_ptr<MultiFab>, 3> >& E,
+                                       const amrex::Vector<std::unique_ptr<amrex::FabArray<amrex::BaseFab<int> > > >& masks)
+{
+    for (auto& pc : allcontainers) {
+	pc->FieldGatherES(E, masks);
+    }
+}
+
+void
+MultiParticleContainer::EvolveES (const Vector<std::array<std::unique_ptr<MultiFab>, 3> >& E,
+                                        Vector<std::unique_ptr<MultiFab> >& rho,
+                                  Real t, Real dt)
+{
+
+    int nlevs = rho.size();
+    int ng = rho[0]->nGrow();
+
+    for (unsigned i = 0; i < nlevs; i++) {
+        rho[i]->setVal(0.0, ng);
+    }
+
+    for (auto& pc : allcontainers) {
+	pc->EvolveES(E, rho, t, dt);
+    }
+
+    for (unsigned i = 0; i < nlevs; i++) {
+        const Geometry& gm = allcontainers[0]->Geom(i);
+        rho[i]->SumBoundary(gm.periodicity());
+    }
+}
+
+void
+MultiParticleContainer::Evolve (int lev,
+                                const MultiFab& Ex, const MultiFab& Ey, const MultiFab& Ez,
+                                const MultiFab& Bx, const MultiFab& By, const MultiFab& Bz,
+                                MultiFab& jx, MultiFab& jy, MultiFab& jz,
+                                MultiFab* cjx,  MultiFab* cjy, MultiFab* cjz, 
+                                MultiFab* rho,
+                                const MultiFab* cEx, const MultiFab* cEy, const MultiFab* cEz,
+                                const MultiFab* cBx, const MultiFab* cBy, const MultiFab* cBz,
+                                Real t, Real dt)
+{
+    jx.setVal(0.0);
+    jy.setVal(0.0);
+    jz.setVal(0.0);
+    if (cjx) cjx->setVal(0.0);
+    if (cjy) cjy->setVal(0.0);
+    if (cjz) cjz->setVal(0.0);
+    if (rho) rho->setVal(0.0);
+    for (auto& pc : allcontainers) {
+	pc->Evolve(lev, Ex, Ey, Ez, Bx, By, Bz, jx, jy, jz, cjx, cjy, cjz,
+                   rho, cEx, cEy, cEz, cBx, cBy, cBz, t, dt);
+    }    
+}
+
+void
+MultiParticleContainer::PushXES (Real dt)
+{
+    for (auto& pc : allcontainers) {
+	pc->PushXES(dt);
+    }
+}
+
+void
+MultiParticleContainer::
+DepositCharge (Vector<std::unique_ptr<MultiFab> >& rho, bool local)
+{
+    int nlevs = rho.size();
+    int ng = rho[0]->nGrow();
+
+    for (unsigned i = 0; i < nlevs; i++) {
+        rho[i]->setVal(0.0, ng);
+    }
+
+    for (unsigned i = 0, n = allcontainers.size(); i < n; ++i) {
+	allcontainers[i]->DepositCharge(rho, true);
+    }
+
+    if (!local) {
+        for (unsigned i = 0; i < nlevs; i++) {
+            const Geometry& gm = allcontainers[0]->Geom(i);
+            rho[i]->SumBoundary(gm.periodicity());
+        }
+    }
+}
+
+amrex::Real
+MultiParticleContainer::sumParticleCharge (bool local)
+{
+    amrex::Real total_charge = allcontainers[0]->sumParticleCharge(local);
+    for (unsigned i = 1, n = allcontainers.size(); i < n; ++i) {
+        total_charge += allcontainers[i]->sumParticleCharge(local);
+    }
+    return total_charge;
+}
+
+#endif // WARPX_DO_ELECTROSTATIC
+
+void
+MultiParticleContainer::FieldGather (int lev,
+                                     const MultiFab& Ex, const MultiFab& Ey, const MultiFab& Ez,
+                                     const MultiFab& Bx, const MultiFab& By, const MultiFab& Bz)
+{
+    for (auto& pc : allcontainers) {
+	pc->FieldGather(lev, Ex, Ey, Ez, Bx, By, Bz);
+    }
+}
+
+void
+MultiParticleContainer::Evolve (int lev,
+                                const MultiFab& Ex, const MultiFab& Ey, const MultiFab& Ez,
+                                const MultiFab& Bx, const MultiFab& By, const MultiFab& Bz,
+                                MultiFab& jx, MultiFab& jy, MultiFab& jz,
+                                MultiFab* cjx,  MultiFab* cjy, MultiFab* cjz,
+                                MultiFab* rho, MultiFab* crho,
+                                const MultiFab* cEx, const MultiFab* cEy, const MultiFab* cEz,
+                                const MultiFab* cBx, const MultiFab* cBy, const MultiFab* cBz,
+                                Real t, Real dt)
+{
+    jx.setVal(0.0);
+    jy.setVal(0.0);
+    jz.setVal(0.0);
+    if (cjx) cjx->setVal(0.0);
+    if (cjy) cjy->setVal(0.0);
+    if (cjz) cjz->setVal(0.0);
+    if (rho) rho->setVal(0.0);
+    if (crho) crho->setVal(0.0);
+    for (auto& pc : allcontainers) {
+	pc->Evolve(lev, Ex, Ey, Ez, Bx, By, Bz, jx, jy, jz, cjx, cjy, cjz,
+                   rho, crho, cEx, cEy, cEz, cBx, cBy, cBz, t, dt);
+    }
+}
+
+void
+MultiParticleContainer::PushX (Real dt)
+{
+    for (auto& pc : allcontainers) {
+	pc->PushX(dt);
+    }
+}
+
+void
+MultiParticleContainer::PushP (int lev, Real dt,
+                               const MultiFab& Ex, const MultiFab& Ey, const MultiFab& Ez,
+                               const MultiFab& Bx, const MultiFab& By, const MultiFab& Bz)
+{
+    for (auto& pc : allcontainers) {
+       pc->PushP(lev, dt, Ex, Ey, Ez, Bx, By, Bz);
+    }
+}
+
+std::unique_ptr<MultiFab>
+MultiParticleContainer::GetChargeDensity (int lev, bool local)
+{
+    std::unique_ptr<MultiFab> rho = allcontainers[0]->GetChargeDensity(lev, true);
+    for (unsigned i = 1, n = allcontainers.size(); i < n; ++i) {
+	std::unique_ptr<MultiFab> rhoi = allcontainers[i]->GetChargeDensity(lev, true);
+	MultiFab::Add(*rho, *rhoi, 0, 0, 1, rho->nGrow());
+    }
+    if (!local) {
+	const Geometry& gm = allcontainers[0]->Geom(lev);
+	rho->SumBoundary(gm.periodicity());
+    }
+    return rho;
+}
+
+void
+MultiParticleContainer::SortParticlesByCell ()
+{
+    for (auto& pc : allcontainers) {
+	pc->SortParticlesByCell();
+    }
+}
+
+void
+MultiParticleContainer::Redistribute ()
+{
+    for (auto& pc : allcontainers) {
+	pc->Redistribute();
+    }
+}
+
+void
+MultiParticleContainer::RedistributeLocal (const int num_ghost)
+{
+    for (auto& pc : allcontainers) {
+	pc->Redistribute(0, 0, 0, num_ghost);
+    }
+}
+
+Vector<long>
+MultiParticleContainer::NumberOfParticlesInGrid(int lev) const
+{
+    const bool only_valid=true, only_local=true;
+    Vector<long> r = allcontainers[0]->NumberOfParticlesInGrid(lev,only_valid,only_local);
+    for (unsigned i = 1, n = allcontainers.size(); i < n; ++i) {
+	const auto& ri = allcontainers[i]->NumberOfParticlesInGrid(lev,only_valid,only_local);
+	for (unsigned j=0, m=ri.size(); j<m; ++j) {
+	    r[j] += ri[j];
+	}
+    }
+    ParallelDescriptor::ReduceLongSum(r.data(),r.size());
+    return r;
+}
+
+void
+MultiParticleContainer::Increment (MultiFab& mf, int lev)
+{
+    for (auto& pc : allcontainers) {
+	pc->Increment(mf,lev);
+    }
+}
+
+void
+MultiParticleContainer::SetParticleBoxArray (int lev, BoxArray& new_ba)
+{
+    for (auto& pc : allcontainers) {
+	pc->SetParticleBoxArray(lev,new_ba);
+    }
+}
+
+void
+MultiParticleContainer::SetParticleDistributionMap (int lev, DistributionMapping& new_dm)
+{
+    for (auto& pc : allcontainers) {
+	pc->SetParticleDistributionMap(lev,new_dm);
+    }
+}
+
+void
+MultiParticleContainer::PostRestart ()
+{
+    for (auto& pc : allcontainers) {
+	pc->PostRestart();
+    }
+    pc_tmp->PostRestart();
+}
+
+void
+MultiParticleContainer
+::GetLabFrameData(const std::string& snapshot_name,
+                  const int i_lab, const int direction,
+                  const Real z_old, const Real z_new,
+                  const Real t_boost, const Real t_lab, const Real dt,
+                  Vector<WarpXParticleContainer::DiagnosticParticleData>& parts) const
+{
+
+    BL_PROFILE("MultiParticleContainer::GetLabFrameData");
+    
+    for (int i = 0; i < nspecies; ++i)
+    {
+        WarpXParticleContainer* pc = allcontainers[i].get();
+        WarpXParticleContainer::DiagnosticParticles diagnostic_particles;
+        pc->GetParticleSlice(direction, z_old, z_new, t_boost, t_lab, dt, diagnostic_particles);
+
+        for (int lev = 0; lev <= pc->finestLevel(); ++lev)
+        {
+            for (auto it = diagnostic_particles[lev].begin(); it != diagnostic_particles[lev].end(); ++it)
+            {
+                parts[i].GetRealData(DiagIdx::w).insert(  parts[i].GetRealData(DiagIdx::w  ).end(),
+                                                          it->second.GetRealData(DiagIdx::w  ).begin(),
+                                                          it->second.GetRealData(DiagIdx::w  ).end());
+                
+                parts[i].GetRealData(DiagIdx::x).insert(  parts[i].GetRealData(DiagIdx::x  ).end(),
+                                                          it->second.GetRealData(DiagIdx::x  ).begin(),
+                                                          it->second.GetRealData(DiagIdx::x  ).end());
+                
+                parts[i].GetRealData(DiagIdx::y).insert(  parts[i].GetRealData(DiagIdx::y  ).end(),
+                                                          it->second.GetRealData(DiagIdx::y  ).begin(),
+                                                          it->second.GetRealData(DiagIdx::y  ).end());
+                
+                parts[i].GetRealData(DiagIdx::z).insert(  parts[i].GetRealData(DiagIdx::z  ).end(),
+                                                          it->second.GetRealData(DiagIdx::z  ).begin(),
+                                                          it->second.GetRealData(DiagIdx::z  ).end());
+                
+                parts[i].GetRealData(DiagIdx::ux).insert(  parts[i].GetRealData(DiagIdx::ux).end(),
+                                                           it->second.GetRealData(DiagIdx::ux).begin(),
+                                                           it->second.GetRealData(DiagIdx::ux).end());
+                
+                parts[i].GetRealData(DiagIdx::uy).insert(  parts[i].GetRealData(DiagIdx::uy).end(),
+                                                           it->second.GetRealData(DiagIdx::uy).begin(),
+                                                           it->second.GetRealData(DiagIdx::uy).end());
+                
+                parts[i].GetRealData(DiagIdx::uz).insert(  parts[i].GetRealData(DiagIdx::uz).end(),
+                                                           it->second.GetRealData(DiagIdx::uz).begin(),
+                                                           it->second.GetRealData(DiagIdx::uz).end());
+            }
+        }
+    }
+}
diff --git a/Source/Particles/PhysicalParticleContainer.H b/Source/Particles/PhysicalParticleContainer.H
new file mode 100644
index 000000000..5847a6359
--- /dev/null
+++ b/Source/Particles/PhysicalParticleContainer.H
@@ -0,0 +1,127 @@
+#ifndef WARPX_PhysicalParticleContainer_H_
+#define WARPX_PhysicalParticleContainer_H_
+
+#include <map>
+
+#include <AMReX_IArrayBox.H>
+
+#include <PlasmaInjector.H>
+#include <WarpXParticleContainer.H>
+
+class PhysicalParticleContainer
+    : public WarpXParticleContainer
+{
+public:
+    PhysicalParticleContainer (amrex::AmrCore* amr_core,
+                               int ispecies,
+                               const std::string& name);
+
+    PhysicalParticleContainer (amrex::AmrCore* amr_core);
+
+    virtual ~PhysicalParticleContainer () {}
+
+    virtual void InitData () override;
+
+#ifdef WARPX_DO_ELECTROSTATIC    
+    virtual void FieldGatherES(const amrex::Vector<std::array<std::unique_ptr<amrex::MultiFab>, 3> >& E,
+                               const amrex::Vector<std::unique_ptr<amrex::FabArray<amrex::BaseFab<int> > > >& masks) override;
+
+    virtual void EvolveES (const amrex::Vector<std::array<std::unique_ptr<amrex::MultiFab>, 3> >& E,
+                                 amrex::Vector<std::unique_ptr<amrex::MultiFab> >& rho,
+                           amrex::Real t, amrex::Real dt) override;
+#endif // WARPX_DO_ELECTROSTATIC
+    
+    virtual void FieldGather(int lev,
+                             const amrex::MultiFab& Ex,
+                             const amrex::MultiFab& Ey,
+                             const amrex::MultiFab& Ez,
+                             const amrex::MultiFab& Bx,
+                             const amrex::MultiFab& By,
+                             const amrex::MultiFab& Bz) final;
+
+    virtual void Evolve (int lev,
+			 const amrex::MultiFab& Ex,
+                         const amrex::MultiFab& Ey,
+                         const amrex::MultiFab& Ez,
+			 const amrex::MultiFab& Bx,
+                         const amrex::MultiFab& By,
+                         const amrex::MultiFab& Bz,
+			 amrex::MultiFab& jx,
+                         amrex::MultiFab& jy,
+                         amrex::MultiFab& jz,
+                         amrex::MultiFab* cjx,
+                         amrex::MultiFab* cjy,
+                         amrex::MultiFab* cjz,
+                         amrex::MultiFab* rho,
+                         amrex::MultiFab* crho,
+                         const amrex::MultiFab* cEx,
+                         const amrex::MultiFab* cEy,
+                         const amrex::MultiFab* cEz,
+                         const amrex::MultiFab* cBx,
+                         const amrex::MultiFab* cBy,
+                         const amrex::MultiFab* cBz,
+                         amrex::Real t,
+                         amrex::Real dt) override;
+
+    virtual void PushPX(WarpXParIter& pti,
+	                amrex::Cuda::DeviceVector<amrex::Real>& xp,
+                        amrex::Cuda::DeviceVector<amrex::Real>& yp,
+                        amrex::Cuda::DeviceVector<amrex::Real>& zp,
+                        amrex::Cuda::DeviceVector<amrex::Real>& giv,
+                        amrex::Real dt);
+
+    virtual void PushP (int lev, amrex::Real dt,
+                        const amrex::MultiFab& Ex,
+                        const amrex::MultiFab& Ey,
+                        const amrex::MultiFab& Ez,
+                        const amrex::MultiFab& Bx,
+                        const amrex::MultiFab& By,
+                        const amrex::MultiFab& Bz) override;
+
+    virtual void PostRestart () final {}
+
+    void SplitParticles(int lev);
+
+    // Inject particles in Box 'part_box'
+    virtual void AddParticles (int lev);
+    void AddPlasma(int lev, amrex::RealBox part_realbox = amrex::RealBox());
+    void AddPlasmaCPU (int lev, amrex::RealBox part_realbox);
+#ifdef AMREX_USE_GPU
+    void AddPlasmaGPU (int lev, amrex::RealBox part_realbox);
+#endif
+
+    void MapParticletoBoostedFrame(amrex::Real& x, amrex::Real& y, amrex::Real& z, std::array<amrex::Real, 3>& u);
+
+    void AddGaussianBeam(amrex::Real x_m, amrex::Real y_m, amrex::Real z_m,
+                         amrex::Real x_rms, amrex::Real y_rms, amrex::Real z_rms,
+                         amrex::Real q_tot, long npart);
+
+    virtual void GetParticleSlice(const int direction, const amrex::Real z_old,
+                                  const amrex::Real z_new, const amrex::Real t_boost, 
+                                  const amrex::Real t_lab, const amrex::Real dt,
+                                  DiagnosticParticles& diagnostic_particles) final;
+
+    bool injected = false;
+    
+protected:
+
+    std::string species_name;
+    std::unique_ptr<PlasmaInjector> plasma_injector;
+
+    // When true, adjust the transverse particle positions accounting
+    // for the difference between the Lorentz transformed time of the
+    // particle and the time of the boosted frame.
+    bool boost_adjust_transverse_positions = false;
+    bool do_backward_propagation = false;
+
+    long NumParticlesToAdd (const amrex::Box& overlap_box,
+			    const amrex::RealBox& overlap_realbox,
+			    const amrex::RealBox& tile_real_box,
+			    const amrex::RealBox& particle_real_box);
+  
+    int GetRefineFac(const amrex::Real x, const amrex::Real y, const amrex::Real z);
+    std::unique_ptr<amrex::IArrayBox> m_refined_injection_mask = nullptr;
+
+};
+
+#endif
diff --git a/Source/Particles/PhysicalParticleContainer.cpp b/Source/Particles/PhysicalParticleContainer.cpp
new file mode 100644
index 000000000..b534de916
--- /dev/null
+++ b/Source/Particles/PhysicalParticleContainer.cpp
@@ -0,0 +1,1941 @@
+#include <limits>
+#include <sstream>
+
+#include <MultiParticleContainer.H>
+#include <WarpX_f.H>
+#include <WarpX.H>
+#include <WarpXConst.H>
+#include <WarpXWrappers.h>
+
+
+using namespace amrex;
+
+long PhysicalParticleContainer::
+NumParticlesToAdd(const Box& overlap_box, const RealBox& overlap_realbox,
+		  const RealBox& tile_realbox, const RealBox& particle_real_box)
+{
+    const int lev = 0;
+    const Geometry& geom = Geom(lev);
+    int num_ppc = plasma_injector->num_particles_per_cell;
+    const Real* dx = geom.CellSize();
+
+    long np = 0;
+    const auto& overlap_corner = overlap_realbox.lo();
+    for (IntVect iv = overlap_box.smallEnd(); iv <= overlap_box.bigEnd(); overlap_box.next(iv))
+    {
+        int fac;
+	if (injected) {
+#if ( AMREX_SPACEDIM == 3 )
+	    Real x = overlap_corner[0] + (iv[0] + 0.5)*dx[0];
+	    Real y = overlap_corner[1] + (iv[1] + 0.5)*dx[1];
+	    Real z = overlap_corner[2] + (iv[2] + 0.5)*dx[2];
+#elif ( AMREX_SPACEDIM == 2 )
+	    Real x = overlap_corner[0] + (iv[0] + 0.5)*dx[0];
+	    Real y = 0;
+	    Real z = overlap_corner[1] + (iv[1] + 0.5)*dx[1];
+#endif
+	    fac = GetRefineFac(x, y, z);
+	} else {
+	    fac = 1.0;
+	}
+	
+	int ref_num_ppc = num_ppc * AMREX_D_TERM(fac, *fac, *fac);
+	for (int i_part=0; i_part<ref_num_ppc;i_part++) {
+	    std::array<Real, 3> r;
+	    plasma_injector->getPositionUnitBox(r, i_part, fac);
+#if ( AMREX_SPACEDIM == 3 )
+	    Real x = overlap_corner[0] + (iv[0] + r[0])*dx[0];
+	    Real y = overlap_corner[1] + (iv[1] + r[1])*dx[1];
+	    Real z = overlap_corner[2] + (iv[2] + r[2])*dx[2];
+#elif ( AMREX_SPACEDIM == 2 )
+	    Real x = overlap_corner[0] + (iv[0] + r[0])*dx[0];
+	    Real y = 0;
+	    Real z = overlap_corner[1] + (iv[1] + r[1])*dx[1];
+#endif
+	    // If the new particle is not inside the tile box,
+	    // go to the next generated particle.
+#if ( AMREX_SPACEDIM == 3 )
+	    if(!tile_realbox.contains( RealVect{x, y, z} )) continue;
+#elif ( AMREX_SPACEDIM == 2 )
+	    if(!tile_realbox.contains( RealVect{x, z} )) continue;
+#endif
+	    ++np;
+	}
+    }
+    
+    return np;
+}
+
+PhysicalParticleContainer::PhysicalParticleContainer (AmrCore* amr_core, int ispecies,
+                                                      const std::string& name)
+    : WarpXParticleContainer(amr_core, ispecies),
+      species_name(name)
+{
+    plasma_injector.reset(new PlasmaInjector(species_id, species_name));
+    charge = plasma_injector->getCharge();
+    mass = plasma_injector->getMass();
+
+    ParmParse pp(species_name);
+
+    pp.query("boost_adjust_transverse_positions", boost_adjust_transverse_positions);
+    pp.query("do_backward_propagation", do_backward_propagation);
+    pp.query("do_splitting", do_splitting);
+    pp.query("split_type", split_type);
+}
+
+PhysicalParticleContainer::PhysicalParticleContainer (AmrCore* amr_core)
+    : WarpXParticleContainer(amr_core, 0)
+{
+    plasma_injector.reset(new PlasmaInjector());
+}
+
+void PhysicalParticleContainer::InitData()
+{
+    AddParticles(0); // Note - add on level 0
+    if (maxLevel() > 0) {
+        Redistribute();  // We then redistribute
+    }
+}
+
+void PhysicalParticleContainer::MapParticletoBoostedFrame(Real& x, Real& y, Real& z, std::array<Real, 3>& u)
+{
+    // Map the particles from the lab frame to the boosted frame.
+    // This boosts the particle to the lab frame and calculates
+    // the particle time in the boosted frame. It then maps
+    // the position to the time in the boosted frame.
+
+    // For now, start with the assumption that this will only happen
+    // at the start of the simulation.
+    const Real t_lab = 0.;
+
+    const Real uz_boost = WarpX::gamma_boost*WarpX::beta_boost*PhysConst::c;
+
+    // tpr is the particle's time in the boosted frame
+    Real tpr = WarpX::gamma_boost*t_lab - uz_boost*z/(PhysConst::c*PhysConst::c);
+
+    // The particle's transformed location in the boosted frame
+    Real xpr = x;
+    Real ypr = y;
+    Real zpr = WarpX::gamma_boost*z - uz_boost*t_lab;
+
+    // transform u and gamma to the boosted frame
+    Real gamma_lab = std::sqrt(1. + (u[0]*u[0] + u[1]*u[1] + u[2]*u[2])/(PhysConst::c*PhysConst::c));
+    // u[0] = u[0];
+    // u[1] = u[1];
+    u[2] = WarpX::gamma_boost*u[2] - uz_boost*gamma_lab;
+    Real gammapr = std::sqrt(1. + (u[0]*u[0] + u[1]*u[1] + u[2]*u[2])/(PhysConst::c*PhysConst::c));
+
+    Real vxpr = u[0]/gammapr;
+    Real vypr = u[1]/gammapr;
+    Real vzpr = u[2]/gammapr;
+
+    if (do_backward_propagation){
+        u[2] = -u[2];
+    }
+
+    // Move the particles to where they will be at t = 0 in the boosted frame
+    if (boost_adjust_transverse_positions) {
+      x = xpr - tpr*vxpr;
+      y = ypr - tpr*vypr;
+    }
+
+    z = zpr - tpr*vzpr;
+
+}
+
+void
+PhysicalParticleContainer::AddGaussianBeam(Real x_m, Real y_m, Real z_m,
+                                           Real x_rms, Real y_rms, Real z_rms,
+                                           Real q_tot, long npart) {
+
+    const Geometry& geom     = m_gdb->Geom(0);
+    RealBox containing_bx = geom.ProbDomain();
+
+    std::mt19937_64 mt(0451);
+    std::normal_distribution<double> distx(x_m, x_rms);
+    std::normal_distribution<double> disty(y_m, y_rms);
+    std::normal_distribution<double> distz(z_m, z_rms);
+
+    std::array<Real,PIdx::nattribs> attribs;
+    attribs.fill(0.0);
+
+    if (ParallelDescriptor::IOProcessor()) {
+       std::array<Real, 3> u;
+       Real weight;
+       for (long i = 0; i < npart; ++i) {
+#if ( AMREX_SPACEDIM == 3 | WARPX_RZ)
+            weight = q_tot/npart/charge;
+            Real x = distx(mt);
+            Real y = disty(mt);
+            Real z = distz(mt);
+#elif ( AMREX_SPACEDIM == 2 )
+            weight = q_tot/npart/charge/y_rms;
+            Real x = distx(mt);
+            Real y = 0.;
+            Real z = distz(mt);
+#endif
+        if (plasma_injector->insideBounds(x, y, z)) {
+	    plasma_injector->getMomentum(u, x, y, z);
+            if (WarpX::gamma_boost > 1.) {
+                MapParticletoBoostedFrame(x, y, z, u);
+            }
+            attribs[PIdx::ux] = u[0];
+            attribs[PIdx::uy] = u[1];
+            attribs[PIdx::uz] = u[2];
+            attribs[PIdx::w ] = weight;
+
+            AddOneParticle(0, 0, 0, x, y, z, attribs);
+            }
+        }
+    }
+    Redistribute();
+}
+
+void
+PhysicalParticleContainer::AddParticles (int lev)
+{
+    BL_PROFILE("PhysicalParticleContainer::AddParticles()");
+
+    if (plasma_injector->add_single_particle) {
+        AddNParticles(lev, 1,
+                      &(plasma_injector->single_particle_pos[0]),
+                      &(plasma_injector->single_particle_pos[1]),
+                      &(plasma_injector->single_particle_pos[2]),
+                      &(plasma_injector->single_particle_vel[0]),
+                      &(plasma_injector->single_particle_vel[1]),
+                      &(plasma_injector->single_particle_vel[2]),
+                      1, &(plasma_injector->single_particle_weight), 0);
+        return;
+    }
+
+    if (plasma_injector->gaussian_beam) {
+        AddGaussianBeam(plasma_injector->x_m,
+                        plasma_injector->y_m,
+                        plasma_injector->z_m,
+                        plasma_injector->x_rms,
+                        plasma_injector->y_rms,
+                        plasma_injector->z_rms,
+                        plasma_injector->q_tot,
+                        plasma_injector->npart);
+
+
+        return;
+    }
+
+    if ( plasma_injector->doInjection() ) {
+        AddPlasma( lev );
+    }
+}
+
+/**
+ * Create new macroparticles for this species, with a fixed
+ * number of particles per cell (in the cells of `part_realbox`).
+ * The new particles are only created inside the intersection of `part_realbox`
+ * with the local grid for the current proc.
+ * @param lev the index of the refinement level
+ * @param part_realbox the box in which new particles should be created
+ * (this box should correspond to an integer number of cells in each direction,
+ * but its boundaries need not be aligned with the actual cells of the simulation)
+ */
+void
+PhysicalParticleContainer::AddPlasma (int lev, RealBox part_realbox)
+{
+#ifdef AMREX_USE_GPU
+  AddPlasmaGPU(lev, part_realbox);
+#else
+  AddPlasmaCPU(lev, part_realbox);
+#endif
+}
+
+void
+PhysicalParticleContainer::AddPlasmaCPU (int lev, RealBox part_realbox)
+{
+    BL_PROFILE("PhysicalParticleContainer::AddPlasmaCPU");
+
+    // If no part_realbox is provided, initialize particles in the whole domain
+    const Geometry& geom = Geom(lev);
+    if (!part_realbox.ok()) part_realbox = geom.ProbDomain();
+
+    int num_ppc = plasma_injector->num_particles_per_cell;
+
+    const Real* dx = geom.CellSize();
+
+    Real scale_fac;
+#if AMREX_SPACEDIM==3
+    scale_fac = dx[0]*dx[1]*dx[2]/num_ppc;
+#elif AMREX_SPACEDIM==2
+    scale_fac = dx[0]*dx[1]/num_ppc;
+#endif
+
+#ifdef _OPENMP
+    // First touch all tiles in the map in serial
+    for (MFIter mfi = MakeMFIter(lev); mfi.isValid(); ++mfi) {
+        const int grid_id = mfi.index();
+        const int tile_id = mfi.LocalTileIndex();
+        GetParticles(lev)[std::make_pair(grid_id, tile_id)];
+    }
+#endif
+
+    MultiFab* cost = WarpX::getCosts(lev);
+
+    if ( (not m_refined_injection_mask) and WarpX::do_moving_window)
+    {
+        Box mask_box = geom.Domain();
+        mask_box.setSmall(WarpX::moving_window_dir, 0);
+        mask_box.setBig(WarpX::moving_window_dir, 0);
+        m_refined_injection_mask.reset( new IArrayBox(mask_box));
+        m_refined_injection_mask->setVal(-1);
+    }
+
+    MFItInfo info;
+    if (do_tiling) {
+        info.EnableTiling(tile_size);
+    }
+    info.SetDynamic(true);
+
+#ifdef _OPENMP
+#pragma omp parallel if (not WarpX::serialize_ics)
+#endif
+    {
+        std::array<Real,PIdx::nattribs> attribs;
+        attribs.fill(0.0);
+
+        // Loop through the tiles
+        for (MFIter mfi = MakeMFIter(lev, info); mfi.isValid(); ++mfi) {
+
+            Real wt = amrex::second();
+
+            const Box& tile_box = mfi.tilebox();
+            const RealBox tile_realbox = WarpX::getRealBox(tile_box, lev);
+
+            // Find the cells of part_box that overlap with tile_realbox
+            // If there is no overlap, just go to the next tile in the loop
+            RealBox overlap_realbox;
+            Box overlap_box;
+            Real ncells_adjust;
+            bool no_overlap = 0;
+
+            for (int dir=0; dir<AMREX_SPACEDIM; dir++) {
+                if ( tile_realbox.lo(dir) <= part_realbox.hi(dir) ) {
+                    ncells_adjust = std::floor( (tile_realbox.lo(dir) - part_realbox.lo(dir))/dx[dir] );
+                    overlap_realbox.setLo( dir, part_realbox.lo(dir) + std::max(ncells_adjust, 0.) * dx[dir]);
+                } else {
+                    no_overlap = 1; break;
+                }
+                if ( tile_realbox.hi(dir) >= part_realbox.lo(dir) ) {
+                    ncells_adjust = std::floor( (part_realbox.hi(dir) - tile_realbox.hi(dir))/dx[dir] );
+                    overlap_realbox.setHi( dir, part_realbox.hi(dir) - std::max(ncells_adjust, 0.) * dx[dir]);
+                } else {
+                    no_overlap = 1; break;
+                }
+                // Count the number of cells in this direction in overlap_realbox
+                overlap_box.setSmall( dir, 0 );
+                overlap_box.setBig( dir,
+				    int( round((overlap_realbox.hi(dir)-overlap_realbox.lo(dir))/dx[dir] )) - 1);
+            }
+            if (no_overlap == 1) {
+                continue; // Go to the next tile
+            }
+
+            const int grid_id = mfi.index();
+            const int tile_id = mfi.LocalTileIndex();
+
+            // Loop through the cells of overlap_box and inject
+            // the corresponding particles
+            const auto& overlap_corner = overlap_realbox.lo();
+            for (IntVect iv = overlap_box.smallEnd(); iv <= overlap_box.bigEnd(); overlap_box.next(iv))
+            {
+                int fac;
+                if (injected) {
+#if ( AMREX_SPACEDIM == 3 )
+                    Real x = overlap_corner[0] + (iv[0] + 0.5)*dx[0];
+                    Real y = overlap_corner[1] + (iv[1] + 0.5)*dx[1];
+                    Real z = overlap_corner[2] + (iv[2] + 0.5)*dx[2];
+#elif ( AMREX_SPACEDIM == 2 )
+                    Real x = overlap_corner[0] + (iv[0] + 0.5)*dx[0];
+                    Real y = 0;
+                    Real z = overlap_corner[1] + (iv[1] + 0.5)*dx[1];
+#endif
+                    fac = GetRefineFac(x, y, z);
+                } else {
+                    fac = 1.0;
+                }
+
+                int ref_num_ppc = num_ppc * AMREX_D_TERM(fac, *fac, *fac);
+                for (int i_part=0; i_part<ref_num_ppc;i_part++) {
+                    std::array<Real, 3> r;
+                    plasma_injector->getPositionUnitBox(r, i_part, fac);
+#if ( AMREX_SPACEDIM == 3 )
+                    Real x = overlap_corner[0] + (iv[0] + r[0])*dx[0];
+                    Real y = overlap_corner[1] + (iv[1] + r[1])*dx[1];
+                    Real z = overlap_corner[2] + (iv[2] + r[2])*dx[2];
+#elif ( AMREX_SPACEDIM == 2 )
+                    Real x = overlap_corner[0] + (iv[0] + r[0])*dx[0];
+                    Real y = 0;
+                    Real z = overlap_corner[1] + (iv[1] + r[1])*dx[1];
+#endif
+                    // If the new particle is not inside the tile box,
+                    // go to the next generated particle.
+#if ( AMREX_SPACEDIM == 3 )
+                    if(!tile_realbox.contains( RealVect{x, y, z} )) continue;
+#elif ( AMREX_SPACEDIM == 2 )
+                    if(!tile_realbox.contains( RealVect{x, z} )) continue;
+#endif
+
+                    Real dens;
+                    std::array<Real, 3> u;
+                    if (WarpX::gamma_boost == 1.){
+                      // Lab-frame simulation
+                      // If the particle is not within the species's
+                      // xmin, xmax, ymin, ymax, zmin, zmax, go to
+                      // the next generated particle.
+                      if (!plasma_injector->insideBounds(x, y, z)) continue;
+                      plasma_injector->getMomentum(u, x, y, z);
+                      dens = plasma_injector->getDensity(x, y, z);
+                    } else {
+                      // Boosted-frame simulation
+                      Real c = PhysConst::c;
+                      Real gamma_boost = WarpX::gamma_boost;
+                      Real beta_boost = WarpX::beta_boost;
+                      // Since the user provides the density distribution
+                      // at t_lab=0 and in the lab-frame coordinates,
+                      // we need to find the lab-frame position of this
+                      // particle at t_lab=0, from its boosted-frame coordinates
+                      // Assuming ballistic motion, this is given by:
+                      // z0_lab = gamma*( z_boost*(1-beta*betaz_lab) - ct_boost*(betaz_lab-beta) )
+                      // where betaz_lab is the speed of the particle in the lab frame
+                      //
+                      // In order for this equation to be solvable, betaz_lab
+                      // is explicitly assumed to have no dependency on z0_lab
+                      plasma_injector->getMomentum(u, x, y, 0.); // No z0_lab dependency
+                      // At this point u is the lab-frame momentum
+                      // => Apply the above formula for z0_lab
+                      Real gamma_lab = std::sqrt( 1 + (u[0]*u[0] + u[1]*u[1] + u[2]*u[2])/(c*c) );
+                      Real betaz_lab = u[2]/gamma_lab/c;
+                      Real t = WarpX::GetInstance().gett_new(lev);
+                      Real z0_lab = gamma_boost * ( z*(1-beta_boost*betaz_lab) - c*t*(betaz_lab-beta_boost) );
+                      // If the particle is not within the lab-frame zmin, zmax, etc.
+                      // go to the next generated particle.
+                      if (!plasma_injector->insideBounds(x, y, z0_lab)) continue;
+                      // call `getDensity` with lab-frame parameters
+                      dens = plasma_injector->getDensity(x, y, z0_lab);
+                      // At this point u and dens are the lab-frame quantities
+                      // => Perform Lorentz transform
+                      dens = gamma_boost * dens * ( 1 - beta_boost*betaz_lab );
+                      u[2] = gamma_boost * ( u[2] -beta_boost*c*gamma_lab );
+                    }
+                    attribs[PIdx::w ] = dens * scale_fac / (AMREX_D_TERM(fac, *fac, *fac));
+                    attribs[PIdx::ux] = u[0];
+                    attribs[PIdx::uy] = u[1];
+                    attribs[PIdx::uz] = u[2];
+
+#ifdef WARPX_STORE_OLD_PARTICLE_ATTRIBS
+                    attribs[PIdx::xold] = x;
+                    attribs[PIdx::yold] = y;
+                    attribs[PIdx::zold] = z;
+
+                    attribs[PIdx::uxold] = u[0];
+                    attribs[PIdx::uyold] = u[1];
+                    attribs[PIdx::uzold] = u[2];
+#endif
+
+		    AddOneParticle(lev, grid_id, tile_id, x, y, z, attribs);
+                }
+            }
+
+            if (cost) {
+	        wt = (amrex::second() - wt) / tile_box.d_numPts();
+                FArrayBox* costfab = cost->fabPtr(mfi);
+                AMREX_LAUNCH_HOST_DEVICE_LAMBDA ( tile_box, work_box,
+                {
+                    costfab->plus(wt, work_box);
+                });
+            }
+        }
+    }
+}
+
+#ifdef AMREX_USE_GPU
+void
+PhysicalParticleContainer::AddPlasmaGPU (int lev, RealBox part_realbox)
+{
+    BL_PROFILE("PhysicalParticleContainer::AddPlasmaGPU");
+
+    // If no part_realbox is provided, initialize particles in the whole domain
+    const Geometry& geom = Geom(lev);
+    if (!part_realbox.ok()) part_realbox = geom.ProbDomain();
+
+    int num_ppc = plasma_injector->num_particles_per_cell;
+
+    const Real* dx = geom.CellSize();
+
+    Real scale_fac;
+#if AMREX_SPACEDIM==3
+    scale_fac = dx[0]*dx[1]*dx[2]/num_ppc;
+#elif AMREX_SPACEDIM==2
+    scale_fac = dx[0]*dx[1]/num_ppc;
+#endif
+
+#ifdef _OPENMP
+    // First touch all tiles in the map in serial
+    for (MFIter mfi = MakeMFIter(lev); mfi.isValid(); ++mfi) {
+        const int grid_id = mfi.index();
+        const int tile_id = mfi.LocalTileIndex();
+        GetParticles(lev)[std::make_pair(grid_id, tile_id)];
+    }
+#endif
+
+    MultiFab* cost = WarpX::getCosts(lev);
+
+    if ( (not m_refined_injection_mask) and WarpX::do_moving_window)
+    {
+        Box mask_box = geom.Domain();
+        mask_box.setSmall(WarpX::moving_window_dir, 0);
+        mask_box.setBig(WarpX::moving_window_dir, 0);
+        m_refined_injection_mask.reset( new IArrayBox(mask_box));
+        m_refined_injection_mask->setVal(-1);
+    }
+
+    MFItInfo info;
+    if (do_tiling) {
+        info.EnableTiling(tile_size);
+    }
+    info.SetDynamic(true);
+
+#ifdef _OPENMP
+#pragma omp parallel if (not WarpX::serialize_ics)
+#endif
+    {
+        std::array<Real,PIdx::nattribs> attribs;
+        attribs.fill(0.0);
+
+        // Loop through the tiles
+        for (MFIter mfi = MakeMFIter(lev, info); mfi.isValid(); ++mfi) {
+
+            Real wt = amrex::second();
+
+            const Box& tile_box = mfi.tilebox();
+            const RealBox tile_realbox = WarpX::getRealBox(tile_box, lev);
+
+            // Find the cells of part_box that overlap with tile_realbox
+            // If there is no overlap, just go to the next tile in the loop
+            RealBox overlap_realbox;
+            Box overlap_box;
+            Real ncells_adjust;
+            bool no_overlap = 0;
+
+            for (int dir=0; dir<AMREX_SPACEDIM; dir++) {
+                if ( tile_realbox.lo(dir) <= part_realbox.hi(dir) ) {
+                    ncells_adjust = std::floor( (tile_realbox.lo(dir) - part_realbox.lo(dir))/dx[dir] );
+                    overlap_realbox.setLo( dir, part_realbox.lo(dir) + std::max(ncells_adjust, 0.) * dx[dir]);
+                } else {
+                    no_overlap = 1; break;
+                }
+                if ( tile_realbox.hi(dir) >= part_realbox.lo(dir) ) {
+                    ncells_adjust = std::floor( (part_realbox.hi(dir) - tile_realbox.hi(dir))/dx[dir] );
+                    overlap_realbox.setHi( dir, part_realbox.hi(dir) - std::max(ncells_adjust, 0.) * dx[dir]);
+                } else {
+                    no_overlap = 1; break;
+                }
+                // Count the number of cells in this direction in overlap_realbox
+                overlap_box.setSmall( dir, 0 );
+                overlap_box.setBig( dir,
+				    int( round((overlap_realbox.hi(dir)-overlap_realbox.lo(dir))/dx[dir] )) - 1);
+            }
+            if (no_overlap == 1) {
+                continue; // Go to the next tile
+            }
+
+            const int grid_id = mfi.index();
+            const int tile_id = mfi.LocalTileIndex();
+
+	    Cuda::HostVector<ParticleType> host_particles;
+	    std::array<Cuda::HostVector<Real>, PIdx::nattribs> host_attribs;
+	    
+            // Loop through the cells of overlap_box and inject
+            // the corresponding particles
+            const auto& overlap_corner = overlap_realbox.lo();
+            for (IntVect iv = overlap_box.smallEnd(); iv <= overlap_box.bigEnd(); overlap_box.next(iv))
+            {
+                int fac;
+                if (injected) {
+#if ( AMREX_SPACEDIM == 3 )
+                    Real x = overlap_corner[0] + (iv[0] + 0.5)*dx[0];
+                    Real y = overlap_corner[1] + (iv[1] + 0.5)*dx[1];
+                    Real z = overlap_corner[2] + (iv[2] + 0.5)*dx[2];
+#elif ( AMREX_SPACEDIM == 2 )
+                    Real x = overlap_corner[0] + (iv[0] + 0.5)*dx[0];
+                    Real y = 0;
+                    Real z = overlap_corner[1] + (iv[1] + 0.5)*dx[1];
+#endif
+                    fac = GetRefineFac(x, y, z);
+                } else {
+                    fac = 1.0;
+                }
+
+                int ref_num_ppc = num_ppc * AMREX_D_TERM(fac, *fac, *fac);
+                for (int i_part=0; i_part<ref_num_ppc;i_part++) {
+                    std::array<Real, 3> r;
+                    plasma_injector->getPositionUnitBox(r, i_part, fac);
+#if ( AMREX_SPACEDIM == 3 )
+                    Real x = overlap_corner[0] + (iv[0] + r[0])*dx[0];
+                    Real y = overlap_corner[1] + (iv[1] + r[1])*dx[1];
+                    Real z = overlap_corner[2] + (iv[2] + r[2])*dx[2];
+#elif ( AMREX_SPACEDIM == 2 )
+                    Real x = overlap_corner[0] + (iv[0] + r[0])*dx[0];
+                    Real y = 0;
+                    Real z = overlap_corner[1] + (iv[1] + r[1])*dx[1];
+#endif
+                    // If the new particle is not inside the tile box,
+                    // go to the next generated particle.
+#if ( AMREX_SPACEDIM == 3 )
+                    if(!tile_realbox.contains( RealVect{x, y, z} )) continue;
+#elif ( AMREX_SPACEDIM == 2 )
+                    if(!tile_realbox.contains( RealVect{x, z} )) continue;
+#endif
+
+                    Real dens;
+                    std::array<Real, 3> u;
+                    if (WarpX::gamma_boost == 1.){
+                      // Lab-frame simulation
+                      // If the particle is not within the species's
+                      // xmin, xmax, ymin, ymax, zmin, zmax, go to
+                      // the next generated particle.
+                      if (!plasma_injector->insideBounds(x, y, z)) continue;
+                      plasma_injector->getMomentum(u, x, y, z);
+                      dens = plasma_injector->getDensity(x, y, z);
+                    } else {
+                      // Boosted-frame simulation
+                      Real c = PhysConst::c;
+                      Real gamma_boost = WarpX::gamma_boost;
+                      Real beta_boost = WarpX::beta_boost;
+                      // Since the user provides the density distribution
+                      // at t_lab=0 and in the lab-frame coordinates,
+                      // we need to find the lab-frame position of this
+                      // particle at t_lab=0, from its boosted-frame coordinates
+                      // Assuming ballistic motion, this is given by:
+                      // z0_lab = gamma*( z_boost*(1-beta*betaz_lab) - ct_boost*(betaz_lab-beta) )
+                      // where betaz_lab is the speed of the particle in the lab frame
+                      //
+                      // In order for this equation to be solvable, betaz_lab
+                      // is explicitly assumed to have no dependency on z0_lab
+                      plasma_injector->getMomentum(u, x, y, 0.); // No z0_lab dependency
+                      // At this point u is the lab-frame momentum
+                      // => Apply the above formula for z0_lab
+                      Real gamma_lab = std::sqrt( 1 + (u[0]*u[0] + u[1]*u[1] + u[2]*u[2])/(c*c) );
+                      Real betaz_lab = u[2]/gamma_lab/c;
+                      Real t = WarpX::GetInstance().gett_new(lev);
+                      Real z0_lab = gamma_boost * ( z*(1-beta_boost*betaz_lab) - c*t*(betaz_lab-beta_boost) );
+                      // If the particle is not within the lab-frame zmin, zmax, etc.
+                      // go to the next generated particle.
+                      if (!plasma_injector->insideBounds(x, y, z0_lab)) continue;
+                      // call `getDensity` with lab-frame parameters
+                      dens = plasma_injector->getDensity(x, y, z0_lab);
+                      // At this point u and dens are the lab-frame quantities
+                      // => Perform Lorentz transform
+                      dens = gamma_boost * dens * ( 1 - beta_boost*betaz_lab );
+                      u[2] = gamma_boost * ( u[2] -beta_boost*c*gamma_lab );
+                    }
+                    attribs[PIdx::w ] = dens * scale_fac / (AMREX_D_TERM(fac, *fac, *fac));
+                    attribs[PIdx::ux] = u[0];
+                    attribs[PIdx::uy] = u[1];
+                    attribs[PIdx::uz] = u[2];
+
+#ifdef WARPX_STORE_OLD_PARTICLE_ATTRIBS
+                    attribs[PIdx::xold] = x;
+                    attribs[PIdx::yold] = y;
+                    attribs[PIdx::zold] = z;
+
+                    attribs[PIdx::uxold] = u[0];
+                    attribs[PIdx::uyold] = u[1];
+                    attribs[PIdx::uzold] = u[2];
+#endif
+
+		    ParticleType p;
+		    p.id()  = ParticleType::NextID();
+		    p.cpu() = ParallelDescriptor::MyProc();
+#if (AMREX_SPACEDIM == 3)
+		    p.pos(0) = x;
+		    p.pos(1) = y;
+		    p.pos(2) = z;
+#elif (AMREX_SPACEDIM == 2)
+		    p.pos(0) = x;
+		    p.pos(1) = z;
+#endif
+
+		    host_particles.push_back(p);
+		    for (int kk = 0; kk < PIdx::nattribs; ++kk)
+		      host_attribs[kk].push_back(attribs[kk]);
+                }
+            }
+
+	    auto& particle_tile = GetParticles(lev)[std::make_pair(grid_id,tile_id)];
+            auto old_size = particle_tile.GetArrayOfStructs().size();
+            auto new_size = old_size + host_particles.size();
+	    particle_tile.resize(new_size);
+
+            Cuda::thrust_copy(host_particles.begin(),
+                              host_particles.end(),
+                              particle_tile.GetArrayOfStructs().begin() + old_size);
+
+	    for (int kk = 0; kk < PIdx::nattribs; ++kk) {
+                Cuda::thrust_copy(host_attribs[kk].begin(),
+                                  host_attribs[kk].end(),
+                                  particle_tile.GetStructOfArrays().GetRealData(kk).begin() + old_size);
+	    }
+	    			 
+            if (cost) {
+	        wt = (amrex::second() - wt) / tile_box.d_numPts();
+                FArrayBox* costfab = cost->fabPtr(mfi);
+                AMREX_LAUNCH_HOST_DEVICE_LAMBDA ( tile_box, work_box,
+                {
+                    costfab->plus(wt, work_box);
+                });
+            }
+        }		
+    }
+}
+#endif
+
+#ifdef WARPX_DO_ELECTROSTATIC
+void
+PhysicalParticleContainer::
+FieldGatherES (const amrex::Vector<std::array<std::unique_ptr<amrex::MultiFab>, 3> >& E,
+               const amrex::Vector<std::unique_ptr<amrex::FabArray<amrex::BaseFab<int> > > >& masks)
+{
+
+    const int num_levels = E.size();
+    const int ng = E[0][0]->nGrow();
+
+    if (num_levels == 1) {
+        const int lev = 0;
+        const auto& gm = m_gdb->Geom(lev);
+        const auto& ba = m_gdb->ParticleBoxArray(lev);
+
+        BoxArray nba = ba;
+        nba.surroundingNodes();
+
+        const Real* dx  = gm.CellSize();
+        const Real* plo = gm.ProbLo();
+
+        BL_ASSERT(OnSameGrids(lev, *E[lev][0]));
+
+        for (WarpXParIter pti(*this, lev); pti.isValid(); ++pti) {
+            const Box& box = nba[pti];
+
+            const auto& particles = pti.GetArrayOfStructs();
+            int nstride = particles.dataShape().first;
+            const long np  = pti.numParticles();
+
+            auto& attribs = pti.GetAttribs();
+            auto& Exp = attribs[PIdx::Ex];
+            auto& Eyp = attribs[PIdx::Ey];
+#if AMREX_SPACEDIM == 3
+            auto& Ezp = attribs[PIdx::Ez];
+#endif
+            Exp.assign(np,0.0);
+            Eyp.assign(np,0.0);
+#if AMREX_SPACEDIM == 3
+            Ezp.assign(np,0.0);
+#endif
+
+            const FArrayBox& exfab = (*E[lev][0])[pti];
+            const FArrayBox& eyfab = (*E[lev][1])[pti];
+#if AMREX_SPACEDIM == 3
+            const FArrayBox& ezfab = (*E[lev][2])[pti];
+#endif
+
+            WRPX_INTERPOLATE_CIC(particles.dataPtr(), nstride, np,
+                                 Exp.dataPtr(), Eyp.dataPtr(),
+#if AMREX_SPACEDIM == 3
+                                 Ezp.dataPtr(),
+#endif
+                                 exfab.dataPtr(), eyfab.dataPtr(),
+#if AMREX_SPACEDIM == 3
+                                 ezfab.dataPtr(),
+#endif
+                                 box.loVect(), box.hiVect(), plo, dx, &ng);
+        }
+
+        return;
+    }
+
+    const BoxArray& fine_BA = E[1][0]->boxArray();
+    const DistributionMapping& fine_dm = E[1][0]->DistributionMap();
+    BoxArray coarsened_fine_BA = fine_BA;
+    coarsened_fine_BA.coarsen(IntVect(AMREX_D_DECL(2,2,2)));
+
+    MultiFab coarse_Ex(coarsened_fine_BA, fine_dm, 1, 1);
+    MultiFab coarse_Ey(coarsened_fine_BA, fine_dm, 1, 1);
+#if AMREX_SPACEDIM == 3
+    MultiFab coarse_Ez(coarsened_fine_BA, fine_dm, 1, 1);
+#endif
+
+    coarse_Ex.copy(*E[0][0], 0, 0, 1, 1, 1);
+    coarse_Ey.copy(*E[0][1], 0, 0, 1, 1, 1);
+#if AMREX_SPACEDIM == 3
+    coarse_Ez.copy(*E[0][2], 0, 0, 1, 1, 1);
+#endif
+
+    for (int lev = 0; lev < num_levels; ++lev) {
+        const auto& gm = m_gdb->Geom(lev);
+        const auto& ba = m_gdb->ParticleBoxArray(lev);
+
+        BoxArray nba = ba;
+        nba.surroundingNodes();
+
+        const Real* dx  = gm.CellSize();
+        const Real* plo = gm.ProbLo();
+
+        BL_ASSERT(OnSameGrids(lev, *E[lev][0]));
+
+        for (WarpXParIter pti(*this, lev); pti.isValid(); ++pti) {
+            const Box& box = nba[pti];
+
+            const auto& particles = pti.GetArrayOfStructs();
+            int nstride = particles.dataShape().first;
+            const long np  = pti.numParticles();
+
+            auto& attribs = pti.GetAttribs();
+            auto& Exp = attribs[PIdx::Ex];
+            auto& Eyp = attribs[PIdx::Ey];
+#if AMREX_SPACEDIM == 3
+            auto& Ezp = attribs[PIdx::Ez];
+#endif
+            Exp.assign(np,0.0);
+            Eyp.assign(np,0.0);
+#if AMREX_SPACEDIM == 3
+            Ezp.assign(np,0.0);
+#endif
+
+            const FArrayBox& exfab = (*E[lev][0])[pti];
+            const FArrayBox& eyfab = (*E[lev][1])[pti];
+#if AMREX_SPACEDIM == 3
+            const FArrayBox& ezfab = (*E[lev][2])[pti];
+#endif
+
+            if (lev == 0) {
+                WRPX_INTERPOLATE_CIC(particles.dataPtr(), nstride, np,
+                                     Exp.dataPtr(), Eyp.dataPtr(),
+#if AMREX_SPACEDIM == 3
+                Ezp.dataPtr(),
+#endif
+                                exfab.dataPtr(), eyfab.dataPtr(),
+#if AMREX_SPACEDIM == 3
+                                ezfab.dataPtr(),
+#endif
+                                box.loVect(), box.hiVect(), plo, dx, &ng);
+            } else {
+
+                const FArrayBox& exfab_coarse = coarse_Ex[pti];
+                const FArrayBox& eyfab_coarse = coarse_Ey[pti];
+#if AMREX_SPACEDIM == 3
+                const FArrayBox& ezfab_coarse = coarse_Ez[pti];
+#endif
+                const Box& coarse_box = coarsened_fine_BA[pti];
+                const Real* coarse_dx = Geom(0).CellSize();
+
+                WRPX_INTERPOLATE_CIC_TWO_LEVELS(particles.dataPtr(), nstride, np,
+                                                Exp.dataPtr(), Eyp.dataPtr(),
+#if AMREX_SPACEDIM == 3
+                                                Ezp.dataPtr(),
+#endif
+                                                exfab.dataPtr(), eyfab.dataPtr(),
+#if AMREX_SPACEDIM == 3
+                                                ezfab.dataPtr(),
+#endif
+                                                box.loVect(), box.hiVect(), dx,
+                                                exfab_coarse.dataPtr(), eyfab_coarse.dataPtr(),
+#if AMREX_SPACEDIM == 3
+                                                ezfab_coarse.dataPtr(),
+#endif
+                                                (*masks[1])[pti].dataPtr(),
+                                                coarse_box.loVect(), coarse_box.hiVect(), coarse_dx,
+                                                plo, &ng, &lev);
+            }
+        }
+    }
+}
+
+void
+PhysicalParticleContainer::EvolveES (const Vector<std::array<std::unique_ptr<MultiFab>, 3> >& E,
+                                           Vector<std::unique_ptr<MultiFab> >& rho,
+                                     Real t, Real dt)
+{
+    BL_PROFILE("PPC::EvolveES()");
+
+    int num_levels = rho.size();
+    for (int lev = 0; lev < num_levels; ++lev) {
+        BL_ASSERT(OnSameGrids(lev, *rho[lev]));
+        const auto& gm = m_gdb->Geom(lev);
+        const RealBox& prob_domain = gm.ProbDomain();
+	for (WarpXParIter pti(*this, lev); pti.isValid(); ++pti) {
+            // Particle structs
+            auto& particles = pti.GetArrayOfStructs();
+            int nstride = particles.dataShape().first;
+            const long np  = pti.numParticles();
+
+            // Particle attributes
+            auto& attribs = pti.GetAttribs();
+            auto& uxp = attribs[PIdx::ux];
+            auto& uyp = attribs[PIdx::uy];
+
+#if AMREX_SPACEDIM == 3
+            auto& uzp = attribs[PIdx::uz];
+#endif
+
+            auto& Exp = attribs[PIdx::Ex];
+            auto& Eyp = attribs[PIdx::Ey];
+
+#if AMREX_SPACEDIM == 3
+            auto& Ezp = attribs[PIdx::Ez];
+#endif
+            //
+            // Particle Push
+            //
+            WRPX_PUSH_LEAPFROG(particles.dataPtr(), nstride, np,
+                               uxp.dataPtr(), uyp.dataPtr(),
+#if AMREX_SPACEDIM == 3
+                               uzp.dataPtr(),
+#endif
+                               Exp.dataPtr(), Eyp.dataPtr(),
+#if AMREX_SPACEDIM == 3
+                               Ezp.dataPtr(),
+#endif
+                               &this->charge, &this->mass, &dt,
+                               prob_domain.lo(), prob_domain.hi());
+        }
+    }
+}
+#endif // WARPX_DO_ELECTROSTATIC
+
+void
+PhysicalParticleContainer::FieldGather (int lev,
+                                        const MultiFab& Ex, const MultiFab& Ey, const MultiFab& Ez,
+                                        const MultiFab& Bx, const MultiFab& By, const MultiFab& Bz)
+{
+    const std::array<Real,3>& dx = WarpX::CellSize(lev);
+
+    // WarpX assumes the same number of guard cells for Ex, Ey, Ez, Bx, By, Bz
+    long ng = Ex.nGrow();
+
+    BL_ASSERT(OnSameGrids(lev,Ex));
+
+    MultiFab* cost = WarpX::getCosts(lev);
+
+#ifdef _OPENMP
+#pragma omp parallel
+#endif
+    {
+        Cuda::DeviceVector<Real> xp, yp, zp;
+
+	for (WarpXParIter pti(*this, lev); pti.isValid(); ++pti)
+	{
+            Real wt = amrex::second();
+
+	    const Box& box = pti.validbox();
+
+            auto& attribs = pti.GetAttribs();
+
+            auto& Exp = attribs[PIdx::Ex];
+            auto& Eyp = attribs[PIdx::Ey];
+            auto& Ezp = attribs[PIdx::Ez];
+            auto& Bxp = attribs[PIdx::Bx];
+            auto& Byp = attribs[PIdx::By];
+            auto& Bzp = attribs[PIdx::Bz];
+
+            const long np = pti.numParticles();
+
+	    // Data on the grid
+	    const FArrayBox& exfab = Ex[pti];
+	    const FArrayBox& eyfab = Ey[pti];
+	    const FArrayBox& ezfab = Ez[pti];
+	    const FArrayBox& bxfab = Bx[pti];
+	    const FArrayBox& byfab = By[pti];
+	    const FArrayBox& bzfab = Bz[pti];
+
+	    Exp.assign(np,0.0);
+	    Eyp.assign(np,0.0);
+	    Ezp.assign(np,0.0);
+	    Bxp.assign(np,0.0);
+	    Byp.assign(np,0.0);
+	    Bzp.assign(np,0.0);
+
+	    //
+	    // copy data from particle container to temp arrays
+	    //
+            pti.GetPosition(xp, yp, zp);
+
+            const std::array<Real,3>& xyzmin = WarpX::LowerCorner(box, lev);
+            const int* ixyzmin = box.loVect();
+
+	    //
+	    // Field Gather
+	    //
+	    const int ll4symtry          = false;
+            long lvect_fieldgathe = 64;
+	    warpx_geteb_energy_conserving(
+	       &np,
+               xp.dataPtr(),
+               yp.dataPtr(),
+               zp.dataPtr(),
+	       Exp.dataPtr(),Eyp.dataPtr(),Ezp.dataPtr(),
+	       Bxp.dataPtr(),Byp.dataPtr(),Bzp.dataPtr(),
+               ixyzmin,
+               &xyzmin[0], &xyzmin[1], &xyzmin[2],
+               &dx[0], &dx[1], &dx[2],
+	       &WarpX::nox, &WarpX::noy, &WarpX::noz,
+               BL_TO_FORTRAN_ANYD(exfab),
+	       BL_TO_FORTRAN_ANYD(eyfab),
+	       BL_TO_FORTRAN_ANYD(ezfab),
+               BL_TO_FORTRAN_ANYD(bxfab),
+	       BL_TO_FORTRAN_ANYD(byfab),
+	       BL_TO_FORTRAN_ANYD(bzfab),
+	       &ll4symtry, &WarpX::l_lower_order_in_v, &WarpX::do_nodal,
+	       &lvect_fieldgathe, &WarpX::field_gathering_algo);
+
+            if (cost) {
+                const Box& tbx = pti.tilebox();
+                wt = (amrex::second() - wt) / tbx.d_numPts();
+                FArrayBox* costfab = cost->fabPtr(pti);
+                AMREX_LAUNCH_HOST_DEVICE_LAMBDA ( tbx, work_box,
+                {
+                    costfab->plus(wt, work_box);
+                });
+            }
+        }
+    }
+}
+
+void
+PhysicalParticleContainer::Evolve (int lev,
+				   const MultiFab& Ex, const MultiFab& Ey, const MultiFab& Ez,
+				   const MultiFab& Bx, const MultiFab& By, const MultiFab& Bz,
+				   MultiFab& jx, MultiFab& jy, MultiFab& jz,
+                                   MultiFab* cjx, MultiFab* cjy, MultiFab* cjz,
+                                   MultiFab* rho, MultiFab* crho,
+                                   const MultiFab* cEx, const MultiFab* cEy, const MultiFab* cEz,
+                                   const MultiFab* cBx, const MultiFab* cBy, const MultiFab* cBz,
+                                   Real t, Real dt)
+{
+    BL_PROFILE("PPC::Evolve()");
+    BL_PROFILE_VAR_NS("PPC::Evolve::Copy", blp_copy);
+    BL_PROFILE_VAR_NS("PICSAR::FieldGather", blp_pxr_fg);
+    BL_PROFILE_VAR_NS("PICSAR::ParticlePush", blp_pxr_pp);
+    BL_PROFILE_VAR_NS("PPC::Evolve::partition", blp_partition);
+    
+    const std::array<Real,3>& dx = WarpX::CellSize(lev);
+    const std::array<Real,3>& cdx = WarpX::CellSize(std::max(lev-1,0));
+
+    const auto& mypc = WarpX::GetInstance().GetPartContainer();
+    const int nstencilz_fdtd_nci_corr = mypc.nstencilz_fdtd_nci_corr;
+
+    BL_ASSERT(OnSameGrids(lev,jx));
+
+    MultiFab* cost = WarpX::getCosts(lev);
+
+    const iMultiFab* current_masks = WarpX::CurrentBufferMasks(lev);
+    const iMultiFab* gather_masks = WarpX::GatherBufferMasks(lev);
+
+    bool has_buffer = cEx || cjx;
+
+#ifdef _OPENMP
+#pragma omp parallel 
+#endif
+    {
+#ifdef _OPENMP
+        int thread_num = omp_get_thread_num();
+#else
+        int thread_num = 0;
+#endif
+	if (local_rho[thread_num] == nullptr) local_rho[thread_num].reset( new amrex::FArrayBox());
+	if (local_jx[thread_num]  == nullptr) local_jx[thread_num].reset(  new amrex::FArrayBox());
+	if (local_jy[thread_num]  == nullptr) local_jy[thread_num].reset(  new amrex::FArrayBox());
+	if (local_jz[thread_num]  == nullptr) local_jz[thread_num].reset(  new amrex::FArrayBox());
+
+        FArrayBox filtered_Ex, filtered_Ey, filtered_Ez;
+        FArrayBox filtered_Bx, filtered_By, filtered_Bz;
+        std::vector<bool> inexflag;
+        Vector<long> pid;
+        RealVector tmp;
+        ParticleVector particle_tmp;
+
+	for (WarpXParIter pti(*this, lev); pti.isValid(); ++pti)
+	{
+            Real wt = amrex::second();
+
+	    const Box& box = pti.validbox();
+
+            auto& attribs = pti.GetAttribs();
+
+            auto&  wp = attribs[PIdx::w];
+            auto& uxp = attribs[PIdx::ux];
+            auto& uyp = attribs[PIdx::uy];
+            auto& uzp = attribs[PIdx::uz];
+            auto& Exp = attribs[PIdx::Ex];
+            auto& Eyp = attribs[PIdx::Ey];
+            auto& Ezp = attribs[PIdx::Ez];
+            auto& Bxp = attribs[PIdx::Bx];
+            auto& Byp = attribs[PIdx::By];
+            auto& Bzp = attribs[PIdx::Bz];
+
+            const long np = pti.numParticles();
+
+	    // Data on the grid
+            FArrayBox const* exfab = &(Ex[pti]);
+            FArrayBox const* eyfab = &(Ey[pti]);
+            FArrayBox const* ezfab = &(Ez[pti]);
+            FArrayBox const* bxfab = &(Bx[pti]);
+            FArrayBox const* byfab = &(By[pti]);
+            FArrayBox const* bzfab = &(Bz[pti]);
+
+            if (WarpX::use_fdtd_nci_corr)
+            {
+#if (AMREX_SPACEDIM == 2)
+                const Box& tbox = amrex::grow(pti.tilebox(),{static_cast<int>(WarpX::nox),
+                            static_cast<int>(WarpX::noz)});
+#else
+                const Box& tbox = amrex::grow(pti.tilebox(),{static_cast<int>(WarpX::nox),
+                            static_cast<int>(WarpX::noy),
+                            static_cast<int>(WarpX::noz)});
+#endif
+
+                // both 2d and 3d
+                filtered_Ex.resize(amrex::convert(tbox,WarpX::Ex_nodal_flag));
+                WRPX_PXR_GODFREY_FILTER(BL_TO_FORTRAN_BOX(filtered_Ex),
+                                        BL_TO_FORTRAN_ANYD(filtered_Ex),
+                                        BL_TO_FORTRAN_ANYD(Ex[pti]),
+                                        mypc.fdtd_nci_stencilz_ex[lev].data(),
+                                        &nstencilz_fdtd_nci_corr);
+                exfab = &filtered_Ex;
+
+                filtered_Ez.resize(amrex::convert(tbox,WarpX::Ez_nodal_flag));
+                WRPX_PXR_GODFREY_FILTER(BL_TO_FORTRAN_BOX(filtered_Ez),
+                                        BL_TO_FORTRAN_ANYD(filtered_Ez),
+                                        BL_TO_FORTRAN_ANYD(Ez[pti]),
+                                        mypc.fdtd_nci_stencilz_by[lev].data(),
+                                        &nstencilz_fdtd_nci_corr);
+                ezfab = &filtered_Ez;
+
+                filtered_By.resize(amrex::convert(tbox,WarpX::By_nodal_flag));
+                WRPX_PXR_GODFREY_FILTER(BL_TO_FORTRAN_BOX(filtered_By),
+                                        BL_TO_FORTRAN_ANYD(filtered_By),
+                                        BL_TO_FORTRAN_ANYD(By[pti]),
+                                        mypc.fdtd_nci_stencilz_by[lev].data(),
+                                        &nstencilz_fdtd_nci_corr);
+                byfab = &filtered_By;
+
+#if (AMREX_SPACEDIM == 3)
+                filtered_Ey.resize(amrex::convert(tbox,WarpX::Ey_nodal_flag));
+                WRPX_PXR_GODFREY_FILTER(BL_TO_FORTRAN_BOX(filtered_Ey),
+                                        BL_TO_FORTRAN_ANYD(filtered_Ey),
+                                        BL_TO_FORTRAN_ANYD(Ey[pti]),
+                                        mypc.fdtd_nci_stencilz_ex[lev].data(),
+                                        &nstencilz_fdtd_nci_corr);
+                eyfab = &filtered_Ey;
+
+                filtered_Bx.resize(amrex::convert(tbox,WarpX::Bx_nodal_flag));
+                WRPX_PXR_GODFREY_FILTER(BL_TO_FORTRAN_BOX(filtered_Bx),
+                                        BL_TO_FORTRAN_ANYD(filtered_Bx),
+                                        BL_TO_FORTRAN_ANYD(Bx[pti]),
+                                        mypc.fdtd_nci_stencilz_by[lev].data(),
+                                        &nstencilz_fdtd_nci_corr);
+                bxfab = &filtered_Bx;
+
+                filtered_Bz.resize(amrex::convert(tbox,WarpX::Bz_nodal_flag));
+                WRPX_PXR_GODFREY_FILTER(BL_TO_FORTRAN_BOX(filtered_Bz),
+                                        BL_TO_FORTRAN_ANYD(filtered_Bz),
+                                        BL_TO_FORTRAN_ANYD(Bz[pti]),
+                                        mypc.fdtd_nci_stencilz_ex[lev].data(),
+                                        &nstencilz_fdtd_nci_corr);
+                bzfab = &filtered_Bz;
+#endif
+            }
+
+	    Exp.assign(np,0.0);
+	    Eyp.assign(np,0.0);
+	    Ezp.assign(np,0.0);
+	    Bxp.assign(np,WarpX::B_external[0]);
+	    Byp.assign(np,WarpX::B_external[1]);
+	    Bzp.assign(np,WarpX::B_external[2]);
+
+	    m_giv[thread_num].resize(np);
+
+            long nfine_current = np;
+            long nfine_gather = np;
+            if (has_buffer && !do_not_push)
+            {
+                BL_PROFILE_VAR_START(blp_partition);
+                inexflag.resize(np);
+                auto& aos = pti.GetArrayOfStructs();
+                // We need to partition the large buffer first
+                iMultiFab const* bmasks = (WarpX::n_field_gather_buffer >= WarpX::n_current_deposition_buffer) ?
+                    gather_masks : current_masks;
+                int i = 0;
+                const auto& msk = (*bmasks)[pti];
+                for (const auto& p : aos) {
+                    const IntVect& iv = Index(p, lev);
+                    inexflag[i++] = msk(iv);
+                }
+
+                pid.resize(np);
+                std::iota(pid.begin(), pid.end(), 0L);
+
+                auto sep = std::stable_partition(pid.begin(), pid.end(),
+                                                 [&inexflag](long id) { return inexflag[id]; });
+
+                if (WarpX::n_current_deposition_buffer == WarpX::n_field_gather_buffer) {
+                    nfine_current = nfine_gather = std::distance(pid.begin(), sep);
+                } else if (sep != pid.end()) {
+                    int n_buf;
+                    if (bmasks == gather_masks) {
+                        nfine_gather = std::distance(pid.begin(), sep);
+                        bmasks = current_masks;
+                        n_buf = WarpX::n_current_deposition_buffer;
+                    } else {
+                        nfine_current = std::distance(pid.begin(), sep);
+                        bmasks = gather_masks;
+                        n_buf = WarpX::n_field_gather_buffer;
+                    }
+                    if (n_buf > 0)
+                    {
+                        const auto& msk2 = (*bmasks)[pti];
+                        for (auto it = sep; it != pid.end(); ++it) {
+                            const long id = *it;
+                            const IntVect& iv = Index(aos[id], lev);
+                            inexflag[id] = msk2(iv);
+                        }
+
+                        auto sep2 = std::stable_partition(sep, pid.end(),
+                                                          [&inexflag](long id) {return inexflag[id];});
+                        if (bmasks == gather_masks) {
+                            nfine_gather = std::distance(pid.begin(), sep2);
+                        } else {
+                            nfine_current = std::distance(pid.begin(), sep2);
+                        }
+                    }
+                }
+
+                if (deposit_on_main_grid && lev > 0) {
+                    nfine_current = 0;
+                }
+
+                if (nfine_current != np || nfine_gather != np)
+                {
+                    particle_tmp.resize(np);
+                    for (long ip = 0; ip < np; ++ip) {
+                        particle_tmp[ip] = aos[pid[ip]];
+                    }
+                    std::swap(aos(), particle_tmp);
+
+                    tmp.resize(np);
+                    for (long ip = 0; ip < np; ++ip) {
+                        tmp[ip] = wp[pid[ip]];
+                    }
+                    std::swap(wp, tmp);
+
+                    for (long ip = 0; ip < np; ++ip) {
+                        tmp[ip] = uxp[pid[ip]];
+                    }
+                    std::swap(uxp, tmp);
+
+                    for (long ip = 0; ip < np; ++ip) {
+                        tmp[ip] = uyp[pid[ip]];
+                    }
+                    std::swap(uyp, tmp);
+
+                    for (long ip = 0; ip < np; ++ip) {
+                        tmp[ip] = uzp[pid[ip]];
+                    }
+                    std::swap(uzp, tmp);
+                }
+                BL_PROFILE_VAR_STOP(blp_partition);
+            }
+
+            const long np_current = (cjx) ? nfine_current : np;
+            
+	    //
+	    // copy data from particle container to temp arrays
+	    //
+	    BL_PROFILE_VAR_START(blp_copy);
+            pti.GetPosition(m_xp[thread_num], m_yp[thread_num], m_zp[thread_num]);
+	    BL_PROFILE_VAR_STOP(blp_copy);
+
+            if (rho) DepositCharge(pti, wp, rho, crho, 0, np_current, np, thread_num, lev);
+            
+            if (! do_not_push)
+            {
+                //
+                // Field Gather of Aux Data (i.e., the full solution)
+                //
+                const int ll4symtry          = false;
+                long lvect_fieldgathe = 64;
+
+                const std::array<Real,3>& xyzmin_grid = WarpX::LowerCorner(box, lev);
+                const int* ixyzmin_grid = box.loVect();
+                
+                const long np_gather = (cEx) ? nfine_gather : np;
+
+                BL_PROFILE_VAR_START(blp_pxr_fg);
+
+                warpx_geteb_energy_conserving(
+                    &np_gather,
+                    m_xp[thread_num].dataPtr(),
+                    m_yp[thread_num].dataPtr(),
+                    m_zp[thread_num].dataPtr(),
+                    Exp.dataPtr(),Eyp.dataPtr(),Ezp.dataPtr(),
+                    Bxp.dataPtr(),Byp.dataPtr(),Bzp.dataPtr(),
+                    ixyzmin_grid,
+                    &xyzmin_grid[0], &xyzmin_grid[1], &xyzmin_grid[2],
+                    &dx[0], &dx[1], &dx[2],
+                    &WarpX::nox, &WarpX::noy, &WarpX::noz,
+                    BL_TO_FORTRAN_ANYD(*exfab),
+                    BL_TO_FORTRAN_ANYD(*eyfab),
+                    BL_TO_FORTRAN_ANYD(*ezfab),
+                    BL_TO_FORTRAN_ANYD(*bxfab),
+                    BL_TO_FORTRAN_ANYD(*byfab),
+                    BL_TO_FORTRAN_ANYD(*bzfab),
+                    &ll4symtry, &WarpX::l_lower_order_in_v, &WarpX::do_nodal,
+                    &lvect_fieldgathe, &WarpX::field_gathering_algo);
+
+                if (np_gather < np)
+                {
+                    const IntVect& ref_ratio = WarpX::RefRatio(lev-1);
+                    const Box& cbox = amrex::coarsen(box,ref_ratio);
+                    const std::array<Real,3>& cxyzmin_grid = WarpX::LowerCorner(cbox, lev-1);
+                    const int* cixyzmin_grid = cbox.loVect();
+
+                    const FArrayBox* cexfab = &(*cEx)[pti];
+                    const FArrayBox* ceyfab = &(*cEy)[pti];
+                    const FArrayBox* cezfab = &(*cEz)[pti];
+                    const FArrayBox* cbxfab = &(*cBx)[pti];
+                    const FArrayBox* cbyfab = &(*cBy)[pti];
+                    const FArrayBox* cbzfab = &(*cBz)[pti];
+
+                    if (WarpX::use_fdtd_nci_corr)
+                    {
+#if (AMREX_SPACEDIM == 2)
+                        const Box& tbox = amrex::grow(cbox,{static_cast<int>(WarpX::nox),
+                                    static_cast<int>(WarpX::noz)});
+#else
+                        const Box& tbox = amrex::grow(cbox,{static_cast<int>(WarpX::nox),
+                                    static_cast<int>(WarpX::noy),
+                                    static_cast<int>(WarpX::noz)});
+#endif
+
+                        // both 2d and 3d
+                        filtered_Ex.resize(amrex::convert(tbox,WarpX::Ex_nodal_flag));
+                        WRPX_PXR_GODFREY_FILTER(BL_TO_FORTRAN_BOX(filtered_Ex),
+                                                BL_TO_FORTRAN_ANYD(filtered_Ex),
+                                                BL_TO_FORTRAN_ANYD((*cEx)[pti]),
+                                                mypc.fdtd_nci_stencilz_ex[lev-1].data(),
+                                                &nstencilz_fdtd_nci_corr);
+                        cexfab = &filtered_Ex;
+
+                        filtered_Ez.resize(amrex::convert(tbox,WarpX::Ez_nodal_flag));
+                        WRPX_PXR_GODFREY_FILTER(BL_TO_FORTRAN_BOX(filtered_Ez),
+                                                BL_TO_FORTRAN_ANYD(filtered_Ez),
+                                                BL_TO_FORTRAN_ANYD((*cEz)[pti]),
+                                                mypc.fdtd_nci_stencilz_by[lev-1].data(),
+                                                &nstencilz_fdtd_nci_corr);
+                        cezfab = &filtered_Ez;
+                        filtered_By.resize(amrex::convert(tbox,WarpX::By_nodal_flag));
+                        WRPX_PXR_GODFREY_FILTER(BL_TO_FORTRAN_BOX(filtered_By),
+                                                BL_TO_FORTRAN_ANYD(filtered_By),
+                                                BL_TO_FORTRAN_ANYD((*cBy)[pti]),
+                                                mypc.fdtd_nci_stencilz_by[lev-1].data(),
+                                                &nstencilz_fdtd_nci_corr);
+                        cbyfab = &filtered_By;
+
+#if (AMREX_SPACEDIM == 3)
+                        filtered_Ey.resize(amrex::convert(tbox,WarpX::Ey_nodal_flag));
+                        WRPX_PXR_GODFREY_FILTER(BL_TO_FORTRAN_BOX(filtered_Ey),
+                                                BL_TO_FORTRAN_ANYD(filtered_Ey),
+                                                BL_TO_FORTRAN_ANYD((*cEy)[pti]),
+                                                mypc.fdtd_nci_stencilz_ex[lev-1].data(),
+                                                &nstencilz_fdtd_nci_corr);
+                        ceyfab = &filtered_Ey;
+                        
+                        filtered_Bx.resize(amrex::convert(tbox,WarpX::Bx_nodal_flag));
+                        WRPX_PXR_GODFREY_FILTER(BL_TO_FORTRAN_BOX(filtered_Bx),
+                                                BL_TO_FORTRAN_ANYD(filtered_Bx),
+                                                BL_TO_FORTRAN_ANYD((*cBx)[pti]),
+                                                mypc.fdtd_nci_stencilz_by[lev-1].data(),
+                                                &nstencilz_fdtd_nci_corr);
+                        cbxfab = &filtered_Bx;
+                        
+                        filtered_Bz.resize(amrex::convert(tbox,WarpX::Bz_nodal_flag));
+                        WRPX_PXR_GODFREY_FILTER(BL_TO_FORTRAN_BOX(filtered_Bz),
+                                                BL_TO_FORTRAN_ANYD(filtered_Bz),
+                                                BL_TO_FORTRAN_ANYD((*cBz)[pti]),
+                                                mypc.fdtd_nci_stencilz_ex[lev-1].data(),
+                                                &nstencilz_fdtd_nci_corr);
+                        cbzfab = &filtered_Bz;
+#endif
+                    }
+                    
+                    long ncrse = np - nfine_gather;
+                    warpx_geteb_energy_conserving(
+                        &ncrse,
+                        m_xp[thread_num].dataPtr()+nfine_gather,
+                        m_yp[thread_num].dataPtr()+nfine_gather,
+                        m_zp[thread_num].dataPtr()+nfine_gather,
+                        Exp.dataPtr()+nfine_gather, Eyp.dataPtr()+nfine_gather, Ezp.dataPtr()+nfine_gather,
+                        Bxp.dataPtr()+nfine_gather, Byp.dataPtr()+nfine_gather, Bzp.dataPtr()+nfine_gather,
+                        cixyzmin_grid,
+                        &cxyzmin_grid[0], &cxyzmin_grid[1], &cxyzmin_grid[2],
+                        &cdx[0], &cdx[1], &cdx[2],
+                        &WarpX::nox, &WarpX::noy, &WarpX::noz,
+                        BL_TO_FORTRAN_ANYD(*cexfab),
+                        BL_TO_FORTRAN_ANYD(*ceyfab),
+                        BL_TO_FORTRAN_ANYD(*cezfab),
+                        BL_TO_FORTRAN_ANYD(*cbxfab),
+                        BL_TO_FORTRAN_ANYD(*cbyfab),
+                        BL_TO_FORTRAN_ANYD(*cbzfab),
+                        &ll4symtry, &WarpX::l_lower_order_in_v, &WarpX::do_nodal,
+                        &lvect_fieldgathe, &WarpX::field_gathering_algo);
+                }
+
+                BL_PROFILE_VAR_STOP(blp_pxr_fg);
+
+                //
+                // Particle Push
+                //
+                BL_PROFILE_VAR_START(blp_pxr_pp);
+                PushPX(pti, m_xp[thread_num], m_yp[thread_num], m_zp[thread_num], 
+                       m_giv[thread_num], dt);
+                BL_PROFILE_VAR_STOP(blp_pxr_pp);
+
+                //
+                // Current Deposition
+                //
+                DepositCurrent(pti, wp, uxp, uyp, uzp, jx, jy, jz,
+                               cjx, cjy, cjz, np_current, np, thread_num, lev, dt);
+  
+                //
+                // copy particle data back
+                //
+                BL_PROFILE_VAR_START(blp_copy);
+                pti.SetPosition(m_xp[thread_num], m_yp[thread_num], m_zp[thread_num]);
+                BL_PROFILE_VAR_STOP(blp_copy);
+            }
+            
+            if (rho) DepositCharge(pti, wp, rho, crho, 1, np_current, np, thread_num, lev);
+
+            if (cost) {
+                const Box& tbx = pti.tilebox();
+                wt = (amrex::second() - wt) / tbx.d_numPts();
+                FArrayBox* costfab = cost->fabPtr(pti);
+                AMREX_LAUNCH_HOST_DEVICE_LAMBDA ( tbx, work_box,
+                {
+                    costfab->plus(wt, work_box);
+                });
+            }
+        }
+    }
+    // Split particles
+    if (do_splitting){ SplitParticles(lev); }
+}
+
+// Loop over all particles in the particle container and
+// split particles tagged with p.id()=DoSplitParticleID
+void
+PhysicalParticleContainer::SplitParticles(int lev)
+{
+    auto& mypc = WarpX::GetInstance().GetPartContainer();
+    auto& pctmp_split = mypc.GetPCtmp();
+    Cuda::DeviceVector<Real> xp, yp, zp;
+    RealVector psplit_x, psplit_y, psplit_z, psplit_w;
+    RealVector psplit_ux, psplit_uy, psplit_uz;
+    long np_split_to_add = 0;
+    long np_split;
+    if(split_type==0)
+    {
+        np_split = pow(2, AMREX_SPACEDIM);
+    } else {
+        np_split = 2*AMREX_SPACEDIM;
+    }
+
+    // Loop over particle interator
+    for (WarpXParIter pti(*this, lev); pti.isValid(); ++pti)
+    {
+        pti.GetPosition(xp, yp, zp);
+        const std::array<Real,3>& dx = WarpX::CellSize(lev);
+	// particle Array Of Structs data
+        auto& particles = pti.GetArrayOfStructs();
+	// particle Struct Of Arrays data
+        auto& attribs = pti.GetAttribs();
+        auto& wp  = attribs[PIdx::w ];
+        auto& uxp = attribs[PIdx::ux];
+        auto& uyp = attribs[PIdx::uy];
+        auto& uzp = attribs[PIdx::uz];
+        const long np = pti.numParticles();
+        for(int i=0; i<np; i++){
+            auto& p = particles[i];
+            if (p.id() == DoSplitParticleID){
+		// If particle is tagged, split it and put the 
+		// split particles in local arrays psplit_x etc.
+                np_split_to_add += np_split;
+#if (AMREX_SPACEDIM==2)
+                if (split_type==0){
+		    // Split particle in two along each axis
+		    // 4 particles in 2d
+                    for (int ishift = -1; ishift < 2; ishift +=2 ){
+                        for (int kshift = -1; kshift < 2; kshift +=2 ){
+                            // Add one particle with offset in x and z
+                            psplit_x.push_back( xp[i] + ishift*dx[0]/2 );
+                            psplit_y.push_back( yp[i] );
+                            psplit_z.push_back( zp[i] + kshift*dx[2]/2 );
+                            psplit_ux.push_back( uxp[i] );
+                            psplit_uy.push_back( uyp[i] );
+                            psplit_uz.push_back( uzp[i] );
+                            psplit_w.push_back( wp[i]/np_split );
+                        }
+                    }
+                } else {
+		    // Split particle in two along each diagonal
+		    // 4 particles in 2d
+                    for (int ishift = -1; ishift < 2; ishift +=2 ){
+                        // Add one particle with offset in x
+                        psplit_x.push_back( xp[i] + ishift*dx[0]/2 );
+                        psplit_y.push_back( yp[i] );
+                        psplit_z.push_back( zp[i] );
+                        psplit_ux.push_back( uxp[i] );
+                        psplit_uy.push_back( uyp[i] );
+                        psplit_uz.push_back( uzp[i] );
+                        psplit_w.push_back( wp[i]/np_split );
+                        // Add one particle with offset in z
+                        psplit_x.push_back( xp[i] );
+                        psplit_y.push_back( yp[i] );
+                        psplit_z.push_back( zp[i] + ishift*dx[2]/2 );
+                        psplit_ux.push_back( uxp[i] );
+                        psplit_uy.push_back( uyp[i] );
+                        psplit_uz.push_back( uzp[i] );
+                        psplit_w.push_back( wp[i]/np_split );
+                    }
+                }
+#elif (AMREX_SPACEDIM==3)
+		if (split_type==0){
+		    // Split particle in two along each axis
+		    // 6 particles in 2d
+		    for (int ishift = -1; ishift < 2; ishift +=2 ){
+			for (int jshift = -1; jshift < 2; jshift +=2 ){
+			    for (int kshift = -1; kshift < 2; kshift +=2 ){
+				// Add one particle with offset in x, y and z
+				psplit_x.push_back( xp[i] + ishift*dx[0]/2 );
+				psplit_y.push_back( yp[i] + jshift*dx[1]/2 );
+				psplit_z.push_back( zp[i] + jshift*dx[2]/2 );
+				psplit_ux.push_back( uxp[i] );
+				psplit_uy.push_back( uyp[i] );
+				psplit_uz.push_back( uzp[i] );
+				psplit_w.push_back( wp[i]/np_split );
+			    }
+			}
+		    }
+		} else {
+		    // Split particle in two along each diagonal
+		    // 8 particles in 3d
+                    for (int ishift = -1; ishift < 2; ishift +=2 ){
+                        // Add one particle with offset in x
+                        psplit_x.push_back( xp[i] + ishift*dx[0]/2 );
+                        psplit_y.push_back( yp[i] );
+                        psplit_z.push_back( zp[i] );
+                        psplit_ux.push_back( uxp[i] );
+                        psplit_uy.push_back( uyp[i] );
+                        psplit_uz.push_back( uzp[i] );
+                        psplit_w.push_back( wp[i]/np_split );
+                        // Add one particle with offset in y
+                        psplit_x.push_back( xp[i] );
+                        psplit_y.push_back( yp[i] + ishift*dx[1]/2 );
+                        psplit_z.push_back( zp[i] );
+                        psplit_ux.push_back( uxp[i] );
+                        psplit_uy.push_back( uyp[i] );
+                        psplit_uz.push_back( uzp[i] );
+                        psplit_w.push_back( wp[i]/np_split );
+                        // Add one particle with offset in z
+                        psplit_x.push_back( xp[i] );
+                        psplit_y.push_back( yp[i] );
+                        psplit_z.push_back( zp[i] + ishift*dx[2]/2 );
+                        psplit_ux.push_back( uxp[i] );
+                        psplit_uy.push_back( uyp[i] );
+                        psplit_uz.push_back( uzp[i] );
+                        psplit_w.push_back( wp[i]/np_split );
+                    }
+		}
+#endif
+		// invalidate the particle
+                p.m_idata.id = -p.m_idata.id;
+            }
+        }
+    }
+	// Add local arrays psplit_x etc. to the temporary
+	// particle container pctmp_split. Split particles
+	// are tagged with p.id()=NoSplitParticleID so that 
+	// they are not re-split when entering a higher level
+	// AddNParticles calls Redistribute, so that particles
+	// in pctmp_split are in the proper grids and tiles
+	pctmp_split.AddNParticles(lev, 
+                                  np_split_to_add,
+                                  psplit_x.dataPtr(),
+                                  psplit_y.dataPtr(),
+                                  psplit_z.dataPtr(),
+                                  psplit_ux.dataPtr(),
+                                  psplit_uy.dataPtr(),
+                                  psplit_uz.dataPtr(),
+                                  1,
+                                  psplit_w.dataPtr(),
+                                  1, NoSplitParticleID);
+	// Copy particles from tmp to current particle container
+    addParticles(pctmp_split,1);
+	// Clear tmp container
+    pctmp_split.clearParticles();
+}
+
+void
+PhysicalParticleContainer::PushPX(WarpXParIter& pti,
+	                          Cuda::DeviceVector<Real>& xp,
+                                  Cuda::DeviceVector<Real>& yp,
+                                  Cuda::DeviceVector<Real>& zp,
+                                  Cuda::DeviceVector<Real>& giv,
+                                  Real dt)
+{
+
+    // This wraps the call to warpx_particle_pusher so that inheritors can modify the call.
+    auto& attribs = pti.GetAttribs();
+    auto& uxp = attribs[PIdx::ux];
+    auto& uyp = attribs[PIdx::uy];
+    auto& uzp = attribs[PIdx::uz];
+    auto& Exp = attribs[PIdx::Ex];
+    auto& Eyp = attribs[PIdx::Ey];
+    auto& Ezp = attribs[PIdx::Ez];
+    auto& Bxp = attribs[PIdx::Bx];
+    auto& Byp = attribs[PIdx::By];
+    auto& Bzp = attribs[PIdx::Bz];
+    const long np  = pti.numParticles();
+
+#ifdef WARPX_STORE_OLD_PARTICLE_ATTRIBS
+    auto& xpold  = attribs[PIdx::xold];
+    auto& ypold  = attribs[PIdx::yold];
+    auto& zpold  = attribs[PIdx::zold];
+    auto& uxpold = attribs[PIdx::uxold];
+    auto& uypold = attribs[PIdx::uyold];
+    auto& uzpold = attribs[PIdx::uzold];
+
+    warpx_copy_attribs(&np, xp.dataPtr(), yp.dataPtr(), zp.dataPtr(),
+                       uxp.dataPtr(), uyp.dataPtr(), uzp.dataPtr(),
+                       xpold.dataPtr(), ypold.dataPtr(), zpold.dataPtr(),
+                       uxpold.dataPtr(), uypold.dataPtr(), uzpold.dataPtr());
+
+#endif
+
+    warpx_particle_pusher(&np,
+                          xp.dataPtr(),
+                          yp.dataPtr(),
+                          zp.dataPtr(),
+                          uxp.dataPtr(), uyp.dataPtr(), uzp.dataPtr(),
+                          giv.dataPtr(),
+                          Exp.dataPtr(), Eyp.dataPtr(), Ezp.dataPtr(),
+                          Bxp.dataPtr(), Byp.dataPtr(), Bzp.dataPtr(),
+                          &this->charge, &this->mass, &dt,
+                          &WarpX::particle_pusher_algo);
+
+}
+
+void
+PhysicalParticleContainer::PushP (int lev, Real dt,
+                                  const MultiFab& Ex, const MultiFab& Ey, const MultiFab& Ez,
+                                  const MultiFab& Bx, const MultiFab& By, const MultiFab& Bz)
+{
+    BL_PROFILE("PhysicalParticleContainer::PushP");
+
+    if (do_not_push) return;
+
+    const std::array<Real,3>& dx = WarpX::CellSize(lev);
+
+#ifdef _OPENMP
+#pragma omp parallel
+#endif
+    {
+#ifdef _OPENMP
+        int thread_num = omp_get_thread_num();
+#else
+        int thread_num = 0;
+#endif      
+        for (WarpXParIter pti(*this, lev); pti.isValid(); ++pti)
+	{
+	    const Box& box = pti.validbox();
+
+            auto& attribs = pti.GetAttribs();
+
+            auto& uxp = attribs[PIdx::ux];
+            auto& uyp = attribs[PIdx::uy];
+            auto& uzp = attribs[PIdx::uz];
+            auto& Exp = attribs[PIdx::Ex];
+            auto& Eyp = attribs[PIdx::Ey];
+            auto& Ezp = attribs[PIdx::Ez];
+            auto& Bxp = attribs[PIdx::Bx];
+            auto& Byp = attribs[PIdx::By];
+            auto& Bzp = attribs[PIdx::Bz];
+
+            const long np = pti.numParticles();
+
+	    // Data on the grid
+	    const FArrayBox& exfab = Ex[pti];
+	    const FArrayBox& eyfab = Ey[pti];
+	    const FArrayBox& ezfab = Ez[pti];
+	    const FArrayBox& bxfab = Bx[pti];
+	    const FArrayBox& byfab = By[pti];
+	    const FArrayBox& bzfab = Bz[pti];
+
+	    Exp.assign(np,0.0);
+	    Eyp.assign(np,0.0);
+	    Ezp.assign(np,0.0);
+	    Bxp.assign(np,WarpX::B_external[0]);
+	    Byp.assign(np,WarpX::B_external[1]);
+	    Bzp.assign(np,WarpX::B_external[2]);
+
+	    m_giv[thread_num].resize(np);
+
+	    //
+	    // copy data from particle container to temp arrays
+	    //
+            pti.GetPosition(m_xp[thread_num], m_yp[thread_num], m_zp[thread_num]);
+
+            const std::array<Real,3>& xyzmin_grid = WarpX::LowerCorner(box, lev);
+            const int* ixyzmin_grid = box.loVect();
+
+            const int ll4symtry          = false;
+            long lvect_fieldgathe = 64;
+
+            warpx_geteb_energy_conserving(
+                &np,
+                m_xp[thread_num].dataPtr(),
+                m_yp[thread_num].dataPtr(),
+                m_zp[thread_num].dataPtr(),
+                Exp.dataPtr(),Eyp.dataPtr(),Ezp.dataPtr(),
+                Bxp.dataPtr(),Byp.dataPtr(),Bzp.dataPtr(),
+                ixyzmin_grid,
+                &xyzmin_grid[0], &xyzmin_grid[1], &xyzmin_grid[2],
+                &dx[0], &dx[1], &dx[2],
+                &WarpX::nox, &WarpX::noy, &WarpX::noz,
+                BL_TO_FORTRAN_ANYD(exfab),
+                BL_TO_FORTRAN_ANYD(eyfab),
+                BL_TO_FORTRAN_ANYD(ezfab),
+                BL_TO_FORTRAN_ANYD(bxfab),
+                BL_TO_FORTRAN_ANYD(byfab),
+                BL_TO_FORTRAN_ANYD(bzfab),
+                &ll4symtry, &WarpX::l_lower_order_in_v, &WarpX::do_nodal,
+                &lvect_fieldgathe, &WarpX::field_gathering_algo);
+
+            warpx_particle_pusher_momenta(&np,
+                                          m_xp[thread_num].dataPtr(),
+                                          m_yp[thread_num].dataPtr(),
+                                          m_zp[thread_num].dataPtr(),
+                                          uxp.dataPtr(), uyp.dataPtr(), uzp.dataPtr(),
+                                          m_giv[thread_num].dataPtr(),
+                                          Exp.dataPtr(), Eyp.dataPtr(), Ezp.dataPtr(),
+                                          Bxp.dataPtr(), Byp.dataPtr(), Bzp.dataPtr(),
+                                          &this->charge, &this->mass, &dt,
+                                          &WarpX::particle_pusher_algo);
+        }
+    }
+}
+
+void PhysicalParticleContainer::GetParticleSlice(const int direction, const Real z_old,
+                                                 const Real z_new, const Real t_boost,
+                                                 const Real t_lab, const Real dt,
+                                                 DiagnosticParticles& diagnostic_particles)
+{
+    BL_PROFILE("PhysicalParticleContainer::GetParticleSlice");
+
+#ifdef WARPX_STORE_OLD_PARTICLE_ATTRIBS
+    // Assume that the boost in the positive z direction.
+#if (AMREX_SPACEDIM == 2)
+    AMREX_ALWAYS_ASSERT(direction == 1);
+#else
+    AMREX_ALWAYS_ASSERT(direction == 2);
+#endif
+
+    // Note the the slice should always move in the negative boost direction.
+    AMREX_ALWAYS_ASSERT(z_new < z_old);
+
+    const int nlevs = std::max(0, finestLevel()+1);
+
+    // we figure out a box for coarse-grained rejection. If the RealBox corresponding to a
+    // given tile doesn't intersect with this, there is no need to check any particles.
+    const Real* base_dx = Geom(0).CellSize();
+    const Real z_min = z_new - base_dx[direction];
+    const Real z_max = z_old + base_dx[direction];
+
+    RealBox slice_box = Geom(0).ProbDomain();
+    slice_box.setLo(direction, z_min);
+    slice_box.setHi(direction, z_max);
+
+    diagnostic_particles.resize(finestLevel()+1);
+    
+    for (int lev = 0; lev < nlevs; ++lev) {
+
+        const Real* dx  = Geom(lev).CellSize();
+        const Real* plo = Geom(lev).ProbLo();
+
+        // first we touch each map entry in serial
+        for (WarpXParIter pti(*this, lev); pti.isValid(); ++pti)
+        {
+            auto index = std::make_pair(pti.index(), pti.LocalTileIndex());
+            diagnostic_particles[lev][index];
+        }
+
+#ifdef _OPENMP
+#pragma omp parallel
+#endif
+        {
+            RealVector xp_new, yp_new, zp_new;
+
+            for (WarpXParIter pti(*this, lev); pti.isValid(); ++pti)
+            {
+                const Box& box = pti.validbox();
+
+                auto index = std::make_pair(pti.index(), pti.LocalTileIndex());
+
+                const RealBox tile_real_box(box, dx, plo);
+
+                if ( !slice_box.intersects(tile_real_box) ) continue;
+
+                pti.GetPosition(xp_new, yp_new, zp_new);
+
+                auto& attribs = pti.GetAttribs();
+
+                auto& wp = attribs[PIdx::w ];
+
+                auto& uxp_new = attribs[PIdx::ux   ];
+                auto& uyp_new = attribs[PIdx::uy   ];
+                auto& uzp_new = attribs[PIdx::uz   ];
+
+                auto&  xp_old = attribs[PIdx::xold ];
+                auto&  yp_old = attribs[PIdx::yold ];
+                auto&  zp_old = attribs[PIdx::zold ];
+                auto& uxp_old = attribs[PIdx::uxold];
+                auto& uyp_old = attribs[PIdx::uyold];
+                auto& uzp_old = attribs[PIdx::uzold];
+
+                const long np = pti.numParticles();
+
+                Real uzfrm = -WarpX::gamma_boost*WarpX::beta_boost*PhysConst::c;
+                Real inv_c2 = 1.0/PhysConst::c/PhysConst::c;
+
+                for (long i = 0; i < np; ++i) {
+
+                    // if the particle did not cross the plane of z_boost in the last
+                    // timestep, skip it.
+                    if ( not (((zp_new[i] >= z_new) && (zp_old[i] <= z_old)) ||
+                              ((zp_new[i] <= z_new) && (zp_old[i] >= z_old))) ) continue;
+
+                    // Lorentz transform particles to lab frame
+                    Real gamma_new_p = std::sqrt(1.0 + inv_c2*(uxp_new[i]*uxp_new[i] + uyp_new[i]*uyp_new[i] + uzp_new[i]*uzp_new[i]));
+                    Real t_new_p = WarpX::gamma_boost*t_boost - uzfrm*zp_new[i]*inv_c2;
+                    Real z_new_p = WarpX::gamma_boost*(zp_new[i] + WarpX::beta_boost*PhysConst::c*t_boost);
+                    Real uz_new_p = WarpX::gamma_boost*uzp_new[i] - gamma_new_p*uzfrm;
+
+                    Real gamma_old_p = std::sqrt(1.0 + inv_c2*(uxp_old[i]*uxp_old[i] + uyp_old[i]*uyp_old[i] + uzp_old[i]*uzp_old[i]));
+                    Real t_old_p = WarpX::gamma_boost*(t_boost - dt) - uzfrm*zp_old[i]*inv_c2;
+                    Real z_old_p = WarpX::gamma_boost*(zp_old[i] + WarpX::beta_boost*PhysConst::c*(t_boost-dt));
+                    Real uz_old_p = WarpX::gamma_boost*uzp_old[i] - gamma_old_p*uzfrm;
+
+                    // interpolate in time to t_lab
+                    Real weight_old = (t_new_p - t_lab) / (t_new_p - t_old_p);
+                    Real weight_new = (t_lab - t_old_p) / (t_new_p - t_old_p);
+
+                    Real xp = xp_old[i]*weight_old + xp_new[i]*weight_new;
+                    Real yp = yp_old[i]*weight_old + yp_new[i]*weight_new;
+                    Real zp = z_old_p  *weight_old + z_new_p  *weight_new;
+
+                    Real uxp = uxp_old[i]*weight_old + uxp_new[i]*weight_new;
+                    Real uyp = uyp_old[i]*weight_old + uyp_new[i]*weight_new;
+                    Real uzp = uz_old_p  *weight_old + uz_new_p  *weight_new;
+
+                    diagnostic_particles[lev][index].GetRealData(DiagIdx::w).push_back(wp[i]);
+                    
+                    diagnostic_particles[lev][index].GetRealData(DiagIdx::x).push_back(xp);
+                    diagnostic_particles[lev][index].GetRealData(DiagIdx::y).push_back(yp);
+                    diagnostic_particles[lev][index].GetRealData(DiagIdx::z).push_back(zp);
+
+                    diagnostic_particles[lev][index].GetRealData(DiagIdx::ux).push_back(uxp);
+                    diagnostic_particles[lev][index].GetRealData(DiagIdx::uy).push_back(uyp);
+                    diagnostic_particles[lev][index].GetRealData(DiagIdx::uz).push_back(uzp);
+                }
+            }
+        }
+    }
+#else
+    AMREX_ALWAYS_ASSERT_WITH_MESSAGE( false ,
+"ERROR: WarpX must be compiled with STORE_OLD_PARTICLE_ATTRIBS=TRUE to use the back-transformed diagnostics");
+#endif
+}
+
+int PhysicalParticleContainer::GetRefineFac(const Real x, const Real y, const Real z)
+{
+    if (finestLevel() == 0) return 1;
+    if (not WarpX::refine_plasma) return 1;
+
+    IntVect iv;
+    const Geometry& geom = Geom(0);
+
+    std::array<Real, 3> offset;
+
+#if ( AMREX_SPACEDIM == 3)
+    offset[0] = geom.ProbLo(0);
+    offset[1] = geom.ProbLo(1);
+    offset[2] = geom.ProbLo(2);
+#elif ( AMREX_SPACEDIM == 2 )
+    offset[0] = geom.ProbLo(0);
+    offset[1] = 0.0;
+    offset[2] = geom.ProbLo(1);
+#endif
+
+    AMREX_D_TERM(iv[0]=static_cast<int>(floor((x-offset[0])*geom.InvCellSize(0)));,
+                 iv[1]=static_cast<int>(floor((y-offset[1])*geom.InvCellSize(1)));,
+                 iv[2]=static_cast<int>(floor((z-offset[2])*geom.InvCellSize(2))););
+
+    iv += geom.Domain().smallEnd();
+
+    const int dir = WarpX::moving_window_dir;
+
+    IntVect iv2 = iv;
+    iv2[dir] = 0;
+
+    if ( (*m_refined_injection_mask)(iv2) != -1) return (*m_refined_injection_mask)(iv2);
+
+    int ref_fac = 1;
+    for (int lev = 0; lev < finestLevel(); ++lev)
+    {
+        const IntVect rr = m_gdb->refRatio(lev);
+        const BoxArray& fine_ba = this->ParticleBoxArray(lev+1);
+        const int num_boxes = fine_ba.size();
+        Vector<Box> stretched_boxes;
+        const int safety_factor = 4;
+        for (int i = 0; i < num_boxes; ++i)
+        {
+            Box bx = fine_ba[i];
+            bx.coarsen(ref_fac*rr[dir]);
+            bx.setSmall(dir, std::numeric_limits<int>::min()/safety_factor);
+            bx.setBig(dir, std::numeric_limits<int>::max()/safety_factor);
+            stretched_boxes.push_back(bx);
+        }
+
+        BoxArray stretched_ba(stretched_boxes.dataPtr(), stretched_boxes.size());
+
+        const int num_ghost = 0;
+        if ( stretched_ba.intersects(Box(iv, iv), num_ghost) )
+        {
+            ref_fac *= rr[dir];
+        }
+        else
+        {
+            break;
+        }
+    }
+
+    (*m_refined_injection_mask)(iv2) = ref_fac;
+
+    return ref_fac;
+}
diff --git a/Source/Particles/RigidInjectedParticleContainer.H b/Source/Particles/RigidInjectedParticleContainer.H
new file mode 100644
index 000000000..8a9ac9e6e
--- /dev/null
+++ b/Source/Particles/RigidInjectedParticleContainer.H
@@ -0,0 +1,81 @@
+#ifndef WARPX_RigidInjectedParticleContainer_H_
+#define WARPX_RigidInjectedParticleContainer_H_
+
+#include <PhysicalParticleContainer.H>
+#include <AMReX_Vector.H>
+
+class RigidInjectedParticleContainer
+    : public PhysicalParticleContainer
+{
+public:
+    RigidInjectedParticleContainer (amrex::AmrCore* amr_core,
+                                    int ispecies,
+                                    const std::string& name);
+    virtual ~RigidInjectedParticleContainer () {}
+
+    virtual void InitData() override;
+
+    virtual void RemapParticles();
+    virtual void BoostandRemapParticles();
+
+    virtual void Evolve (int lev,
+                         const amrex::MultiFab& Ex,
+                         const amrex::MultiFab& Ey,
+                         const amrex::MultiFab& Ez,
+                         const amrex::MultiFab& Bx,
+                         const amrex::MultiFab& By,
+                         const amrex::MultiFab& Bz,
+                         amrex::MultiFab& jx,
+                         amrex::MultiFab& jy,
+                         amrex::MultiFab& jz,
+                         amrex::MultiFab* cjx,
+                         amrex::MultiFab* cjy,
+                         amrex::MultiFab* cjz,
+                         amrex::MultiFab* rho,
+                         amrex::MultiFab* crho,
+                         const amrex::MultiFab* cEx,
+                         const amrex::MultiFab* cEy,
+                         const amrex::MultiFab* cEz,
+                         const amrex::MultiFab* cBx,
+                         const amrex::MultiFab* cBy,
+                         const amrex::MultiFab* cBz,
+                         amrex::Real t,
+                         amrex::Real dt) override;
+
+    virtual void PushPX(WarpXParIter& pti,
+	                amrex::Cuda::DeviceVector<amrex::Real>& xp,
+                        amrex::Cuda::DeviceVector<amrex::Real>& yp,
+                        amrex::Cuda::DeviceVector<amrex::Real>& zp,
+                        amrex::Cuda::DeviceVector<amrex::Real>& giv,
+                        amrex::Real dt) override;
+
+    virtual void PushP (int lev, amrex::Real dt,
+                        const amrex::MultiFab& Ex,
+                        const amrex::MultiFab& Ey,
+                        const amrex::MultiFab& Ez,
+                        const amrex::MultiFab& Bx,
+                        const amrex::MultiFab& By,
+                        const amrex::MultiFab& Bz) override;
+
+private:
+
+    // User input quantities
+    amrex::Real zinject_plane = 0.;
+    bool projected = true; // When true, particle transverse positions are directly projected (without adjusment)
+    bool focused = false; // When true, particle transverse positions are adjusted to account for distance between zinject and z=0
+    bool rigid_advance = true; // When true, particles are advance with vzbar before injection
+
+    amrex::Real vzbeam_ave_boosted;
+
+    amrex::Vector<int> done_injecting;
+    amrex::Vector<amrex::Real> zinject_plane_levels;
+
+    // Temporary quantites
+    amrex::Real zinject_plane_lev;
+    amrex::Real zinject_plane_lev_previous;
+    amrex::Vector<int> done_injecting_temp;
+    bool done_injecting_lev;
+
+};
+
+#endif
diff --git a/Source/Particles/RigidInjectedParticleContainer.cpp b/Source/Particles/RigidInjectedParticleContainer.cpp
new file mode 100644
index 000000000..db3623705
--- /dev/null
+++ b/Source/Particles/RigidInjectedParticleContainer.cpp
@@ -0,0 +1,462 @@
+#include <limits>
+#include <sstream>
+#include <algorithm>
+
+#ifdef _OPENMP
+#include <omp.h>
+#endif
+
+#include <RigidInjectedParticleContainer.H>
+#include <WarpX_f.H>
+#include <WarpX.H>
+#include <WarpXConst.H>
+
+using namespace amrex;
+
+RigidInjectedParticleContainer::RigidInjectedParticleContainer (AmrCore* amr_core, int ispecies,
+                                                                const std::string& name)
+    : PhysicalParticleContainer(amr_core, ispecies, name)
+{
+
+    ParmParse pp(species_name);
+
+    pp.get("zinject_plane", zinject_plane);
+    pp.query("projected", projected);
+    pp.query("focused", focused);
+    pp.query("rigid_advance", rigid_advance);
+
+}
+
+void RigidInjectedParticleContainer::InitData()
+{
+    done_injecting.resize(finestLevel()+1, 0);
+    zinject_plane_levels.resize(finestLevel()+1, zinject_plane/WarpX::gamma_boost);
+
+    AddParticles(0); // Note - add on level 0
+
+    // Particles added by AddParticles should already be in the boosted frame
+    RemapParticles();
+
+    Redistribute();  // We then redistribute
+}
+
+void
+RigidInjectedParticleContainer::RemapParticles()
+{
+
+    AMREX_ALWAYS_ASSERT_WITH_MESSAGE(projected, "ERROR: projected = false is not supported with this particle loading");
+    AMREX_ALWAYS_ASSERT_WITH_MESSAGE(!focused, "ERROR: focused = true is not supported with this particle loading");
+
+    // For rigid_advance == false, nothing needs to be done
+
+    if (rigid_advance) {
+
+        // The particle z positions are adjusted to account for the difference between
+        // advancing with vzbar and wih vz[i] before injection
+
+        // For now, start with the assumption that this will only happen
+        // at the start of the simulation.
+        const Real t_lab = 0.;
+
+        const Real uz_boost = WarpX::gamma_boost*WarpX::beta_boost*PhysConst::c;
+        const Real csq = PhysConst::c*PhysConst::c;
+
+        vzbeam_ave_boosted = meanParticleVelocity(false)[2];
+
+        for (int lev = 0; lev <= finestLevel(); lev++) {
+
+#ifdef _OPENMP
+#pragma omp parallel
+#endif
+            {
+                // Get the average beam velocity in the boosted frame.
+                // Note that the particles are already in the boosted frame.
+                // This value is saved to advance the particles not injected yet
+
+                Cuda::DeviceVector<Real> xp, yp, zp;
+
+                for (WarpXParIter pti(*this, lev); pti.isValid(); ++pti)
+                {
+
+                    auto& attribs = pti.GetAttribs();
+                    auto& uxp = attribs[PIdx::ux];
+                    auto& uyp = attribs[PIdx::uy];
+                    auto& uzp = attribs[PIdx::uz];
+
+                    // Copy data from particle container to temp arrays
+                    pti.GetPosition(xp, yp, zp);
+
+                    // Loop over particles
+                    const long np = pti.numParticles();
+                    for (int i=0 ; i < np ; i++) {
+
+                        const Real gammapr = std::sqrt(1. + (uxp[i]*uxp[i] + uyp[i]*uyp[i] + uzp[i]*uzp[i])/csq);
+                        const Real vzpr = uzp[i]/gammapr;
+
+                        // Back out the value of z_lab
+                        const Real z_lab = (zp[i] + uz_boost*t_lab + WarpX::gamma_boost*t_lab*vzpr)/(WarpX::gamma_boost + uz_boost*vzpr/csq);
+
+                        // Time of the particle in the boosted frame given its position in the lab frame at t=0.
+                        const Real tpr = WarpX::gamma_boost*t_lab - uz_boost*z_lab/csq;
+
+                        // Adjust the position, taking away its motion from its own velocity and adding
+                        // the motion from the average velocity
+                        zp[i] = zp[i] + tpr*vzpr - tpr*vzbeam_ave_boosted;
+
+                    }
+
+                    // Copy the data back to the particle container
+                    pti.SetPosition(xp, yp, zp);
+
+                }
+            }
+        }
+    }
+}
+
+
+void
+RigidInjectedParticleContainer::BoostandRemapParticles()
+{
+
+    // Boost the particles into the boosted frame and map the particles
+    // to the t=0 in the boosted frame. If using rigid_advance, the z position
+    // is adjusted using vzbar, otherwise using vz[i]
+
+    if (rigid_advance) {
+        // Get the average beam velocity in the boosted frame
+        // This value is saved to advance the particles not injected yet
+        const Real vzbeam_ave_lab = meanParticleVelocity(false)[2];
+        vzbeam_ave_boosted = (vzbeam_ave_lab - WarpX::beta_boost*PhysConst::c)/(1. - vzbeam_ave_lab*WarpX::beta_boost/PhysConst::c);
+    }
+
+#ifdef _OPENMP
+#pragma omp parallel
+#endif
+    {
+        Cuda::DeviceVector<Real> xp, yp, zp;
+
+        for (WarpXParIter pti(*this, 0); pti.isValid(); ++pti)
+        {
+
+            auto& attribs = pti.GetAttribs();
+            auto& uxp = attribs[PIdx::ux];
+            auto& uyp = attribs[PIdx::uy];
+            auto& uzp = attribs[PIdx::uz];
+
+            // Copy data from particle container to temp arrays
+            pti.GetPosition(xp, yp, zp);
+
+            // Loop over particles
+            const long np = pti.numParticles();
+            for (int i=0 ; i < np ; i++) {
+
+                const Real gamma_lab = std::sqrt(1. + (uxp[i]*uxp[i] + uyp[i]*uyp[i] + uzp[i]*uzp[i])/(PhysConst::c*PhysConst::c));
+
+                const Real vx_lab = uxp[i]/gamma_lab;
+                const Real vy_lab = uyp[i]/gamma_lab;
+                const Real vz_lab = uzp[i]/gamma_lab;
+
+                // t0_lab is the time in the lab frame that the particles reaches z=0
+                // The location and time (z=0, t=0) is a synchronization point between the
+                // lab and boosted frames.
+                const Real t0_lab = -zp[i]/vz_lab;
+
+                if (!projected) {
+                    xp[i] += t0_lab*vx_lab;
+                    yp[i] += t0_lab*vy_lab;
+                }
+                if (focused) {
+                    // Correct for focusing effect from shift from z=0 to zinject
+                    const Real tfocus = -zinject_plane*WarpX::gamma_boost/vz_lab;
+                    xp[i] -= tfocus*vx_lab;
+                    yp[i] -= tfocus*vy_lab;
+                }
+
+                // Time of the particle in the boosted frame given its position in the lab frame at t=0.
+                const Real tpr = -WarpX::gamma_boost*WarpX::beta_boost*zp[i]/PhysConst::c;
+
+                // Position of the particle in the boosted frame given its position in the lab frame at t=0.
+                const Real zpr = WarpX::gamma_boost*zp[i];
+
+                // Momentum of the particle in the boosted frame (assuming that it is fixed).
+                uzp[i] = WarpX::gamma_boost*(uzp[i] - WarpX::beta_boost*PhysConst::c*gamma_lab);
+
+                // Put the particle at the location in the boosted frame at boost frame t=0,
+                if (rigid_advance) {
+                    // with the particle moving at the average velocity
+                    zp[i] = zpr - vzbeam_ave_boosted*tpr;
+                }
+                else {
+                    // with the particle moving with its own velocity
+                    const Real gammapr = std::sqrt(1. + (uxp[i]*uxp[i] + uyp[i]*uyp[i] + uzp[i]*uzp[i])/(PhysConst::c*PhysConst::c));
+                    const Real vzpr = uzp[i]/gammapr;
+                    zp[i] = zpr - vzpr*tpr;
+                }
+
+            }
+
+            // Copy the data back to the particle container
+            pti.SetPosition(xp, yp, zp);
+
+        }
+    }
+}
+
+void
+RigidInjectedParticleContainer::PushPX(WarpXParIter& pti,
+	                               Cuda::DeviceVector<Real>& xp,
+                                       Cuda::DeviceVector<Real>& yp,
+                                       Cuda::DeviceVector<Real>& zp,
+                                       Cuda::DeviceVector<Real>& giv,
+                                       Real dt)
+{
+
+    // This wraps the call to warpx_particle_pusher so that inheritors can modify the call.
+    auto& attribs = pti.GetAttribs();
+    auto& uxp = attribs[PIdx::ux];
+    auto& uyp = attribs[PIdx::uy];
+    auto& uzp = attribs[PIdx::uz];
+    auto& Exp = attribs[PIdx::Ex];
+    auto& Eyp = attribs[PIdx::Ey];
+    auto& Ezp = attribs[PIdx::Ez];
+    auto& Bxp = attribs[PIdx::Bx];
+    auto& Byp = attribs[PIdx::By];
+    auto& Bzp = attribs[PIdx::Bz];
+    const long np  = pti.numParticles();
+
+#ifdef WARPX_STORE_OLD_PARTICLE_ATTRIBS
+    auto& xpold  = attribs[PIdx::xold];
+    auto& ypold  = attribs[PIdx::yold];
+    auto& zpold  = attribs[PIdx::zold];
+    auto& uxpold = attribs[PIdx::uxold];
+    auto& uypold = attribs[PIdx::uyold];
+    auto& uzpold = attribs[PIdx::uzold];
+
+    warpx_copy_attribs(&np, xp.dataPtr(), yp.dataPtr(), zp.dataPtr(),
+                       uxp.dataPtr(), uyp.dataPtr(), uzp.dataPtr(),
+                       xpold.dataPtr(), ypold.dataPtr(), zpold.dataPtr(),
+                       uxpold.dataPtr(), uypold.dataPtr(), uzpold.dataPtr());
+
+#endif
+
+    // Save the position and momenta, making copies
+    Cuda::DeviceVector<Real> xp_save, yp_save, zp_save;
+    RealVector uxp_save, uyp_save, uzp_save;
+
+    if (!done_injecting_lev) {
+        xp_save = xp;
+        yp_save = yp;
+        zp_save = zp;
+        uxp_save = uxp;
+        uyp_save = uyp;
+        uzp_save = uzp;
+        // Scale the fields of particles about to cross the injection plane.
+        // This only approximates what should be happening. The particles
+        // should by advanced a fraction of a time step instead.
+        // Scaling the fields is much easier and may be good enough.
+        for (int i=0 ; i < zp.size() ; i++) {
+            const Real dtscale = dt - (zinject_plane_lev_previous - zp[i])/(vzbeam_ave_boosted + WarpX::beta_boost*PhysConst::c);
+            if (0. < dtscale && dtscale < dt) {
+                Exp[i] *= dtscale;
+                Eyp[i] *= dtscale;
+                Ezp[i] *= dtscale;
+                Bxp[i] *= dtscale;
+                Byp[i] *= dtscale;
+                Bzp[i] *= dtscale;
+            }
+        }
+    }
+
+    warpx_particle_pusher(&np,
+                          xp.dataPtr(),
+                          yp.dataPtr(),
+                          zp.dataPtr(),
+                          uxp.dataPtr(), uyp.dataPtr(), uzp.dataPtr(),
+                          giv.dataPtr(),
+                          Exp.dataPtr(), Eyp.dataPtr(), Ezp.dataPtr(),
+                          Bxp.dataPtr(), Byp.dataPtr(), Bzp.dataPtr(),
+                          &this->charge, &this->mass, &dt,
+                          &WarpX::particle_pusher_algo);
+
+    if (!done_injecting_lev) {
+#ifdef _OPENMP
+        const int tid = omp_get_thread_num();
+#else
+        const int tid = 0;
+#endif
+        // Undo the push for particles not injected yet.
+        // The zp are advanced a fixed amount.
+        for (int i=0 ; i < zp.size() ; i++) {
+            if (zp[i] <= zinject_plane_lev) {
+                uxp[i] = uxp_save[i];
+                uyp[i] = uyp_save[i];
+                uzp[i] = uzp_save[i];
+                giv[i] = 1./std::sqrt(1. + (uxp[i]*uxp[i] + uyp[i]*uyp[i] + uzp[i]*uzp[i])/(PhysConst::c*PhysConst::c));
+                xp[i] = xp_save[i];
+                yp[i] = yp_save[i];
+                if (rigid_advance) {
+                    zp[i] = zp_save[i] + dt*vzbeam_ave_boosted;
+                }
+                else {
+                    zp[i] = zp_save[i] + dt*uzp[i]*giv[i];
+                }
+                done_injecting_temp[tid] = 0;
+            }
+        }
+    }
+
+}
+
+void
+RigidInjectedParticleContainer::Evolve (int lev,
+                                        const MultiFab& Ex, const MultiFab& Ey, const MultiFab& Ez,
+                                        const MultiFab& Bx, const MultiFab& By, const MultiFab& Bz,
+                                        MultiFab& jx, MultiFab& jy, MultiFab& jz,
+                                        MultiFab* cjx, MultiFab* cjy, MultiFab* cjz,
+                                        MultiFab* rho, MultiFab* crho,
+                                        const MultiFab* cEx, const MultiFab* cEy, const MultiFab* cEz,
+                                        const MultiFab* cBx, const MultiFab* cBy, const MultiFab* cBz,
+                                        Real t, Real dt)
+{
+
+    // Update location of injection plane in the boosted frame
+    zinject_plane_lev_previous = zinject_plane_levels[lev];
+    zinject_plane_levels[lev] -= dt*WarpX::beta_boost*PhysConst::c;
+    zinject_plane_lev = zinject_plane_levels[lev];
+
+    // Setup check of whether more particles need to be injected
+#ifdef _OPENMP
+    const int nthreads = omp_get_max_threads();
+#else
+    const int nthreads = 1;
+#endif
+    done_injecting_temp.assign(nthreads, 1); // We do not use bool because vector<bool> is special.
+    done_injecting_lev = done_injecting[lev];
+
+    PhysicalParticleContainer::Evolve (lev,
+				       Ex, Ey, Ez,
+				       Bx, By, Bz,
+				       jx, jy, jz,
+                                       cjx, cjy, cjz,
+                                       rho, crho,
+                                       cEx, cEy, cEz,
+                                       cBx, cBy, cBz,
+                                       t, dt);
+
+    // Check if all done_injecting_temp are still true.
+    done_injecting[lev] = std::all_of(done_injecting_temp.begin(), done_injecting_temp.end(),
+                                      [](int i) -> bool { return i; });
+}
+
+void
+RigidInjectedParticleContainer::PushP (int lev, Real dt,
+                                       const MultiFab& Ex, const MultiFab& Ey, const MultiFab& Ez,
+                                       const MultiFab& Bx, const MultiFab& By, const MultiFab& Bz)
+{
+    if (do_not_push) return;
+
+    const std::array<Real,3>& dx = WarpX::CellSize(lev);
+
+#ifdef _OPENMP
+#pragma omp parallel
+#endif
+    {
+        Cuda::DeviceVector<Real> xp, yp, zp, giv;
+
+        for (WarpXParIter pti(*this, lev); pti.isValid(); ++pti)
+        {
+            const Box& box = pti.validbox();
+
+            auto& attribs = pti.GetAttribs();
+
+            auto& uxp = attribs[PIdx::ux];
+            auto& uyp = attribs[PIdx::uy];
+            auto& uzp = attribs[PIdx::uz];
+            auto& Exp = attribs[PIdx::Ex];
+            auto& Eyp = attribs[PIdx::Ey];
+            auto& Ezp = attribs[PIdx::Ez];
+            auto& Bxp = attribs[PIdx::Bx];
+            auto& Byp = attribs[PIdx::By];
+            auto& Bzp = attribs[PIdx::Bz];
+
+            const long np = pti.numParticles();
+
+            // Data on the grid
+            const FArrayBox& exfab = Ex[pti];
+            const FArrayBox& eyfab = Ey[pti];
+            const FArrayBox& ezfab = Ez[pti];
+            const FArrayBox& bxfab = Bx[pti];
+            const FArrayBox& byfab = By[pti];
+            const FArrayBox& bzfab = Bz[pti];
+
+            Exp.assign(np,0.0);
+            Eyp.assign(np,0.0);
+            Ezp.assign(np,0.0);
+            Bxp.assign(np,WarpX::B_external[0]);
+            Byp.assign(np,WarpX::B_external[1]);
+            Bzp.assign(np,WarpX::B_external[2]);
+
+            giv.resize(np);
+
+            //
+            // copy data from particle container to temp arrays
+            //
+            pti.GetPosition(xp, yp, zp);
+
+            const std::array<Real,3>& xyzmin_grid = WarpX::LowerCorner(box, lev);
+            const int* ixyzmin_grid = box.loVect();
+
+            const int ll4symtry          = false;
+            const int l_lower_order_in_v = true;
+            long lvect_fieldgathe = 64;
+            warpx_geteb_energy_conserving(
+                &np,
+                xp.dataPtr(),
+                yp.dataPtr(),
+                zp.dataPtr(),
+                Exp.dataPtr(),Eyp.dataPtr(),Ezp.dataPtr(),
+                Bxp.dataPtr(),Byp.dataPtr(),Bzp.dataPtr(),
+                ixyzmin_grid,
+                &xyzmin_grid[0], &xyzmin_grid[1], &xyzmin_grid[2],
+                &dx[0], &dx[1], &dx[2],
+                &WarpX::nox, &WarpX::noy, &WarpX::noz,
+                BL_TO_FORTRAN_ANYD(exfab),
+                BL_TO_FORTRAN_ANYD(eyfab),
+                BL_TO_FORTRAN_ANYD(ezfab),
+                BL_TO_FORTRAN_ANYD(bxfab),
+                BL_TO_FORTRAN_ANYD(byfab),
+                BL_TO_FORTRAN_ANYD(bzfab),
+                &ll4symtry, &l_lower_order_in_v, &WarpX::do_nodal,
+                &lvect_fieldgathe, &WarpX::field_gathering_algo);
+
+            // Save the position and momenta, making copies
+            auto uxp_save = uxp;
+            auto uyp_save = uyp;
+            auto uzp_save = uzp;
+
+            warpx_particle_pusher_momenta(&np,
+                                          xp.dataPtr(),
+                                          yp.dataPtr(),
+                                          zp.dataPtr(),
+                                          uxp.dataPtr(), uyp.dataPtr(), uzp.dataPtr(),
+                                          giv.dataPtr(),
+                                          Exp.dataPtr(), Eyp.dataPtr(), Ezp.dataPtr(),
+                                          Bxp.dataPtr(), Byp.dataPtr(), Bzp.dataPtr(),
+                                          &this->charge, &this->mass, &dt,
+                                          &WarpX::particle_pusher_algo);
+
+            // Undo the push for particles not injected yet.
+            // It is assumed that PushP will only be called on the first and last steps
+            // and that no particles will cross zinject_plane.
+            for (int i=0 ; i < zp.size() ; i++) {
+                if (zp[i] <= zinject_plane_levels[lev]) {
+                    uxp[i] = uxp_save[i];
+                    uyp[i] = uyp_save[i];
+                    uzp[i] = uzp_save[i];
+                }
+            }
+
+        }
+    }
+}
diff --git a/Source/Particles/WarpXParticleContainer.H b/Source/Particles/WarpXParticleContainer.H
new file mode 100644
index 000000000..d11ce49c9
--- /dev/null
+++ b/Source/Particles/WarpXParticleContainer.H
@@ -0,0 +1,233 @@
+#ifndef WARPX_WarpXParticleContainer_H_
+#define WARPX_WarpXParticleContainer_H_
+
+#include <memory>
+
+#include <AMReX_Particles.H>
+#include <AMReX_AmrCore.H>
+
+struct PIdx
+{
+    enum { // Particle Attributes stored in amrex::ParticleContainer's struct of array
+	w = 0,  // weight
+	ux, uy, uz, Ex, Ey, Ez, Bx, By, Bz,
+#if (BL_SPACEDIM == 2) && WARPX_RZ
+        theta, // RZ needs all three position components
+#endif
+#ifdef WARPX_STORE_OLD_PARTICLE_ATTRIBS
+        xold, yold, zold, uxold, uyold, uzold,
+#endif        
+	nattribs
+    };
+};
+
+struct DiagIdx
+{
+    enum {
+        w = 0,
+        x, y, z, ux, uy, uz,
+        nattribs
+    };
+};
+
+class WarpXParIter
+    : public amrex::ParIter<0,0,PIdx::nattribs>
+{
+public:
+    using amrex::ParIter<0,0,PIdx::nattribs>::ParIter;
+
+    WarpXParIter (ContainerType& pc, int level);
+
+#if (AMREX_SPACEDIM == 2)
+    void GetPosition (amrex::Cuda::DeviceVector<amrex::Real>& x,
+                      amrex::Cuda::DeviceVector<amrex::Real>& y,
+                      amrex::Cuda::DeviceVector<amrex::Real>& z) const;
+    void SetPosition (const amrex::Cuda::DeviceVector<amrex::Real>& x,
+                      const amrex::Cuda::DeviceVector<amrex::Real>& y,
+                      const amrex::Cuda::DeviceVector<amrex::Real>& z);
+#endif
+
+    const std::array<RealVector, PIdx::nattribs>& GetAttribs () const { 
+        return GetStructOfArrays().GetRealData(); 
+    }
+    
+    std::array<RealVector, PIdx::nattribs>& GetAttribs () { 
+        return GetStructOfArrays().GetRealData(); 
+    }
+
+    const RealVector& GetAttribs (int comp) const { 
+        return GetStructOfArrays().GetRealData(comp);
+    }
+    
+    RealVector& GetAttribs (int comp) { 
+        return GetStructOfArrays().GetRealData(comp);
+    }
+};
+
+class MultiParticleContainer;
+
+class WarpXParticleContainer
+    : public amrex::ParticleContainer<0,0,PIdx::nattribs>
+{
+public:
+    friend MultiParticleContainer;
+
+    using DiagnosticParticleData = amrex::StructOfArrays<DiagIdx::nattribs, 0>;
+    using DiagnosticParticles = amrex::Vector<std::map<std::pair<int, int>, DiagnosticParticleData> >;
+    
+    WarpXParticleContainer (amrex::AmrCore* amr_core, int ispecies);
+    virtual ~WarpXParticleContainer() {}
+
+    virtual void InitData () = 0;
+
+    virtual void FieldGatherES (const amrex::Vector<std::array<std::unique_ptr<amrex::MultiFab>, 3> >& E,
+                                const amrex::Vector<std::unique_ptr<amrex::FabArray<amrex::BaseFab<int> > > >& masks) {}
+
+    virtual void FieldGather (int lev,
+                              const amrex::MultiFab& Ex, const amrex::MultiFab& Ey, const amrex::MultiFab& Ez,
+                              const amrex::MultiFab& Bx, const amrex::MultiFab& By, const amrex::MultiFab& Bz) {}
+
+#ifdef WARPX_DO_ELECTROSTATIC    
+    virtual void EvolveES (const amrex::Vector<std::array<std::unique_ptr<amrex::MultiFab>, 3> >& E,
+                                 amrex::Vector<std::unique_ptr<amrex::MultiFab> >& rho, 
+                           amrex::Real t, amrex::Real dt) = 0;
+#endif // WARPX_DO_ELECTROSTATIC
+    
+    virtual void Evolve (int lev,
+			 const amrex::MultiFab& Ex, const amrex::MultiFab& Ey, const amrex::MultiFab& Ez,
+			 const amrex::MultiFab& Bx, const amrex::MultiFab& By, const amrex::MultiFab& Bz,
+			 amrex::MultiFab& jx, amrex::MultiFab& jy, amrex::MultiFab& jz,
+                         amrex::MultiFab* cjx, amrex::MultiFab* cjy, amrex::MultiFab* cjz,
+                         amrex::MultiFab* rho, amrex::MultiFab* crho,
+                         const amrex::MultiFab* cEx, const amrex::MultiFab* cEy, const amrex::MultiFab* cEz,
+                         const amrex::MultiFab* cBx, const amrex::MultiFab* cBy, const amrex::MultiFab* cBz,
+                         amrex::Real t, amrex::Real dt) = 0;
+
+    virtual void PostRestart () = 0;
+
+    virtual void GetParticleSlice(const int direction,     const amrex::Real z_old,
+                                  const amrex::Real z_new, const amrex::Real t_boost, 
+                                  const amrex::Real t_lab, const amrex::Real dt,
+                                  DiagnosticParticles& diagnostic_particles) {}
+    
+    void AllocData ();
+
+    ///
+    /// This pushes the particle positions by one half time step.
+    /// It is used to desynchronize the particles after initializaton
+    /// or when restarting from a checkpoint.
+    /// This is the electrostatic version of the particle push.
+    /// 
+    void PushXES (amrex::Real dt);
+
+    ///
+    /// This pushes the particle positions by one half time step.
+    /// It is used to desynchronize the particles after initializaton
+    /// or when restarting from a checkpoint.
+    /// This is the electromagnetic version of the particle push.
+    /// 
+    void PushX (         amrex::Real dt);
+    void PushX (int lev, amrex::Real dt);
+
+    ///
+    /// This pushes the particle momenta by dt.
+    /// 
+    virtual void PushP (int lev, amrex::Real dt,
+                        const amrex::MultiFab& Ex,
+                        const amrex::MultiFab& Ey,
+                        const amrex::MultiFab& Ez,
+                        const amrex::MultiFab& Bx,
+                        const amrex::MultiFab& By,
+                        const amrex::MultiFab& Bz) = 0;
+
+    void DepositCharge(amrex::Vector<std::unique_ptr<amrex::MultiFab> >& rho,
+                       bool local = false);
+    std::unique_ptr<amrex::MultiFab> GetChargeDensity(int lev, bool local = false);
+
+   virtual void DepositCharge(WarpXParIter& pti,
+                             RealVector& wp,
+                             amrex::MultiFab* rhomf,
+                             amrex::MultiFab* crhomf,
+                             int icomp,
+                             const long np_current,
+                             const long np,
+                             int thread_num,
+                             int lev );
+  
+  virtual void DepositCurrent(WarpXParIter& pti,
+                              RealVector& wp,
+                              RealVector& uxp,
+                              RealVector& uyp,
+                              RealVector& uzp,
+                              amrex::MultiFab& jx,
+                              amrex::MultiFab& jy,
+                              amrex::MultiFab& jz,
+                              amrex::MultiFab* cjx,
+                              amrex::MultiFab* cjy,
+                              amrex::MultiFab* cjz,
+                              const long np_current,
+                              const long np,
+                              int thread_num,
+                              int lev,
+                              amrex::Real dt );
+  
+    /// 
+    /// This returns the total charge for all the particles in this ParticleContainer.
+    /// This is needed when solving Poisson's equation with periodic boundary conditions.
+    ///
+    amrex::Real sumParticleCharge(bool local = false);
+
+    std::array<amrex::Real, 3> meanParticleVelocity(bool local = false);
+
+    amrex::Real maxParticleVelocity(bool local = false);
+
+    void AddNParticles (int lev,
+                        int n, const amrex::Real* x, const amrex::Real* y, const amrex::Real* z,
+			const amrex::Real* vx, const amrex::Real* vy, const amrex::Real* vz,
+			int nattr, const amrex::Real* attr, int uniqueparticles, int id=-1);
+
+    void AddOneParticle (int lev, int grid, int tile,
+                         amrex::Real x, amrex::Real y, amrex::Real z,
+                         const std::array<amrex::Real,PIdx::nattribs>& attribs);
+
+    void AddOneParticle (ParticleTileType& particle_tile,
+                         amrex::Real x, amrex::Real y, amrex::Real z,
+                         const std::array<amrex::Real,PIdx::nattribs>& attribs);
+
+    void ReadHeader (std::istream& is);
+
+    void WriteHeader (std::ostream& os) const;
+
+    static void ReadParameters ();
+
+    static int NextID () { return ParticleType::NextID(); }
+
+    bool do_splitting = false;
+
+    // split along axes (0) or diagonals (1)
+    int split_type = 0;
+
+protected:
+
+    int species_id;
+
+    amrex::Real charge;
+    amrex::Real mass;
+
+    bool deposit_on_main_grid = false;
+
+    static int do_not_push;
+
+  amrex::Vector<std::unique_ptr<amrex::FArrayBox> > local_rho;
+  amrex::Vector<std::unique_ptr<amrex::FArrayBox> > local_jx;
+  amrex::Vector<std::unique_ptr<amrex::FArrayBox> > local_jy;
+  amrex::Vector<std::unique_ptr<amrex::FArrayBox> > local_jz;
+
+  amrex::Vector<amrex::Cuda::DeviceVector<amrex::Real> > m_xp, m_yp, m_zp, m_giv;
+
+private:
+    virtual void particlePostLocate(ParticleType& p, const amrex::ParticleLocData& pld,
+                                    const int lev) override;
+};
+
+#endif
diff --git a/Source/Particles/WarpXParticleContainer.cpp b/Source/Particles/WarpXParticleContainer.cpp
new file mode 100644
index 000000000..1ba82e3d3
--- /dev/null
+++ b/Source/Particles/WarpXParticleContainer.cpp
@@ -0,0 +1,1072 @@
+
+#include <limits>
+
+#include <MultiParticleContainer.H>
+#include <WarpXParticleContainer.H>
+#include <AMReX_AmrParGDB.H>
+#include <WarpX_f.H>
+#include <WarpX.H>
+
+using namespace amrex;
+
+int WarpXParticleContainer::do_not_push = 0;
+
+WarpXParIter::WarpXParIter (ContainerType& pc, int level)
+    : ParIter(pc, level, MFItInfo().SetDynamic(WarpX::do_dynamic_scheduling))
+{
+}
+
+#if (AMREX_SPACEDIM == 2)
+void
+WarpXParIter::GetPosition (Cuda::DeviceVector<Real>& x, Cuda::DeviceVector<Real>& y, Cuda::DeviceVector<Real>& z) const
+{
+    amrex::ParIter<0,0,PIdx::nattribs>::GetPosition(x, z);
+#ifdef WARPX_RZ
+    const auto& attribs = GetAttribs();
+    const auto& theta = attribs[PIdx::theta];
+    y.resize(x.size());
+    for (unsigned int i=0 ; i < x.size() ; i++) {
+        // The x stored in the particles is actually the radius
+        y[i] = x[i]*std::sin(theta[i]);
+        x[i] = x[i]*std::cos(theta[i]);
+    }
+#else
+    y.resize(x.size(), std::numeric_limits<Real>::quiet_NaN());
+#endif
+}
+
+void
+WarpXParIter::SetPosition (const Cuda::DeviceVector<Real>& x, const Cuda::DeviceVector<Real>& y, const Cuda::DeviceVector<Real>& z)
+{
+#ifdef WARPX_RZ
+    const auto& attribs = GetAttribs();
+    const auto& theta = attribs[PIdx::theta];
+    for (unsigned int i=0 ; i < x.size() ; i++) {
+        theta[i] = std::atan2(y[i], x[i]);
+        x[i] = std::sqrt(x[i]*x[i] + y[i]*y[i]);
+    }
+#endif
+    amrex::ParIter<0,0,PIdx::nattribs>::SetPosition(x, z);
+}
+#endif
+
+WarpXParticleContainer::WarpXParticleContainer (AmrCore* amr_core, int ispecies)
+    : ParticleContainer<0,0,PIdx::nattribs>(amr_core->GetParGDB())
+    , species_id(ispecies)
+{
+    for (unsigned int i = PIdx::Ex; i <= PIdx::Bz; ++i) {
+        communicate_real_comp[i] = false; // Don't need to communicate E and B.
+    }
+    SetParticleSize();
+    ReadParameters();
+
+    // Initialize temporary local arrays for charge/current deposition
+    int num_threads = 1;
+    #ifdef _OPENMP
+    #pragma omp parallel
+    #pragma omp single
+    num_threads = omp_get_num_threads();
+    #endif
+    local_rho.resize(num_threads);
+    local_jx.resize(num_threads);
+    local_jy.resize(num_threads);
+    local_jz.resize(num_threads);
+    m_xp.resize(num_threads);
+    m_yp.resize(num_threads);
+    m_zp.resize(num_threads);
+    m_giv.resize(num_threads);
+    for (int i = 0; i < num_threads; ++i)
+      {
+        local_rho[i].reset(nullptr);
+        local_jx[i].reset(nullptr);
+        local_jy[i].reset(nullptr);
+        local_jz[i].reset(nullptr);
+      }
+
+}
+
+void
+WarpXParticleContainer::ReadParameters ()
+{
+    static bool initialized = false;
+    if (!initialized)
+    {
+	ParmParse pp("particles");
+
+#ifdef AMREX_USE_GPU
+        do_tiling = false; // By default, tiling is off on GPU
+#else
+        do_tiling = true;
+#endif
+        pp.query("do_tiling",  do_tiling);
+        pp.query("do_not_push", do_not_push);
+
+	initialized = true;
+    }
+}
+
+void
+WarpXParticleContainer::AllocData ()
+{
+    // have to resize here, not in the constructor because grids have not
+    // been built when constructor was called.
+    reserveData();
+    resizeData();
+}
+
+void
+WarpXParticleContainer::AddOneParticle (int lev, int grid, int tile,
+                                        Real x, Real y, Real z,
+                                        const std::array<Real,PIdx::nattribs>& attribs)
+{
+    auto& particle_tile = GetParticles(lev)[std::make_pair(grid,tile)];
+    AddOneParticle(particle_tile, x, y, z, attribs);
+}
+
+void
+WarpXParticleContainer::AddOneParticle (ParticleTileType& particle_tile,
+                                        Real x, Real y, Real z,
+                                        const std::array<Real,PIdx::nattribs>& attribs)
+{
+    ParticleType p;
+    p.id()  = ParticleType::NextID();
+    p.cpu() = ParallelDescriptor::MyProc();
+#if (AMREX_SPACEDIM == 3)
+    p.pos(0) = x;
+    p.pos(1) = y;
+    p.pos(2) = z;
+#elif (AMREX_SPACEDIM == 2)
+#ifdef WARPX_RZ
+    attribs[PIdx::theta] = std::atan2(y, x);
+    x = std::sqrt(x*x + y*y);
+#endif
+    p.pos(0) = x;
+    p.pos(1) = z;
+#endif
+
+    particle_tile.push_back(p);
+    particle_tile.push_back_real(attribs);
+}
+
+void
+WarpXParticleContainer::AddNParticles (int lev,
+                                       int n, const Real* x, const Real* y, const Real* z,
+				       const Real* vx, const Real* vy, const Real* vz,
+				       int nattr, const Real* attr, int uniqueparticles, int id)
+{	
+    BL_ASSERT(nattr == 1);
+    const Real* weight = attr;
+
+    int ibegin, iend;
+    if (uniqueparticles) {
+	ibegin = 0;
+	iend = n;
+    } else {
+	int myproc = ParallelDescriptor::MyProc();
+	int nprocs = ParallelDescriptor::NProcs();
+	int navg = n/nprocs;
+	int nleft = n - navg * nprocs;
+	if (myproc < nleft) {
+	    ibegin = myproc*(navg+1);
+	    iend = ibegin + navg+1;
+	} else {
+	    ibegin = myproc*navg + nleft;
+	    iend = ibegin + navg;
+	}
+    }
+
+    //  Add to grid 0 and tile 0
+    // Redistribute() will move them to proper places.
+    std::pair<int,int> key {0,0};
+    auto& particle_tile = GetParticles(lev)[key];
+
+    std::size_t np = iend-ibegin;
+
+#ifdef WARPX_RZ
+    Vector<Real> theta(np);
+#endif
+
+    for (int i = ibegin; i < iend; ++i)
+    {
+        ParticleType p;
+	if (id==-1)
+	{
+	    p.id() = ParticleType::NextID();
+	} else {
+	    p.id() = id;
+	}
+        p.cpu() = ParallelDescriptor::MyProc();
+#if (AMREX_SPACEDIM == 3)
+        p.pos(0) = x[i];
+        p.pos(1) = y[i];
+        p.pos(2) = z[i];
+#elif (AMREX_SPACEDIM == 2)
+#ifdef WARPX_RZ
+        theta[i-ibegin] = std::atan2(y[i], x[i]);
+        p.pos(0) = std::sqrt(x[i]*x[i] + y[i]*y[i]);
+#else
+        p.pos(0) = x[i];
+#endif
+        p.pos(1) = z[i];
+#endif
+        particle_tile.push_back(p);
+    }
+
+    if (np > 0)
+    {
+        particle_tile.push_back_real(PIdx::w , weight + ibegin, weight + iend);
+        particle_tile.push_back_real(PIdx::ux,     vx + ibegin,     vx + iend);
+        particle_tile.push_back_real(PIdx::uy,     vy + ibegin,     vy + iend);
+        particle_tile.push_back_real(PIdx::uz,     vz + ibegin,     vz + iend);
+
+        for (int comp = PIdx::uz+1; comp < PIdx::nattribs; ++comp)
+        {
+#ifdef WARPX_RZ
+            if (comp == PIdx::theta) {
+                particle_tile.push_back_real(comp, theta.data, theta.data+np);
+            }
+            else {
+                particle_tile.push_back_real(comp, np, 0.0);
+            }
+#else
+            particle_tile.push_back_real(comp, np, 0.0);
+#endif
+        }
+    }
+
+    Redistribute();
+}
+
+
+void
+WarpXParticleContainer::DepositCurrent(WarpXParIter& pti,
+                                          RealVector& wp, RealVector& uxp,
+                                          RealVector& uyp, RealVector& uzp,
+                                          MultiFab& jx, MultiFab& jy, MultiFab& jz,
+                                          MultiFab* cjx, MultiFab* cjy, MultiFab* cjz,
+                                          const long np_current, const long np,
+                                          int thread_num, int lev, Real dt )
+{
+  Real *jx_ptr, *jy_ptr, *jz_ptr;
+  const std::array<Real,3>& xyzmin_tile = WarpX::LowerCorner(pti.tilebox(), lev);
+  const std::array<Real,3>& dx = WarpX::CellSize(lev);
+  const std::array<Real,3>& cdx = WarpX::CellSize(std::max(lev-1,0));
+  const std::array<Real, 3>& xyzmin = xyzmin_tile;
+  const long lvect = 8;
+
+  BL_PROFILE_VAR_NS("PICSAR::CurrentDeposition", blp_pxr_cd);
+  BL_PROFILE_VAR_NS("PPC::Evolve::Accumulate", blp_accumulate);
+
+  Box tbx = convert(pti.tilebox(), WarpX::jx_nodal_flag);
+  Box tby = convert(pti.tilebox(), WarpX::jy_nodal_flag);
+  Box tbz = convert(pti.tilebox(), WarpX::jz_nodal_flag);
+
+  // WarpX assumes the same number of guard cells for Jx, Jy, Jz
+  long ngJ = jx.nGrow();
+
+  bool j_is_nodal = jx.is_nodal() and jy.is_nodal() and jz.is_nodal();
+
+  // Deposit charge for particles that are not in the current buffers
+  if (np_current > 0)
+    {
+      tbx.grow(ngJ);
+      tby.grow(ngJ);
+      tbz.grow(ngJ);
+
+      local_jx[thread_num]->resize(tbx);
+      local_jy[thread_num]->resize(tby);
+      local_jz[thread_num]->resize(tbz);
+
+      jx_ptr = local_jx[thread_num]->dataPtr();
+      jy_ptr = local_jy[thread_num]->dataPtr();
+      jz_ptr = local_jz[thread_num]->dataPtr();
+
+      FArrayBox* local_jx_ptr = local_jx[thread_num].get();
+      AMREX_LAUNCH_HOST_DEVICE_LAMBDA(tbx, b,
+      {
+        local_jx_ptr->setVal(0.0, b, 0, 1);
+      });
+
+      FArrayBox* local_jy_ptr = local_jy[thread_num].get();
+      AMREX_LAUNCH_HOST_DEVICE_LAMBDA(tby, b,
+      {
+        local_jy_ptr->setVal(0.0, b, 0, 1);
+      });
+
+      FArrayBox* local_jz_ptr = local_jz[thread_num].get();
+      AMREX_LAUNCH_HOST_DEVICE_LAMBDA(tbz, b,
+      {
+        local_jz_ptr->setVal(0.0, b, 0, 1);
+      });
+
+      auto jxntot = local_jx[thread_num]->length();
+      auto jyntot = local_jy[thread_num]->length();
+      auto jzntot = local_jz[thread_num]->length();
+
+      BL_PROFILE_VAR_START(blp_pxr_cd);
+      if (j_is_nodal) {
+          const Real* p_wp = wp.dataPtr();
+          const Real* p_gaminv = m_giv[thread_num].dataPtr();
+          const Real* p_uxp = uxp.dataPtr();
+          const Real* p_uyp = uyp.dataPtr();
+          const Real* p_uzp = uzp.dataPtr();
+          AsyncArray<Real> wptmp_aa(np_current);
+          Real* const wptmp = wptmp_aa.data();
+          const Box& tile_box = pti.tilebox();
+#if (AMREX_SPACEDIM == 3)
+          const long nx = tile_box.length(0);
+          const long ny = tile_box.length(1);
+          const long nz = tile_box.length(2);
+#else
+          const long nx = tile_box.length(0);
+          const long ny = 0;
+          const long nz = tile_box.length(1);
+#endif
+          amrex::ParallelFor (np_current, [=] AMREX_GPU_DEVICE (long ip) {
+                  wptmp[ip] = p_wp[ip] * p_gaminv[ip] * p_uxp[ip];
+          });
+          warpx_charge_deposition(jx_ptr, &np_current,
+                                  m_xp[thread_num].dataPtr(),
+                                  m_yp[thread_num].dataPtr(),
+                                  m_zp[thread_num].dataPtr(),
+                                  wptmp,
+                                  &this->charge,
+                                  &xyzmin[0], &xyzmin[1], &xyzmin[2],
+                                  &dx[0], &dx[1], &dx[2], &nx, &ny, &nz,
+                                  &ngJ, &ngJ, &ngJ,
+                                  &WarpX::nox,&WarpX::noy,&WarpX::noz,
+                                  &lvect, &WarpX::current_deposition_algo);
+          amrex::ParallelFor (np_current, [=] AMREX_GPU_DEVICE (long ip) {
+                  wptmp[ip] = p_wp[ip] * p_gaminv[ip] * p_uyp[ip];
+          });
+          warpx_charge_deposition(jy_ptr, &np_current,
+                                  m_xp[thread_num].dataPtr(),
+                                  m_yp[thread_num].dataPtr(),
+                                  m_zp[thread_num].dataPtr(),
+                                  wptmp,
+                                  &this->charge,
+                                  &xyzmin[0], &xyzmin[1], &xyzmin[2],
+                                  &dx[0], &dx[1], &dx[2], &nx, &ny, &nz,
+                                  &ngJ, &ngJ, &ngJ,
+                                  &WarpX::nox,&WarpX::noy,&WarpX::noz,
+                                  &lvect, &WarpX::current_deposition_algo);
+          amrex::ParallelFor (np_current, [=] AMREX_GPU_DEVICE (long ip) {
+                  wptmp[ip] = p_wp[ip] * p_gaminv[ip] * p_uzp[ip];
+          });
+          warpx_charge_deposition(jz_ptr, &np_current,
+                                  m_xp[thread_num].dataPtr(),
+                                  m_yp[thread_num].dataPtr(),
+                                  m_zp[thread_num].dataPtr(),
+                                  wptmp,
+                                  &this->charge,
+                                  &xyzmin[0], &xyzmin[1], &xyzmin[2],
+                                  &dx[0], &dx[1], &dx[2], &nx, &ny, &nz,
+                                  &ngJ, &ngJ, &ngJ,
+                                  &WarpX::nox,&WarpX::noy,&WarpX::noz,
+                                  &lvect, &WarpX::current_deposition_algo);
+      } else {
+          warpx_current_deposition(
+                               jx_ptr, &ngJ, jxntot.getVect(),
+                               jy_ptr, &ngJ, jyntot.getVect(),
+                               jz_ptr, &ngJ, jzntot.getVect(),
+                               &np_current,
+                               m_xp[thread_num].dataPtr(),
+                               m_yp[thread_num].dataPtr(),
+                               m_zp[thread_num].dataPtr(),
+                               uxp.dataPtr(), uyp.dataPtr(), uzp.dataPtr(),
+                               m_giv[thread_num].dataPtr(),
+                               wp.dataPtr(), &this->charge,
+                               &xyzmin[0], &xyzmin[1], &xyzmin[2],
+                               &dt, &dx[0], &dx[1], &dx[2],
+                               &WarpX::nox,&WarpX::noy,&WarpX::noz,
+                               &lvect,&WarpX::current_deposition_algo);
+      }
+      BL_PROFILE_VAR_STOP(blp_pxr_cd);
+
+      BL_PROFILE_VAR_START(blp_accumulate);
+
+      FArrayBox const* local_jx_const_ptr = local_jx[thread_num].get();
+      FArrayBox* global_jx_ptr = jx.fabPtr(pti);
+      AMREX_LAUNCH_HOST_DEVICE_LAMBDA(tbx, thread_bx,
+      {
+        global_jx_ptr->atomicAdd(*local_jx_const_ptr, thread_bx, thread_bx, 0, 0, 1);
+      });
+
+      FArrayBox const* local_jy_const_ptr = local_jy[thread_num].get();
+      FArrayBox* global_jy_ptr = jy.fabPtr(pti);
+      AMREX_LAUNCH_HOST_DEVICE_LAMBDA(tby, thread_bx,
+      {
+        global_jy_ptr->atomicAdd(*local_jy_const_ptr, thread_bx, thread_bx, 0, 0, 1);
+      });
+
+      FArrayBox const* local_jz_const_ptr = local_jz[thread_num].get();
+      FArrayBox* global_jz_ptr = jz.fabPtr(pti);
+      AMREX_LAUNCH_HOST_DEVICE_LAMBDA(tbz, thread_bx,
+      {
+        global_jz_ptr->atomicAdd(*local_jz_const_ptr, thread_bx, thread_bx, 0, 0, 1);
+      });
+
+      BL_PROFILE_VAR_STOP(blp_accumulate);
+    }
+
+  // Deposit charge for particles that are in the current buffers
+  if (np_current < np)
+    {
+      const IntVect& ref_ratio = WarpX::RefRatio(lev-1);
+      const Box& ctilebox = amrex::coarsen(pti.tilebox(),ref_ratio);
+      const std::array<Real,3>& cxyzmin_tile = WarpX::LowerCorner(ctilebox, lev-1);
+
+      tbx = amrex::convert(ctilebox, WarpX::jx_nodal_flag);
+      tby = amrex::convert(ctilebox, WarpX::jy_nodal_flag);
+      tbz = amrex::convert(ctilebox, WarpX::jz_nodal_flag);
+      tbx.grow(ngJ);
+      tby.grow(ngJ);
+      tbz.grow(ngJ);
+
+      local_jx[thread_num]->resize(tbx);
+      local_jy[thread_num]->resize(tby);
+      local_jz[thread_num]->resize(tbz);
+
+      jx_ptr = local_jx[thread_num]->dataPtr();
+      jy_ptr = local_jy[thread_num]->dataPtr();
+      jz_ptr = local_jz[thread_num]->dataPtr();
+
+      FArrayBox* local_jx_ptr = local_jx[thread_num].get();
+      AMREX_LAUNCH_HOST_DEVICE_LAMBDA(tbx, b,
+      {
+        local_jx_ptr->setVal(0.0, b, 0, 1);
+      });
+
+      FArrayBox* local_jy_ptr = local_jy[thread_num].get();
+      AMREX_LAUNCH_HOST_DEVICE_LAMBDA(tby, b,
+      {
+        local_jy_ptr->setVal(0.0, b, 0, 1);
+      });
+
+      FArrayBox* local_jz_ptr = local_jz[thread_num].get();
+      AMREX_LAUNCH_HOST_DEVICE_LAMBDA(tbz, b,
+      {
+        local_jz_ptr->setVal(0.0, b, 0, 1);
+      });
+      auto jxntot = local_jx[thread_num]->length();
+      auto jyntot = local_jy[thread_num]->length();
+      auto jzntot = local_jz[thread_num]->length();
+
+      long ncrse = np - np_current;
+      BL_PROFILE_VAR_START(blp_pxr_cd);
+      if (j_is_nodal) {
+          const Real* p_wp = wp.dataPtr() + np_current;
+          const Real* p_gaminv = m_giv[thread_num].dataPtr() + np_current;
+          const Real* p_uxp = uxp.dataPtr() + np_current;
+          const Real* p_uyp = uyp.dataPtr() + np_current;
+          const Real* p_uzp = uzp.dataPtr() + np_current;
+          AsyncArray<Real> wptmp_aa(ncrse);
+          Real* const wptmp = wptmp_aa.data();
+          const Box& tile_box = pti.tilebox();
+#if (AMREX_SPACEDIM == 3)
+          const long nx = tile_box.length(0);
+          const long ny = tile_box.length(1);
+          const long nz = tile_box.length(2);
+#else
+          const long nx = tile_box.length(0);
+          const long ny = 0;
+          const long nz = tile_box.length(1);
+#endif
+          amrex::ParallelFor (ncrse, [=] AMREX_GPU_DEVICE (long ip) {
+                  wptmp[ip] = p_wp[ip] * p_gaminv[ip] * p_uxp[ip];
+          });
+          warpx_charge_deposition(jx_ptr, &ncrse,
+                                  m_xp[thread_num].dataPtr() +np_current,
+                                  m_yp[thread_num].dataPtr() +np_current,
+                                  m_zp[thread_num].dataPtr() +np_current,
+                                  wptmp,
+                                  &this->charge,
+                                  &cxyzmin_tile[0], &cxyzmin_tile[1], &cxyzmin_tile[2],
+                                  &cdx[0], &cdx[1], &cdx[2], &nx, &ny, &nz,
+                                  &ngJ, &ngJ, &ngJ,
+                                  &WarpX::nox,&WarpX::noy,&WarpX::noz,
+                                  &lvect, &WarpX::current_deposition_algo);
+          amrex::ParallelFor (ncrse, [=] AMREX_GPU_DEVICE (long ip) {
+                  wptmp[ip] = p_wp[ip] * p_gaminv[ip] * p_uyp[ip];
+          });
+          warpx_charge_deposition(jy_ptr, &ncrse,
+                                  m_xp[thread_num].dataPtr() +np_current,
+                                  m_yp[thread_num].dataPtr() +np_current,
+                                  m_zp[thread_num].dataPtr() +np_current,
+                                  wptmp,
+                                  &this->charge,
+                                  &cxyzmin_tile[0], &cxyzmin_tile[1], &cxyzmin_tile[2],
+                                  &cdx[0], &cdx[1], &cdx[2], &nx, &ny, &nz,
+                                  &ngJ, &ngJ, &ngJ,
+                                  &WarpX::nox,&WarpX::noy,&WarpX::noz,
+                                  &lvect, &WarpX::current_deposition_algo);
+          amrex::ParallelFor (ncrse, [=] AMREX_GPU_DEVICE (long ip) {
+                  wptmp[ip] = p_wp[ip] * p_gaminv[ip] * p_uzp[ip];
+          });
+          warpx_charge_deposition(jz_ptr, &ncrse,
+                                  m_xp[thread_num].dataPtr() +np_current,
+                                  m_yp[thread_num].dataPtr() +np_current,
+                                  m_zp[thread_num].dataPtr() +np_current,
+                                  wptmp,
+                                  &this->charge,
+                                  &cxyzmin_tile[0], &cxyzmin_tile[1], &cxyzmin_tile[2],
+                                  &cdx[0], &cdx[1], &cdx[2], &nx, &ny, &nz,
+                                  &ngJ, &ngJ, &ngJ,
+                                  &WarpX::nox,&WarpX::noy,&WarpX::noz,
+                                  &lvect, &WarpX::current_deposition_algo);
+      } else {
+          warpx_current_deposition(
+                               jx_ptr, &ngJ, jxntot.getVect(),
+                               jy_ptr, &ngJ, jyntot.getVect(),
+                               jz_ptr, &ngJ, jzntot.getVect(),
+                               &ncrse,
+                               m_xp[thread_num].dataPtr() +np_current,
+                               m_yp[thread_num].dataPtr() +np_current,
+                               m_zp[thread_num].dataPtr() +np_current,
+                               uxp.dataPtr()+np_current,
+                               uyp.dataPtr()+np_current,
+                               uzp.dataPtr()+np_current,
+                               m_giv[thread_num].dataPtr()+np_current,
+                               wp.dataPtr()+np_current, &this->charge,
+                               &cxyzmin_tile[0], &cxyzmin_tile[1], &cxyzmin_tile[2],
+                               &dt, &cdx[0], &cdx[1], &cdx[2],
+                               &WarpX::nox,&WarpX::noy,&WarpX::noz,
+                               &lvect,&WarpX::current_deposition_algo);
+      }
+      BL_PROFILE_VAR_STOP(blp_pxr_cd);
+
+      BL_PROFILE_VAR_START(blp_accumulate);
+
+      FArrayBox const* local_jx_const_ptr = local_jx[thread_num].get();
+      FArrayBox* global_jx_ptr = cjx->fabPtr(pti);
+      AMREX_LAUNCH_HOST_DEVICE_LAMBDA(tbx, thread_bx,
+      {
+        global_jx_ptr->atomicAdd(*local_jx_const_ptr, thread_bx, thread_bx, 0, 0, 1);
+      });
+
+      FArrayBox const* local_jy_const_ptr = local_jy[thread_num].get();
+      FArrayBox* global_jy_ptr = cjy->fabPtr(pti);
+      AMREX_LAUNCH_HOST_DEVICE_LAMBDA(tby, thread_bx,
+      {
+        global_jy_ptr->atomicAdd(*local_jy_const_ptr, thread_bx, thread_bx, 0, 0, 1);
+      });
+
+      FArrayBox const* local_jz_const_ptr = local_jz[thread_num].get();
+      FArrayBox* global_jz_ptr = cjz->fabPtr(pti);
+      AMREX_LAUNCH_HOST_DEVICE_LAMBDA(tbz, thread_bx,
+      {
+        global_jz_ptr->atomicAdd(*local_jz_const_ptr, thread_bx, thread_bx, 0, 0, 1);
+      });
+
+      BL_PROFILE_VAR_STOP(blp_accumulate);
+    }
+};
+
+
+void
+WarpXParticleContainer::DepositCharge ( WarpXParIter& pti, RealVector& wp,
+                                  MultiFab* rhomf, MultiFab* crhomf, int icomp,
+                                 const long np_current,
+                                 const long np, int thread_num, int lev )
+{
+
+  BL_PROFILE_VAR_NS("PICSAR::ChargeDeposition", blp_pxr_chd);
+  BL_PROFILE_VAR_NS("PPC::Evolve::Accumulate", blp_accumulate);
+
+  const std::array<Real,3>& xyzmin_tile = WarpX::LowerCorner(pti.tilebox(), lev);
+  const long lvect = 8;
+
+  long ngRho = rhomf->nGrow();
+  Real* data_ptr;
+  Box tile_box = convert(pti.tilebox(), IntVect::TheUnitVector());
+
+  const std::array<Real,3>& dx = WarpX::CellSize(lev);
+  const std::array<Real,3>& cdx = WarpX::CellSize(std::max(lev-1,0));
+
+  // Deposit charge for particles that are not in the current buffers
+  if (np_current > 0)
+    {
+      const std::array<Real, 3>& xyzmin = xyzmin_tile;
+      tile_box.grow(ngRho);
+      local_rho[thread_num]->resize(tile_box);
+      FArrayBox* local_rho_ptr = local_rho[thread_num].get();
+      AMREX_LAUNCH_HOST_DEVICE_LAMBDA(tile_box, b,
+      {
+        local_rho_ptr->setVal(0.0, b, 0, 1);
+      });
+
+      data_ptr = local_rho[thread_num]->dataPtr();
+      auto rholen = local_rho[thread_num]->length();
+#if (AMREX_SPACEDIM == 3)
+      const long nx = rholen[0]-1-2*ngRho;
+      const long ny = rholen[1]-1-2*ngRho;
+      const long nz = rholen[2]-1-2*ngRho;
+#else
+      const long nx = rholen[0]-1-2*ngRho;
+      const long ny = 0;
+      const long nz = rholen[1]-1-2*ngRho;
+#endif
+      BL_PROFILE_VAR_START(blp_pxr_chd);
+      warpx_charge_deposition(data_ptr, &np_current,
+                              m_xp[thread_num].dataPtr(),
+                              m_yp[thread_num].dataPtr(),
+                              m_zp[thread_num].dataPtr(),
+                              wp.dataPtr(),
+                              &this->charge,
+                              &xyzmin[0], &xyzmin[1], &xyzmin[2],
+                              &dx[0], &dx[1], &dx[2], &nx, &ny, &nz,
+                              &ngRho, &ngRho, &ngRho,
+                              &WarpX::nox,&WarpX::noy,&WarpX::noz,
+                              &lvect, &WarpX::charge_deposition_algo);
+      BL_PROFILE_VAR_STOP(blp_pxr_chd);
+
+      const int ncomp = 1;
+      FArrayBox const* local_fab = local_rho[thread_num].get();
+      FArrayBox*       global_fab = rhomf->fabPtr(pti);
+      BL_PROFILE_VAR_START(blp_accumulate);
+      AMREX_LAUNCH_HOST_DEVICE_LAMBDA(tile_box, tbx,
+      {
+        global_fab->atomicAdd(*local_fab, tbx, tbx, 0, icomp, ncomp);
+      });
+      BL_PROFILE_VAR_STOP(blp_accumulate);
+    }
+
+  // Deposit charge for particles that are in the current buffers
+  if (np_current < np)
+    {
+      const IntVect& ref_ratio = WarpX::RefRatio(lev-1);
+      const Box& ctilebox = amrex::coarsen(pti.tilebox(), ref_ratio);
+      const std::array<Real,3>& cxyzmin_tile = WarpX::LowerCorner(ctilebox, lev-1);
+
+      tile_box = amrex::convert(ctilebox, IntVect::TheUnitVector());
+      tile_box.grow(ngRho);
+      local_rho[thread_num]->resize(tile_box);
+      FArrayBox* local_rho_ptr = local_rho[thread_num].get();
+      AMREX_LAUNCH_HOST_DEVICE_LAMBDA(tile_box, b,
+      {
+        local_rho_ptr->setVal(0.0, b, 0, 1);
+      });
+
+      data_ptr = local_rho[thread_num]->dataPtr();
+      auto rholen = local_rho[thread_num]->length();
+#if (AMREX_SPACEDIM == 3)
+      const long nx = rholen[0]-1-2*ngRho;
+      const long ny = rholen[1]-1-2*ngRho;
+      const long nz = rholen[2]-1-2*ngRho;
+#else
+      const long nx = rholen[0]-1-2*ngRho;
+      const long ny = 0;
+      const long nz = rholen[1]-1-2*ngRho;
+#endif
+
+      long ncrse = np - np_current;
+      BL_PROFILE_VAR_START(blp_pxr_chd);
+      warpx_charge_deposition(data_ptr, &ncrse,
+                              m_xp[thread_num].dataPtr() + np_current,
+                              m_yp[thread_num].dataPtr() + np_current,
+                              m_zp[thread_num].dataPtr() + np_current,
+                              wp.dataPtr() + np_current,
+                              &this->charge,
+                              &cxyzmin_tile[0], &cxyzmin_tile[1], &cxyzmin_tile[2],
+                              &cdx[0], &cdx[1], &cdx[2], &nx, &ny, &nz,
+                              &ngRho, &ngRho, &ngRho,
+                              &WarpX::nox,&WarpX::noy,&WarpX::noz,
+                              &lvect, &WarpX::charge_deposition_algo);
+      BL_PROFILE_VAR_STOP(blp_pxr_chd);
+
+      const int ncomp = 1;
+      FArrayBox const* local_fab = local_rho[thread_num].get();
+      FArrayBox*       global_fab = crhomf->fabPtr(pti);
+      BL_PROFILE_VAR_START(blp_accumulate);
+      AMREX_LAUNCH_HOST_DEVICE_LAMBDA(tile_box, tbx,
+      {
+        global_fab->atomicAdd(*local_fab, tbx, tbx, 0, icomp, ncomp);
+      });
+      BL_PROFILE_VAR_STOP(blp_accumulate);
+    }
+};
+
+void
+WarpXParticleContainer::DepositCharge (Vector<std::unique_ptr<MultiFab> >& rho, bool local)
+{
+
+    int num_levels = rho.size();
+    int finest_level = num_levels - 1;
+
+    // each level deposits it's own particles
+    const int ng = rho[0]->nGrow();
+    for (int lev = 0; lev < num_levels; ++lev) {
+
+        rho[lev]->setVal(0.0, ng);
+
+        const auto& gm = m_gdb->Geom(lev);
+        const auto& ba = m_gdb->ParticleBoxArray(lev);
+        const auto& dm = m_gdb->DistributionMap(lev);
+
+        const Real* dx  = gm.CellSize();
+        const Real* plo = gm.ProbLo();
+        BoxArray nba = ba;
+        nba.surroundingNodes();
+
+        for (WarpXParIter pti(*this, lev); pti.isValid(); ++pti) {
+            const Box& box = nba[pti];
+
+            auto& wp = pti.GetAttribs(PIdx::w);
+            const auto& particles = pti.GetArrayOfStructs();
+            int nstride = particles.dataShape().first;
+            const long np  = pti.numParticles();
+
+            FArrayBox& rhofab = (*rho[lev])[pti];
+
+            WRPX_DEPOSIT_CIC(particles.dataPtr(), nstride, np,
+                             wp.dataPtr(), &this->charge,
+                             rhofab.dataPtr(), box.loVect(), box.hiVect(),
+                             plo, dx, &ng);
+        }
+
+        if (!local) rho[lev]->SumBoundary(gm.periodicity());
+    }
+
+    // now we average down fine to crse
+    std::unique_ptr<MultiFab> crse;
+    for (int lev = finest_level - 1; lev >= 0; --lev) {
+        const BoxArray& fine_BA = rho[lev+1]->boxArray();
+        const DistributionMapping& fine_dm = rho[lev+1]->DistributionMap();
+        BoxArray coarsened_fine_BA = fine_BA;
+        coarsened_fine_BA.coarsen(m_gdb->refRatio(lev));
+
+        MultiFab coarsened_fine_data(coarsened_fine_BA, fine_dm, 1, 0);
+        coarsened_fine_data.setVal(0.0);
+
+        IntVect ratio(AMREX_D_DECL(2, 2, 2));  // FIXME
+
+        for (MFIter mfi(coarsened_fine_data); mfi.isValid(); ++mfi) {
+            const Box& bx = mfi.validbox();
+            const Box& crse_box = coarsened_fine_data[mfi].box();
+            const Box& fine_box = (*rho[lev+1])[mfi].box();
+            WRPX_SUM_FINE_TO_CRSE_NODAL(bx.loVect(), bx.hiVect(), ratio.getVect(),
+                                        coarsened_fine_data[mfi].dataPtr(), crse_box.loVect(), crse_box.hiVect(),
+                                        (*rho[lev+1])[mfi].dataPtr(), fine_box.loVect(), fine_box.hiVect());
+        }
+
+        rho[lev]->copy(coarsened_fine_data, m_gdb->Geom(lev).periodicity(), FabArrayBase::ADD);
+    }
+}
+
+std::unique_ptr<MultiFab>
+WarpXParticleContainer::GetChargeDensity (int lev, bool local)
+{
+    const auto& gm = m_gdb->Geom(lev);
+    const auto& ba = m_gdb->ParticleBoxArray(lev);
+    const auto& dm = m_gdb->DistributionMap(lev);
+    BoxArray nba = ba;
+    nba.surroundingNodes();
+
+    const std::array<Real,3>& dx = WarpX::CellSize(lev);
+
+    const int ng = WarpX::nox;
+
+    auto rho = std::unique_ptr<MultiFab>(new MultiFab(nba,dm,1,ng));
+    rho->setVal(0.0);
+
+#ifdef _OPENMP
+#pragma omp parallel
+#endif
+    {
+        Cuda::DeviceVector<Real> xp, yp, zp;
+        FArrayBox local_rho;
+
+        for (WarpXParIter pti(*this, lev); pti.isValid(); ++pti)
+        {
+            const Box& box = pti.validbox();
+
+            auto& wp = pti.GetAttribs(PIdx::w);
+
+            const long np  = pti.numParticles();
+
+            pti.GetPosition(xp, yp, zp);
+
+            const std::array<Real,3>& xyzmin_tile = WarpX::LowerCorner(pti.tilebox(), lev);
+            const std::array<Real,3>& xyzmin_grid = WarpX::LowerCorner(box, lev);
+
+            // Data on the grid
+            Real* data_ptr;
+            FArrayBox& rhofab = (*rho)[pti];
+#ifdef _OPENMP
+            Box tile_box = convert(pti.tilebox(), IntVect::TheUnitVector());
+            const std::array<Real, 3>& xyzmin = xyzmin_tile;
+            tile_box.grow(ng);
+            local_rho.resize(tile_box);
+            local_rho = 0.0;
+            data_ptr = local_rho.dataPtr();
+            auto rholen = local_rho.length();
+#else
+            const std::array<Real, 3>& xyzmin = xyzmin_grid;
+            data_ptr = rhofab.dataPtr();
+            auto rholen = rhofab.length();
+#endif
+
+#if (AMREX_SPACEDIM == 3)
+            const long nx = rholen[0]-1-2*ng;
+            const long ny = rholen[1]-1-2*ng;
+            const long nz = rholen[2]-1-2*ng;
+#else
+            const long nx = rholen[0]-1-2*ng;
+            const long ny = 0;
+            const long nz = rholen[1]-1-2*ng;
+#endif
+
+            long nxg = ng;
+            long nyg = ng;
+            long nzg = ng;
+            long lvect = 8;
+
+            warpx_charge_deposition(data_ptr,
+                                    &np,
+                                    xp.dataPtr(),
+                                    yp.dataPtr(),
+                                    zp.dataPtr(), wp.dataPtr(),
+                                    &this->charge, &xyzmin[0], &xyzmin[1], &xyzmin[2],
+                                    &dx[0], &dx[1], &dx[2], &nx, &ny, &nz,
+                                    &nxg, &nyg, &nzg, &WarpX::nox,&WarpX::noy,&WarpX::noz,
+                                    &lvect, &WarpX::charge_deposition_algo);
+
+#ifdef _OPENMP
+            rhofab.atomicAdd(local_rho);
+#endif
+        }
+
+    }
+
+    if (!local) rho->SumBoundary(gm.periodicity());
+
+    return rho;
+}
+
+Real WarpXParticleContainer::sumParticleCharge(bool local) {
+
+    amrex::Real total_charge = 0.0;
+
+    for (int lev = 0; lev < finestLevel(); ++lev)
+    {
+
+#ifdef _OPENMP
+#pragma omp parallel reduction(+:total_charge)
+#endif
+        for (WarpXParIter pti(*this, lev); pti.isValid(); ++pti)
+        {
+            auto& wp = pti.GetAttribs(PIdx::w);
+            for (unsigned long i = 0; i < wp.size(); i++) {
+                total_charge += wp[i];
+            }
+        }
+    }
+
+    if (!local) ParallelDescriptor::ReduceRealSum(total_charge);
+    total_charge *= this->charge;
+    return total_charge;
+}
+
+std::array<Real, 3> WarpXParticleContainer::meanParticleVelocity(bool local) {
+
+    amrex::Real vx_total = 0.0;
+    amrex::Real vy_total = 0.0;
+    amrex::Real vz_total = 0.0;
+
+    long np_total = 0;
+
+    amrex::Real inv_clight_sq = 1.0/PhysConst::c/PhysConst::c;
+
+    for (int lev = 0; lev <= finestLevel(); ++lev) {
+
+#ifdef _OPENMP
+#pragma omp parallel reduction(+:vx_total, vy_total, vz_total, np_total)
+#endif
+        for (WarpXParIter pti(*this, lev); pti.isValid(); ++pti)
+        {
+            auto& ux = pti.GetAttribs(PIdx::ux);
+            auto& uy = pti.GetAttribs(PIdx::uy);
+            auto& uz = pti.GetAttribs(PIdx::uz);
+
+            np_total += pti.numParticles();
+
+            for (unsigned long i = 0; i < ux.size(); i++) {
+                Real usq = (ux[i]*ux[i] + uy[i]*uy[i] + uz[i]*uz[i])*inv_clight_sq;
+                Real gaminv = 1.0/std::sqrt(1.0 + usq);
+                vx_total += ux[i]*gaminv;
+                vy_total += uy[i]*gaminv;
+                vz_total += uz[i]*gaminv;
+            }
+        }
+    }
+
+    if (!local) {
+        ParallelDescriptor::ReduceRealSum(vx_total);
+        ParallelDescriptor::ReduceRealSum(vy_total);
+        ParallelDescriptor::ReduceRealSum(vz_total);
+        ParallelDescriptor::ReduceLongSum(np_total);
+    }
+
+    std::array<Real, 3> mean_v;
+    if (np_total > 0) {
+        mean_v[0] = vx_total / np_total;
+        mean_v[1] = vy_total / np_total;
+        mean_v[2] = vz_total / np_total;
+    }
+
+    return mean_v;
+}
+
+Real WarpXParticleContainer::maxParticleVelocity(bool local) {
+
+    amrex::Real max_v = 0.0;
+
+    for (int lev = 0; lev <= finestLevel(); ++lev)
+    {
+
+#ifdef _OPENMP
+#pragma omp parallel reduction(max:max_v)
+#endif
+        for (WarpXParIter pti(*this, lev); pti.isValid(); ++pti)
+        {
+            auto& ux = pti.GetAttribs(PIdx::ux);
+            auto& uy = pti.GetAttribs(PIdx::uy);
+            auto& uz = pti.GetAttribs(PIdx::uz);
+            for (unsigned long i = 0; i < ux.size(); i++) {
+                max_v = std::max(max_v, sqrt(ux[i]*ux[i] + uy[i]*uy[i] + uz[i]*uz[i]));
+            }
+        }
+    }
+
+    if (!local) ParallelDescriptor::ReduceRealMax(max_v);
+    return max_v;
+}
+
+void
+WarpXParticleContainer::PushXES (Real dt)
+{
+    BL_PROFILE("WPC::PushXES()");
+
+    int num_levels = finestLevel() + 1;
+
+    for (int lev = 0; lev < num_levels; ++lev) {
+        const auto& gm = m_gdb->Geom(lev);
+        const RealBox& prob_domain = gm.ProbDomain();
+        for (WarpXParIter pti(*this, lev); pti.isValid(); ++pti) {
+            auto& particles = pti.GetArrayOfStructs();
+            int nstride = particles.dataShape().first;
+            const long np  = pti.numParticles();
+
+            auto& attribs = pti.GetAttribs();
+            auto& uxp = attribs[PIdx::ux];
+            auto& uyp = attribs[PIdx::uy];
+            auto& uzp = attribs[PIdx::uz];
+
+            WRPX_PUSH_LEAPFROG_POSITIONS(particles.dataPtr(), nstride, np,
+                                         uxp.dataPtr(), uyp.dataPtr(),
+#if AMREX_SPACEDIM == 3
+                                         uzp.dataPtr(),
+#endif
+                                         &dt,
+                                         prob_domain.lo(), prob_domain.hi());
+        }
+    }
+}
+
+void
+WarpXParticleContainer::PushX (Real dt)
+{
+    for (int lev = 0; lev <= finestLevel(); ++lev) {
+        PushX(lev, dt);
+    }
+}
+
+void
+WarpXParticleContainer::PushX (int lev, Real dt)
+{
+    BL_PROFILE("WPC::PushX()");
+    BL_PROFILE_VAR_NS("WPC::PushX::Copy", blp_copy);
+    BL_PROFILE_VAR_NS("WPC:PushX::Push", blp_pxr_pp);
+
+    if (do_not_push) return;
+
+    MultiFab* cost = WarpX::getCosts(lev);
+
+#ifdef _OPENMP
+#pragma omp parallel
+#endif
+    {
+        Cuda::DeviceVector<Real> xp, yp, zp, giv;
+
+        for (WarpXParIter pti(*this, lev); pti.isValid(); ++pti)
+        {
+            Real wt = amrex::second();
+
+            auto& attribs = pti.GetAttribs();
+
+            auto& uxp = attribs[PIdx::ux];
+            auto& uyp = attribs[PIdx::uy];
+            auto& uzp = attribs[PIdx::uz];
+
+            const long np = pti.numParticles();
+
+            giv.resize(np);
+
+            //
+            // copy data from particle container to temp arrays
+            //
+            BL_PROFILE_VAR_START(blp_copy);
+            pti.GetPosition(xp, yp, zp);
+            BL_PROFILE_VAR_STOP(blp_copy);
+
+            //
+            // Particle Push
+            //
+            BL_PROFILE_VAR_START(blp_pxr_pp);
+            warpx_particle_pusher_positions(&np,
+                                            xp.dataPtr(),
+                                            yp.dataPtr(),
+                                            zp.dataPtr(),
+                                            uxp.dataPtr(), uyp.dataPtr(), uzp.dataPtr(),
+                                            giv.dataPtr(), &dt);
+            BL_PROFILE_VAR_STOP(blp_pxr_pp);
+
+            //
+            // copy particle data back
+            //
+            BL_PROFILE_VAR_START(blp_copy);
+            pti.SetPosition(xp, yp, zp);
+            BL_PROFILE_VAR_STOP(blp_copy);
+
+            if (cost) {
+                const Box& tbx = pti.tilebox();
+                wt = (amrex::second() - wt) / tbx.d_numPts();
+                FArrayBox* costfab = cost->fabPtr(pti);
+                AMREX_LAUNCH_HOST_DEVICE_LAMBDA ( tbx, work_box,
+                {
+                    costfab->plus(wt, work_box);
+                });
+            }
+        }
+    }
+}
+
+// This function is called in Redistribute, just after locate
+void 
+WarpXParticleContainer::particlePostLocate(ParticleType& p, 
+                                           const ParticleLocData& pld,
+                                           const int lev)
+{
+    // Tag particle if goes to higher level.
+    // It will be split later in the loop
+    if (pld.m_lev == lev+1 
+        and p.m_idata.id != NoSplitParticleID 
+        and p.m_idata.id >= 0)
+    {
+        p.m_idata.id = DoSplitParticleID;
+    }
+    // For the moment, do not do anything if particles goes
+    // to lower level.
+    if (pld.m_lev == lev-1){
+    }
+}
diff --git a/Source/Particles/deposit_cic.F90 b/Source/Particles/deposit_cic.F90
new file mode 100644
index 000000000..e4e1ace03
--- /dev/null
+++ b/Source/Particles/deposit_cic.F90
@@ -0,0 +1,134 @@
+module warpx_ES_deposit_cic
+
+  use iso_c_binding
+  use amrex_fort_module, only : amrex_real
+
+  implicit none
+
+contains
+
+! This routine computes the charge density due to the particles using cloud-in-cell 
+! deposition. The Fab rho is assumed to be node-centered.
+!
+! Arguments:
+!     particles : a pointer to the particle array-of-structs 
+!     ns        : the stride length of particle struct (the size of the struct in number of reals)
+!     np        : the number of particles
+!     weights   : the particle weights
+!     charge    : the charge of this particle species
+!     rho       : a Fab that will contain the charge density on exit
+!     lo        : a pointer to the lo corner of this valid box for rho, in index space
+!     hi        : a pointer to the hi corner of this valid box for rho, in index space
+!     plo       : the real position of the left-hand corner of the problem domain
+!     dx        : the mesh spacing
+!     ng        : the number of ghost cells in rho
+!
+  subroutine warpx_deposit_cic_3d(particles, ns, np,                     &
+                                  weights, charge, rho, lo, hi, plo, dx, &
+                                  ng)                                    &
+       bind(c,name='warpx_deposit_cic_3d')
+    integer, value,   intent(in)     :: ns, np
+    real(amrex_real), intent(in)     :: particles(ns,np)
+    real(amrex_real), intent(in)     :: weights(np)
+    real(amrex_real), intent(in)     :: charge
+    integer,          intent(in)     :: lo(3)
+    integer,          intent(in)     :: hi(3)
+    integer,          intent(in)     :: ng
+    real(amrex_real), intent(inout)  :: rho(lo(1)-ng:hi(1)+ng, lo(2)-ng:hi(2)+ng, lo(3)-ng:hi(3)+ng)
+    real(amrex_real), intent(in)     :: plo(3)
+    real(amrex_real), intent(in)     :: dx(3)
+
+    integer i, j, k, n
+    real(amrex_real) wx_lo, wy_lo, wz_lo, wx_hi, wy_hi, wz_hi
+    real(amrex_real) lx, ly, lz
+    real(amrex_real) inv_dx(3)
+    real(amrex_real) qp, inv_vol
+
+    inv_dx = 1.0d0/dx
+
+    inv_vol = inv_dx(1) * inv_dx(2) * inv_dx(3)
+
+    do n = 1, np
+
+       qp = weights(n) * charge * inv_vol
+
+       lx = (particles(1, n) - plo(1))*inv_dx(1)
+       ly = (particles(2, n) - plo(2))*inv_dx(2)
+       lz = (particles(3, n) - plo(3))*inv_dx(3)
+
+       i = floor(lx)
+       j = floor(ly)
+       k = floor(lz)
+
+       wx_hi = lx - i
+       wy_hi = ly - j
+       wz_hi = lz - k
+
+       wx_lo = 1.0d0 - wx_hi
+       wy_lo = 1.0d0 - wy_hi
+       wz_lo = 1.0d0 - wz_hi
+
+       rho(i,   j,   k  ) = rho(i,   j,   k  ) + wx_lo*wy_lo*wz_lo*qp
+       rho(i,   j,   k+1) = rho(i,   j,   k+1) + wx_lo*wy_lo*wz_hi*qp
+       rho(i,   j+1, k  ) = rho(i,   j+1, k  ) + wx_lo*wy_hi*wz_lo*qp
+       rho(i,   j+1, k+1) = rho(i,   j+1, k+1) + wx_lo*wy_hi*wz_hi*qp
+       rho(i+1, j,   k  ) = rho(i+1, j,   k  ) + wx_hi*wy_lo*wz_lo*qp
+       rho(i+1, j,   k+1) = rho(i+1, j,   k+1) + wx_hi*wy_lo*wz_hi*qp
+       rho(i+1, j+1, k  ) = rho(i+1, j+1, k  ) + wx_hi*wy_hi*wz_lo*qp
+       rho(i+1, j+1, k+1) = rho(i+1, j+1, k+1) + wx_hi*wy_hi*wz_hi*qp
+
+    end do
+
+  end subroutine warpx_deposit_cic_3d
+
+  subroutine warpx_deposit_cic_2d(particles, ns, np,                     &
+                                  weights, charge, rho, lo, hi, plo, dx, &
+                                  ng)                                    &
+       bind(c,name='warpx_deposit_cic_2d')
+    integer, value,   intent(in)     :: ns, np
+    real(amrex_real), intent(in)     :: particles(ns,np)
+    real(amrex_real), intent(in)     :: weights(np)
+    real(amrex_real), intent(in)     :: charge
+    integer,          intent(in)     :: lo(2)
+    integer,          intent(in)     :: hi(2)
+    integer,          intent(in)     :: ng
+    real(amrex_real), intent(inout)  :: rho(lo(1)-ng:hi(1)+ng, lo(2)-ng:hi(2)+ng)
+    real(amrex_real), intent(in)     :: plo(2)
+    real(amrex_real), intent(in)     :: dx(2)
+
+    integer i, j, n
+    real(amrex_real) wx_lo, wy_lo, wx_hi, wy_hi
+    real(amrex_real) lx, ly
+    real(amrex_real) inv_dx(2)
+    real(amrex_real) qp, inv_vol
+
+    inv_dx = 1.0d0/dx
+
+    inv_vol = inv_dx(1) * inv_dx(2)
+
+    do n = 1, np
+
+       qp = weights(n) * charge * inv_vol
+
+       lx = (particles(1, n) - plo(1))*inv_dx(1)
+       ly = (particles(2, n) - plo(2))*inv_dx(2)
+
+       i = floor(lx)
+       j = floor(ly)
+
+       wx_hi = lx - i
+       wy_hi = ly - j
+
+       wx_lo = 1.0d0 - wx_hi
+       wy_lo = 1.0d0 - wy_hi
+
+       rho(i,   j  ) = rho(i,   j  ) + wx_lo*wy_lo*qp
+       rho(i,   j+1) = rho(i,   j+1) + wx_lo*wy_hi*qp
+       rho(i+1, j  ) = rho(i+1, j  ) + wx_hi*wy_lo*qp
+       rho(i+1, j+1) = rho(i+1, j+1) + wx_hi*wy_hi*qp
+
+    end do
+
+  end subroutine warpx_deposit_cic_2d
+
+end module warpx_ES_deposit_cic
diff --git a/Source/Particles/interpolate_cic.F90 b/Source/Particles/interpolate_cic.F90
new file mode 100644
index 000000000..bc2c4f37e
--- /dev/null
+++ b/Source/Particles/interpolate_cic.F90
@@ -0,0 +1,371 @@
+module warpx_ES_interpolate_cic
+
+  use iso_c_binding
+  use amrex_fort_module, only : amrex_real
+
+  implicit none
+
+contains
+
+! This routine interpolates the electric field to the particle positions
+! using cloud-in-cell interpolation. The electric fields are assumed to be
+! node-centered.
+!
+! Arguments:
+!     particles : a pointer to the particle array-of-structs 
+!     ns        : the stride length of particle struct (the size of the struct in number of reals)
+!     np        : the number of particles
+!     Ex_p      : the electric field in the x-direction at the particle positions (output)
+!     Ey_p      : the electric field in the y-direction at the particle positions (output)
+!     Ez_p      : the electric field in the z-direction at the particle positions (output)
+!     Ex, Ey, Ez: Fabs conting the electric field on the mesh
+!     lo        : a pointer to the lo corner of this valid box, in index space
+!     hi        : a pointer to the hi corner of this valid box, in index space
+!     plo       : the real position of the left-hand corner of the problem domain
+!     dx        : the mesh spacing
+!     ng        : the number of ghost cells for the E-field
+!
+  subroutine warpx_interpolate_cic_3d(particles, ns, np,      &
+                                      Ex_p, Ey_p, Ez_p,       &
+                                      Ex,   Ey,   Ez,         &
+                                      lo, hi, plo, dx, ng)    &
+       bind(c,name='warpx_interpolate_cic_3d')
+    integer, value,   intent(in)     :: ns, np
+    real(amrex_real), intent(in)     :: particles(ns,np)
+    real(amrex_real), intent(inout)  :: Ex_p(np), Ey_p(np), Ez_p(np)
+    integer,          intent(in)     :: ng
+    integer,          intent(in)     :: lo(3)
+    integer,          intent(in)     :: hi(3)
+    real(amrex_real), intent(in)     :: Ex(lo(1)-ng:hi(1)+ng, lo(2)-ng:hi(2)+ng, lo(3)-ng:hi(3)+ng)
+    real(amrex_real), intent(in)     :: Ey(lo(1)-ng:hi(1)+ng, lo(2)-ng:hi(2)+ng, lo(3)-ng:hi(3)+ng)
+    real(amrex_real), intent(in)     :: Ez(lo(1)-ng:hi(1)+ng, lo(2)-ng:hi(2)+ng, lo(3)-ng:hi(3)+ng)
+    real(amrex_real), intent(in)     :: plo(3)
+    real(amrex_real), intent(in)     :: dx(3)
+
+    integer i, j, k, n
+    real(amrex_real) wx_lo, wy_lo, wz_lo, wx_hi, wy_hi, wz_hi
+    real(amrex_real) lx, ly, lz
+    real(amrex_real) inv_dx(3)
+    inv_dx = 1.0d0/dx
+
+    do n = 1, np
+       lx = (particles(1, n) - plo(1))*inv_dx(1)
+       ly = (particles(2, n) - plo(2))*inv_dx(2)
+       lz = (particles(3, n) - plo(3))*inv_dx(3)
+
+       i = floor(lx)
+       j = floor(ly)
+       k = floor(lz)
+
+       wx_hi = lx - i
+       wy_hi = ly - j
+       wz_hi = lz - k
+
+       wx_lo = 1.0d0 - wx_hi
+       wy_lo = 1.0d0 - wy_hi
+       wz_lo = 1.0d0 - wz_hi
+
+       Ex_p(n) = wx_lo*wy_lo*wz_lo*Ex(i,   j,   k  ) + &
+                 wx_lo*wy_lo*wz_hi*Ex(i,   j,   k+1) + &
+                 wx_lo*wy_hi*wz_lo*Ex(i,   j+1, k  ) + &
+                 wx_lo*wy_hi*wz_hi*Ex(i,   j+1, k+1) + &
+                 wx_hi*wy_lo*wz_lo*Ex(i+1, j,   k  ) + &
+                 wx_hi*wy_lo*wz_hi*Ex(i+1, j,   k+1) + &
+                 wx_hi*wy_hi*wz_lo*Ex(i+1, j+1, k  ) + &
+                 wx_hi*wy_hi*wz_hi*Ex(i+1, j+1, k+1)
+
+       Ey_p(n) = wx_lo*wy_lo*wz_lo*Ey(i,   j,   k  ) + &
+                 wx_lo*wy_lo*wz_hi*Ey(i,   j,   k+1) + &
+                 wx_lo*wy_hi*wz_lo*Ey(i,   j+1, k  ) + &
+                 wx_lo*wy_hi*wz_hi*Ey(i,   j+1, k+1) + &
+                 wx_hi*wy_lo*wz_lo*Ey(i+1, j,   k  ) + &
+                 wx_hi*wy_lo*wz_hi*Ey(i+1, j,   k+1) + &
+                 wx_hi*wy_hi*wz_lo*Ey(i+1, j+1, k  ) + &
+                 wx_hi*wy_hi*wz_hi*Ey(i+1, j+1, k+1)
+
+       Ez_p(n) = wx_lo*wy_lo*wz_lo*Ez(i,   j,   k  ) + &
+                 wx_lo*wy_lo*wz_hi*Ez(i,   j,   k+1) + &
+                 wx_lo*wy_hi*wz_lo*Ez(i,   j+1, k  ) + &
+                 wx_lo*wy_hi*wz_hi*Ez(i,   j+1, k+1) + &
+                 wx_hi*wy_lo*wz_lo*Ez(i+1, j,   k  ) + &
+                 wx_hi*wy_lo*wz_hi*Ez(i+1, j,   k+1) + &
+                 wx_hi*wy_hi*wz_lo*Ez(i+1, j+1, k  ) + &
+                 wx_hi*wy_hi*wz_hi*Ez(i+1, j+1, k+1)
+       
+    end do
+
+  end subroutine warpx_interpolate_cic_3d
+
+
+  subroutine warpx_interpolate_cic_2d(particles, ns, np,      &
+                             Ex_p, Ey_p,             &
+                             Ex,   Ey,               &
+                             lo, hi, plo, dx, ng)    &
+       bind(c,name='warpx_interpolate_cic_2d')
+    integer, value,   intent(in)     :: ns, np
+    real(amrex_real), intent(in)     :: particles(ns,np)
+    real(amrex_real), intent(inout)  :: Ex_p(np), Ey_p(np)
+    integer,          intent(in)     :: ng
+    integer,          intent(in)     :: lo(2)
+    integer,          intent(in)     :: hi(2)
+    real(amrex_real), intent(in)     :: Ex(lo(1)-ng:hi(1)+ng, lo(2)-ng:hi(2)+ng)
+    real(amrex_real), intent(in)     :: Ey(lo(1)-ng:hi(1)+ng, lo(2)-ng:hi(2)+ng)
+    real(amrex_real), intent(in)     :: plo(2)
+    real(amrex_real), intent(in)     :: dx(2)
+
+    integer i, j, n
+    real(amrex_real) wx_lo, wy_lo, wx_hi, wy_hi
+    real(amrex_real) lx, ly
+    real(amrex_real) inv_dx(2)
+    inv_dx = 1.0d0/dx
+
+    do n = 1, np
+       lx = (particles(1, n) - plo(1))*inv_dx(1)
+       ly = (particles(2, n) - plo(2))*inv_dx(2)
+
+       i = floor(lx)
+       j = floor(ly)
+
+       wx_hi = lx - i
+       wy_hi = ly - j
+
+       wx_lo = 1.0d0 - wx_hi
+       wy_lo = 1.0d0 - wy_hi
+
+       Ex_p(n) = wx_lo*wy_lo*Ex(i,   j  ) + &
+                 wx_lo*wy_hi*Ex(i,   j+1) + &
+                 wx_hi*wy_lo*Ex(i+1, j  ) + &
+                 wx_hi*wy_hi*Ex(i+1, j+1)
+
+       Ey_p(n) = wx_lo*wy_lo*Ey(i,   j  ) + &
+                 wx_lo*wy_hi*Ey(i,   j+1) + &
+                 wx_hi*wy_lo*Ey(i+1, j  ) + &
+                 wx_hi*wy_hi*Ey(i+1, j+1)
+
+    end do
+
+  end subroutine warpx_interpolate_cic_2d
+
+
+  subroutine warpx_interpolate_cic_two_levels_3d(particles, ns, np,      &
+                                                 Ex_p, Ey_p, Ez_p,       &
+                                                 Ex,   Ey,   Ez,         &
+                                                 lo,   hi,   dx,         &
+                                                 cEx,  cEy,  cEz,        &
+                                                 mask,                   &
+                                                 clo,  chi,  cdx,        &
+                                                 plo,  ng,   lev)        &
+       bind(c,name='warpx_interpolate_cic_two_levels_3d')
+    integer, value,   intent(in)     :: ns, np
+    real(amrex_real), intent(in)     :: particles(ns,np)
+    real(amrex_real), intent(inout)  :: Ex_p(np), Ey_p(np), Ez_p(np)
+    integer,          intent(in)     :: ng, lev
+    integer,          intent(in)     :: lo(3), hi(3)
+    integer,          intent(in)     :: clo(3), chi(3)
+    real(amrex_real), intent(in)     :: Ex(lo(1)-ng:hi(1)+ng, lo(2)-ng:hi(2)+ng, lo(3)-ng:hi(3)+ng)
+    real(amrex_real), intent(in)     :: Ey(lo(1)-ng:hi(1)+ng, lo(2)-ng:hi(2)+ng, lo(3)-ng:hi(3)+ng)
+    real(amrex_real), intent(in)     :: Ez(lo(1)-ng:hi(1)+ng, lo(2)-ng:hi(2)+ng, lo(3)-ng:hi(3)+ng)
+    real(amrex_real), intent(in)     :: cEx(clo(1)-ng:chi(1)+ng, clo(2)-ng:chi(2)+ng, clo(3)-ng:chi(3)+ng)
+    real(amrex_real), intent(in)     :: cEy(clo(1)-ng:chi(1)+ng, clo(2)-ng:chi(2)+ng, clo(3)-ng:chi(3)+ng)
+    real(amrex_real), intent(in)     :: cEz(clo(1)-ng:chi(1)+ng, clo(2)-ng:chi(2)+ng, clo(3)-ng:chi(3)+ng)
+    integer(c_int),   intent(in)     :: mask (lo(1):hi(1),lo(2):hi(2),lo(3):hi(3))
+    real(amrex_real), intent(in)     :: plo(3)
+    real(amrex_real), intent(in)     :: dx(3), cdx(3)
+
+    integer i, j, k, n
+    real(amrex_real) wx_lo, wy_lo, wz_lo, wx_hi, wy_hi, wz_hi
+    real(amrex_real) lx, ly, lz
+    real(amrex_real) inv_dx(3), inv_cdx(3)
+    inv_dx  = 1.0d0/dx
+    inv_cdx = 1.0d0/cdx
+
+    do n = 1, np
+
+       lx = (particles(1, n) - plo(1))*inv_dx(1)
+       ly = (particles(2, n) - plo(2))*inv_dx(2)
+       lz = (particles(3, n) - plo(3))*inv_dx(3)
+       
+       i = floor(lx)
+       j = floor(ly)
+       k = floor(lz)
+
+! use the coarse E if near the level boundary
+       if (lev .eq. 1 .and. mask(i,j,k) .eq. 1) then
+
+          lx = (particles(1, n) - plo(1))*inv_cdx(1)
+          ly = (particles(2, n) - plo(2))*inv_cdx(2)
+          lz = (particles(3, n) - plo(3))*inv_cdx(3)
+
+          i = floor(lx)
+          j = floor(ly)
+          k = floor(lz)
+
+          wx_hi = lx - i
+          wy_hi = ly - j
+          wz_hi = lz - k
+          
+          wx_lo = 1.0d0 - wx_hi
+          wy_lo = 1.0d0 - wy_hi
+          wz_lo = 1.0d0 - wz_hi
+          
+          Ex_p(n) = wx_lo*wy_lo*wz_lo*cEx(i,   j,   k  ) + &
+                    wx_lo*wy_lo*wz_hi*cEx(i,   j,   k+1) + &
+                    wx_lo*wy_hi*wz_lo*cEx(i,   j+1, k  ) + &
+                    wx_lo*wy_hi*wz_hi*cEx(i,   j+1, k+1) + &
+                    wx_hi*wy_lo*wz_lo*cEx(i+1, j,   k  ) + &
+                    wx_hi*wy_lo*wz_hi*cEx(i+1, j,   k+1) + &
+                    wx_hi*wy_hi*wz_lo*cEx(i+1, j+1, k  ) + &
+                    wx_hi*wy_hi*wz_hi*cEx(i+1, j+1, k+1)
+
+          Ey_p(n) = wx_lo*wy_lo*wz_lo*cEy(i,   j,   k  ) + &
+                    wx_lo*wy_lo*wz_hi*cEy(i,   j,   k+1) + &
+                    wx_lo*wy_hi*wz_lo*cEy(i,   j+1, k  ) + &
+                    wx_lo*wy_hi*wz_hi*cEy(i,   j+1, k+1) + &
+                    wx_hi*wy_lo*wz_lo*cEy(i+1, j,   k  ) + &
+                    wx_hi*wy_lo*wz_hi*cEy(i+1, j,   k+1) + &
+                    wx_hi*wy_hi*wz_lo*cEy(i+1, j+1, k  ) + &
+                    wx_hi*wy_hi*wz_hi*cEy(i+1, j+1, k+1)
+          
+          Ez_p(n) = wx_lo*wy_lo*wz_lo*cEz(i,   j,   k  ) + &
+                    wx_lo*wy_lo*wz_hi*cEz(i,   j,   k+1) + &
+                    wx_lo*wy_hi*wz_lo*cEz(i,   j+1, k  ) + &
+                    wx_lo*wy_hi*wz_hi*cEz(i,   j+1, k+1) + &
+                    wx_hi*wy_lo*wz_lo*cEz(i+1, j,   k  ) + &
+                    wx_hi*wy_lo*wz_hi*cEz(i+1, j,   k+1) + &
+                    wx_hi*wy_hi*wz_lo*cEz(i+1, j+1, k  ) + &
+                    wx_hi*wy_hi*wz_hi*cEz(i+1, j+1, k+1)
+          
+! otherwise use the fine
+       else
+
+          wx_hi = lx - i
+          wy_hi = ly - j
+          wz_hi = lz - k
+          
+          wx_lo = 1.0d0 - wx_hi
+          wy_lo = 1.0d0 - wy_hi
+          wz_lo = 1.0d0 - wz_hi
+
+          Ex_p(n) = wx_lo*wy_lo*wz_lo*Ex(i,   j,   k  ) + &
+               wx_lo*wy_lo*wz_hi*Ex(i,   j,   k+1) + &
+               wx_lo*wy_hi*wz_lo*Ex(i,   j+1, k  ) + &
+               wx_lo*wy_hi*wz_hi*Ex(i,   j+1, k+1) + &
+               wx_hi*wy_lo*wz_lo*Ex(i+1, j,   k  ) + &
+               wx_hi*wy_lo*wz_hi*Ex(i+1, j,   k+1) + &
+               wx_hi*wy_hi*wz_lo*Ex(i+1, j+1, k  ) + &
+               wx_hi*wy_hi*wz_hi*Ex(i+1, j+1, k+1)
+          
+          Ey_p(n) = wx_lo*wy_lo*wz_lo*Ey(i,   j,   k  ) + &
+                    wx_lo*wy_lo*wz_hi*Ey(i,   j,   k+1) + &
+                    wx_lo*wy_hi*wz_lo*Ey(i,   j+1, k  ) + &
+                    wx_lo*wy_hi*wz_hi*Ey(i,   j+1, k+1) + &
+                    wx_hi*wy_lo*wz_lo*Ey(i+1, j,   k  ) + &
+                    wx_hi*wy_lo*wz_hi*Ey(i+1, j,   k+1) + &
+                    wx_hi*wy_hi*wz_lo*Ey(i+1, j+1, k  ) + &
+                    wx_hi*wy_hi*wz_hi*Ey(i+1, j+1, k+1)
+          
+          Ez_p(n) = wx_lo*wy_lo*wz_lo*Ez(i,   j,   k  ) + &
+                    wx_lo*wy_lo*wz_hi*Ez(i,   j,   k+1) + &
+                    wx_lo*wy_hi*wz_lo*Ez(i,   j+1, k  ) + &
+                    wx_lo*wy_hi*wz_hi*Ez(i,   j+1, k+1) + &
+                    wx_hi*wy_lo*wz_lo*Ez(i+1, j,   k  ) + &
+                    wx_hi*wy_lo*wz_hi*Ez(i+1, j,   k+1) + &
+                    wx_hi*wy_hi*wz_lo*Ez(i+1, j+1, k  ) + &
+                    wx_hi*wy_hi*wz_hi*Ez(i+1, j+1, k+1)
+       
+       end if
+
+    end do
+
+  end subroutine warpx_interpolate_cic_two_levels_3d
+
+
+  subroutine warpx_interpolate_cic_two_levels_2d(particles, ns, np,      &
+                                                 Ex_p, Ey_p,             &
+                                                 Ex,   Ey,               &
+                                                 lo,   hi,   dx,         &
+                                                 cEx,  cEy,              &
+                                                 mask,                   &
+                                                 clo,  chi,  cdx,        &
+                                                 plo,  ng,   lev)        &
+       bind(c,name='warpx_interpolate_cic_two_levels_2d')
+    integer, value,   intent(in)     :: ns, np
+    real(amrex_real), intent(in)     :: particles(ns,np)
+    real(amrex_real), intent(inout)  :: Ex_p(np), Ey_p(np)
+    integer,          intent(in)     :: ng, lev
+    integer,          intent(in)     :: lo(2), hi(2)
+    integer,          intent(in)     :: clo(2), chi(2)
+    real(amrex_real), intent(in)     :: Ex(lo(1)-ng:hi(1)+ng, lo(2)-ng:hi(2)+ng)
+    real(amrex_real), intent(in)     :: Ey(lo(1)-ng:hi(1)+ng, lo(2)-ng:hi(2)+ng)
+    real(amrex_real), intent(in)     :: cEx(clo(1)-ng:chi(1)+ng, clo(2)-ng:chi(2)+ng)
+    real(amrex_real), intent(in)     :: cEy(clo(1)-ng:chi(1)+ng, clo(2)-ng:chi(2)+ng)
+    integer(c_int),   intent(in)     :: mask (lo(1):hi(1),lo(2):hi(2))
+    real(amrex_real), intent(in)     :: plo(2)
+    real(amrex_real), intent(in)     :: dx(2), cdx(2)
+
+    integer i, j, n
+    real(amrex_real) wx_lo, wy_lo, wx_hi, wy_hi
+    real(amrex_real) lx, ly
+    real(amrex_real) inv_dx(2), inv_cdx(2)
+    inv_dx  = 1.0d0/dx
+    inv_cdx = 1.0d0/cdx
+
+    do n = 1, np
+
+       lx = (particles(1, n) - plo(1))*inv_dx(1)
+       ly = (particles(2, n) - plo(2))*inv_dx(2)
+       
+       i = floor(lx)
+       j = floor(ly)
+
+! use the coarse E if near the level boundary
+       if (lev .eq. 1 .and. mask(i,j) .eq. 1) then
+
+          lx = (particles(1, n) - plo(1))*inv_cdx(1)
+          ly = (particles(2, n) - plo(2))*inv_cdx(2)
+
+          i = floor(lx)
+          j = floor(ly)
+
+          wx_hi = lx - i
+          wy_hi = ly - j
+          
+          wx_lo = 1.0d0 - wx_hi
+          wy_lo = 1.0d0 - wy_hi
+          
+          Ex_p(n) = wx_lo*wy_lo*cEx(i,   j  ) + &
+                    wx_lo*wy_hi*cEx(i,   j+1) + &
+                    wx_hi*wy_lo*cEx(i+1, j  ) + &
+                    wx_hi*wy_hi*cEx(i+1, j+1)
+
+          Ey_p(n) = wx_lo*wy_lo*cEy(i,   j  ) + &
+                    wx_lo*wy_hi*cEy(i,   j+1) + &
+                    wx_hi*wy_lo*cEy(i+1, j  ) + &
+                    wx_hi*wy_hi*cEy(i+1, j+1)
+          
+! otherwise use the fine
+       else
+
+          wx_hi = lx - i
+          wy_hi = ly - j
+          
+          wx_lo = 1.0d0 - wx_hi
+          wy_lo = 1.0d0 - wy_hi
+
+          Ex_p(n) = wx_lo*wy_lo*Ex(i,   j  ) + &
+                    wx_lo*wy_hi*Ex(i,   j+1) + &
+                    wx_hi*wy_lo*Ex(i+1, j  ) + &
+                    wx_hi*wy_hi*Ex(i+1, j+1)
+          
+          Ey_p(n) = wx_lo*wy_lo*Ey(i,   j  ) + &
+                    wx_lo*wy_hi*Ey(i,   j+1) + &
+                    wx_hi*wy_lo*Ey(i+1, j  ) + &
+                    wx_hi*wy_hi*Ey(i+1, j+1)
+
+       end if
+
+    end do
+
+  end subroutine warpx_interpolate_cic_two_levels_2d
+
+end module warpx_ES_interpolate_cic
diff --git a/Source/Particles/push_particles_ES.F90 b/Source/Particles/push_particles_ES.F90
new file mode 100644
index 000000000..9c7bd9e92
--- /dev/null
+++ b/Source/Particles/push_particles_ES.F90
@@ -0,0 +1,258 @@
+module warpx_ES_push_particles
+
+  use iso_c_binding
+  use amrex_fort_module, only : amrex_real
+
+  implicit none
+
+contains
+
+!
+! This routine updates the particle positions and velocities using the 
+! leapfrog time integration algorithm, given the electric fields at the
+! particle positions. It also enforces specular reflection off the domain
+! walls.
+!
+! Arguments:
+!     particles : a pointer to the particle array-of-structs 
+!     ns        : the stride length of particle struct (the size of the struct in number of reals)
+!     np        : the number of particles
+!     vx_p      : the particle x-velocities
+!     vy_p      : the particle y-velocities
+!     vz_p      : the particle z-velocities
+!     Ex_p      : the electric field in the x-direction at the particle positions
+!     Ey_p      : the electric field in the y-direction at the particle positions
+!     Ez_p      : the electric field in the z-direction at the particle positions
+!     charge    : the charge of this particle species
+!     mass      : the mass of this particle species
+!     dt        : the time step
+!     prob_lo   : the left-hand corner of the problem domain
+!     prob_hi   : the right-hand corner of the problem domain
+!
+  subroutine warpx_push_leapfrog_3d(particles, ns, np,      &
+                                    vx_p, vy_p, vz_p,       &                                 
+                                    Ex_p, Ey_p, Ez_p,       &
+                                    charge, mass, dt,       &
+                                    prob_lo, prob_hi)       &
+       bind(c,name='warpx_push_leapfrog_3d')
+    integer, value,   intent(in)     :: ns, np
+    real(amrex_real), intent(inout)  :: particles(ns,np)
+    real(amrex_real), intent(inout)  :: vx_p(np), vy_p(np), vz_p(np)
+    real(amrex_real), intent(in)     :: Ex_p(np), Ey_p(np), Ez_p(np)
+    real(amrex_real), intent(in)     :: charge
+    real(amrex_real), intent(in)     :: mass
+    real(amrex_real), intent(in)     :: dt
+    real(amrex_real), intent(in)     :: prob_lo(3), prob_hi(3)
+   
+    integer n
+    real(amrex_real) fac
+
+    fac = charge * dt / mass
+
+    do n = 1, np
+
+       vx_p(n) = vx_p(n) + fac * Ex_p(n)
+       vy_p(n) = vy_p(n) + fac * Ey_p(n)
+       vz_p(n) = vz_p(n) + fac * Ez_p(n)
+
+       particles(1, n) = particles(1, n) + dt * vx_p(n)
+       particles(2, n) = particles(2, n) + dt * vy_p(n)
+       particles(3, n) = particles(3, n) + dt * vz_p(n)
+
+!      bounce off the walls in the x...
+       do while (particles(1, n) .lt. prob_lo(1) .or. particles(1, n) .gt. prob_hi(1))
+          if (particles(1, n) .lt. prob_lo(1)) then
+             particles(1, n) = 2.d0*prob_lo(1) - particles(1, n)
+          else
+             particles(1, n) = 2.d0*prob_hi(1) - particles(1, n)
+          end if
+          vx_p(n) = -vx_p(n)
+       end do
+
+!      ... y... 
+       do while (particles(2, n) .lt. prob_lo(2) .or. particles(2, n) .gt. prob_hi(2))
+          if (particles(2, n) .lt. prob_lo(2)) then
+             particles(2, n) = 2.d0*prob_lo(2) - particles(2, n)
+          else
+             particles(2, n) = 2.d0*prob_hi(2) - particles(2, n)
+          end if
+          vy_p(n) = -vy_p(n)
+       end do
+
+!      ... and z directions
+       do while (particles(3, n) .lt. prob_lo(3) .or. particles(3, n) .gt. prob_hi(3))
+          if (particles(3, n) .lt. prob_lo(3)) then
+             particles(3, n) = 2.d0*prob_lo(3) - particles(3, n)
+          else
+             particles(3, n) = 2.d0*prob_hi(3) - particles(3, n)
+          end if
+          vz_p(n) = -vz_p(n)
+       end do
+
+    end do
+
+  end subroutine warpx_push_leapfrog_3d
+
+  subroutine warpx_push_leapfrog_2d(particles, ns, np,      &
+                                    vx_p, vy_p,             &
+                                    Ex_p, Ey_p,             &
+                                    charge, mass, dt,       &
+                                    prob_lo, prob_hi)       &
+       bind(c,name='warpx_push_leapfrog_2d')
+    integer, value,   intent(in)     :: ns, np
+    real(amrex_real), intent(inout)  :: particles(ns,np)
+    real(amrex_real), intent(inout)  :: vx_p(np), vy_p(np)
+    real(amrex_real), intent(in)     :: Ex_p(np), Ey_p(np)
+    real(amrex_real), intent(in)     :: charge
+    real(amrex_real), intent(in)     :: mass
+    real(amrex_real), intent(in)     :: dt
+    real(amrex_real), intent(in)     :: prob_lo(2), prob_hi(2)
+   
+    integer n
+    real(amrex_real) fac
+
+    fac = charge * dt / mass
+
+    do n = 1, np
+
+       vx_p(n) = vx_p(n) + fac * Ex_p(n)
+       vy_p(n) = vy_p(n) + fac * Ey_p(n)
+
+       particles(1, n) = particles(1, n) + dt * vx_p(n)
+       particles(2, n) = particles(2, n) + dt * vy_p(n)
+
+!      bounce off the walls in the x...
+       do while (particles(1, n) .lt. prob_lo(1) .or. particles(1, n) .gt. prob_hi(1))
+          if (particles(1, n) .lt. prob_lo(1)) then
+             particles(1, n) = 2.d0*prob_lo(1) - particles(1, n)
+          else
+             particles(1, n) = 2.d0*prob_hi(1) - particles(1, n)
+          end if
+          vx_p(n) = -vx_p(n)
+       end do
+
+!      ... y... 
+       do while (particles(2, n) .lt. prob_lo(2) .or. particles(2, n) .gt. prob_hi(2))
+          if (particles(2, n) .lt. prob_lo(2)) then
+             particles(2, n) = 2.d0*prob_lo(2) - particles(2, n)
+          else
+             particles(2, n) = 2.d0*prob_hi(2) - particles(2, n)
+          end if
+          vy_p(n) = -vy_p(n)
+       end do
+
+    end do
+
+  end subroutine warpx_push_leapfrog_2d
+
+
+!
+! This routine advances the particle positions using the current
+! velocity. This is needed to desynchronize the particle positions
+! from the velocities after particle initialization.
+!
+! Arguments:
+!     particles : a pointer to the particle array-of-structs 
+!     ns        : the stride length of particle struct (the size of the struct in number of reals)
+!     np        : the number of particles
+!     xx_p      : the electric field in the x-direction at the particle positions
+!     vy_p      : the electric field in the y-direction at the particle positions
+!     vz_p      : the electric field in the z-direction at the particle positions
+!     dt        : the time step
+!     prob_lo   : the left-hand corner of the problem domain
+!     prob_hi   : the right-hand corner of the problem domain
+!
+  subroutine warpx_push_leapfrog_positions_3d(particles, ns, np,      &
+                                             vx_p, vy_p, vz_p, dt,   &
+                                             prob_lo, prob_hi)       &
+       bind(c,name='warpx_push_leapfrog_positions_3d')
+    integer, value,   intent(in)    :: ns, np
+    real(amrex_real), intent(inout) :: particles(ns,np)
+    real(amrex_real), intent(inout) :: vx_p(np), vy_p(np), vz_p(np)
+    real(amrex_real), intent(in)    :: dt
+    real(amrex_real), intent(in)    :: prob_lo(3), prob_hi(3)
+
+    integer n
+
+    do n = 1, np
+
+       particles(1, n) = particles(1, n) + dt * vx_p(n)
+       particles(2, n) = particles(2, n) + dt * vy_p(n)
+       particles(3, n) = particles(3, n) + dt * vz_p(n)
+
+!      bounce off the walls in the x...
+       do while (particles(1, n) .lt. prob_lo(1) .or. particles(1, n) .gt. prob_hi(1))
+          if (particles(1, n) .lt. prob_lo(1)) then
+             particles(1, n) = 2.d0*prob_lo(1) - particles(1, n)
+          else
+             particles(1, n) = 2.d0*prob_hi(1) - particles(1, n)
+          end if
+          vx_p(n) = -vx_p(n)
+       end do
+
+!      ... y... 
+       do while (particles(2, n) .lt. prob_lo(2) .or. particles(2, n) .gt. prob_hi(2))
+          if (particles(2, n) .lt. prob_lo(2)) then
+             particles(2, n) = 2.d0*prob_lo(2) - particles(2, n)
+          else
+             particles(2, n) = 2.d0*prob_hi(2) - particles(2, n)
+          end if
+          vy_p(n) = -vy_p(n)
+       end do
+
+!      ... and z directions
+       do while (particles(3, n) .lt. prob_lo(3) .or. particles(3, n) .gt. prob_hi(3))
+          if (particles(3, n) .lt. prob_lo(3)) then
+             particles(3, n) = 2.d0*prob_lo(3) - particles(3, n)
+          else
+             particles(3, n) = 2.d0*prob_hi(3) - particles(3, n)
+          end if
+          vz_p(n) = -vz_p(n)
+       end do
+              
+    end do
+
+  end subroutine warpx_push_leapfrog_positions_3d
+
+  subroutine warpx_push_leapfrog_positions_2d(particles, ns, np,      &
+                                              vx_p, vy_p, dt,         &
+                                              prob_lo, prob_hi)       &
+       bind(c,name='warpx_push_leapfrog_positions_2d')
+    integer, value,   intent(in)    :: ns, np
+    real(amrex_real), intent(inout) :: particles(ns,np)
+    real(amrex_real), intent(inout) :: vx_p(np), vy_p(np)
+    real(amrex_real), intent(in)    :: dt
+    real(amrex_real), intent(in)    :: prob_lo(2), prob_hi(2)
+
+    integer n
+
+    do n = 1, np
+
+       particles(1, n) = particles(1, n) + dt * vx_p(n)
+       particles(2, n) = particles(2, n) + dt * vy_p(n)
+
+!      bounce off the walls in the x...
+       do while (particles(1, n) .lt. prob_lo(1) .or. particles(1, n) .gt. prob_hi(1))
+          if (particles(1, n) .lt. prob_lo(1)) then
+             particles(1, n) = 2.d0*prob_lo(1) - particles(1, n)
+          else
+             particles(1, n) = 2.d0*prob_hi(1) - particles(1, n)
+          end if
+          vx_p(n) = -vx_p(n)
+       end do
+
+!      ... y... 
+       do while (particles(2, n) .lt. prob_lo(2) .or. particles(2, n) .gt. prob_hi(2))
+          if (particles(2, n) .lt. prob_lo(2)) then
+             particles(2, n) = 2.d0*prob_lo(2) - particles(2, n)
+          else
+             particles(2, n) = 2.d0*prob_hi(2) - particles(2, n)
+          end if
+          vy_p(n) = -vy_p(n)
+       end do
+
+    end do
+
+  end subroutine warpx_push_leapfrog_positions_2d
+
+end module warpx_ES_push_particles