21 files changed, 105 insertions, 367 deletions
diff --git a/Source/Diagnostics/BackTransformedDiagnostic.H b/Source/Diagnostics/BackTransformedDiagnostic.H
index 9e24caa1b..94d557605 100644
--- a/Source/Diagnostics/BackTransformedDiagnostic.H
+++ b/Source/Diagnostics/BackTransformedDiagnostic.H
@@ -8,7 +8,6 @@
 #include <AMReX_PlotFileUtil.H>
 #include <AMReX_ParallelDescriptor.H>
 #include <AMReX_Geometry.H>
-#include <AMReX_CudaContainers.H>
 
 #include "MultiParticleContainer.H"
 #include "WarpXConst.H"
diff --git a/Source/Evolve/WarpXEvolveES.cpp b/Source/Evolve/WarpXEvolveES.cpp
index effd6ec96..7a57dfa80 100644
--- a/Source/Evolve/WarpXEvolveES.cpp
+++ b/Source/Evolve/WarpXEvolveES.cpp
@@ -179,30 +179,6 @@ void WarpX::zeroOutBoundary(amrex::MultiFab& input_data,
     bndry_data.FillBoundary();
 }
 
-void WarpX::sumFineToCrseNodal(const amrex::MultiFab& fine,
-                               amrex::MultiFab& crse,
-                               const amrex::Geometry& cgeom,
-                               const amrex::IntVect& ratio) {
-    const BoxArray& fine_BA = fine.boxArray();
-    const DistributionMapping& fine_dm = fine.DistributionMap();
-    BoxArray coarsened_fine_BA = fine_BA;
-    coarsened_fine_BA.coarsen(ratio);
-
-    MultiFab coarsened_fine_data(coarsened_fine_BA, fine_dm, 1, 0);
-    coarsened_fine_data.setVal(0.0);
-
-    for (MFIter mfi(coarsened_fine_data); mfi.isValid(); ++mfi) {
-        const Box& bx = mfi.validbox();
-        const Box& crse_box = coarsened_fine_data[mfi].box();
-        const Box& fine_box = fine[mfi].box();
-        WRPX_SUM_FINE_TO_CRSE_NODAL(bx.loVect(), bx.hiVect(), ratio.getVect(),
-                                    coarsened_fine_data[mfi].dataPtr(), crse_box.loVect(), crse_box.hiVect(),
-                                    fine[mfi].dataPtr(), fine_box.loVect(), fine_box.hiVect());
-    }
-
-    crse.copy(coarsened_fine_data, cgeom.periodicity(), FabArrayBase::ADD);
-}
-
 void
 WarpX::fixRHSForSolve(Vector<std::unique_ptr<MultiFab> >& rhs,
                       const Vector<std::unique_ptr<FabArray<BaseFab<int> > > >& masks) const {
diff --git a/Source/Filter/Filter.H b/Source/Filter/Filter.H
index 5eb84b96a..1ecb1bff4 100644
--- a/Source/Filter/Filter.H
+++ b/Source/Filter/Filter.H
@@ -1,5 +1,4 @@
 #include <AMReX_MultiFab.H>
-#include <AMReX_CudaContainers.H>
 
 #ifndef WARPX_FILTER_H_
 #define WARPX_FILTER_H_
diff --git a/Source/FortranInterface/WarpX_f.H b/Source/FortranInterface/WarpX_f.H
index b51a83387..e7f02197f 100644
--- a/Source/FortranInterface/WarpX_f.H
+++ b/Source/FortranInterface/WarpX_f.H
@@ -16,11 +16,9 @@
 
 #if (AMREX_SPACEDIM == 3)
 
-#define WRPX_SUM_FINE_TO_CRSE_NODAL      warpx_sum_fine_to_crse_nodal_3d
 #define WRPX_ZERO_OUT_BNDRY              warpx_zero_out_bndry_3d
 #define WRPX_BUILD_MASK                  warpx_build_mask_3d
 #define WRPX_COMPUTE_E_NODAL             warpx_compute_E_nodal_3d
-#define WRPX_DEPOSIT_CIC                 warpx_deposit_cic_3d
 #define WRPX_INTERPOLATE_CIC             warpx_interpolate_cic_3d
 #define WRPX_INTERPOLATE_CIC_TWO_LEVELS  warpx_interpolate_cic_two_levels_3d
 #define WRPX_PUSH_LEAPFROG               warpx_push_leapfrog_3d
@@ -28,11 +26,9 @@
 
 #elif (AMREX_SPACEDIM == 2)
 
-#define WRPX_SUM_FINE_TO_CRSE_NODAL      warpx_sum_fine_to_crse_nodal_2d
 #define WRPX_ZERO_OUT_BNDRY              warpx_zero_out_bndry_2d
 #define WRPX_BUILD_MASK                  warpx_build_mask_2d
 #define WRPX_COMPUTE_E_NODAL             warpx_compute_E_nodal_2d
-#define WRPX_DEPOSIT_CIC                 warpx_deposit_cic_2d
 #define WRPX_INTERPOLATE_CIC             warpx_interpolate_cic_2d
 #define WRPX_INTERPOLATE_CIC_TWO_LEVELS  warpx_interpolate_cic_two_levels_2d
 #define WRPX_PUSH_LEAPFROG               warpx_push_leapfrog_2d
diff --git a/Source/Laser/LaserParticleContainer.cpp b/Source/Laser/LaserParticleContainer.cpp
index 9493672e0..35eadf064 100644
--- a/Source/Laser/LaserParticleContainer.cpp
+++ b/Source/Laser/LaserParticleContainer.cpp
@@ -454,7 +454,7 @@ LaserParticleContainer::Evolve (int lev,
         int const thread_num = 0;
 #endif
 
-        Cuda::ManagedDeviceVector<Real> plane_Xp, plane_Yp, amplitude_E;
+        Gpu::ManagedDeviceVector<Real> plane_Xp, plane_Yp, amplitude_E;
 
         for (WarpXParIter pti(*this, lev); pti.isValid(); ++pti)
         {
diff --git a/Source/Parallelization/WarpXComm.H b/Source/Parallelization/WarpXComm.H
new file mode 100644
index 000000000..81f00088e
--- /dev/null
+++ b/Source/Parallelization/WarpXComm.H
@@ -0,0 +1,33 @@
+#ifndef WARPX_PARALLELIZATION_COMM_H_
+#define WARPX_PARALLELIZATION_COMM_H_
+
+#include <AMReX_MultiFab.H>
+
+/** \brief Fills the values of the current on the coarse patch by
+ *  averaging the values of the current of the fine patch (on the same level).
+ *  Also fills the guards of the coarse patch.
+ *
+ * \param[in]  fine             fine patches to interpolate from
+ * \param[out] coarse           coarse patches to interpolate to
+ * \param[in]  refinement_ratio integer ratio between the two
+ */
+void
+interpolateCurrentFineToCoarse (
+      std::array< amrex::MultiFab const *, 3 > const & fine,
+      std::array< amrex::MultiFab       *, 3 > const & coarse,
+      int const refinement_ratio);
+
+/** \brief Fills the values of the charge density on the coarse patch by
+ *  averaging the values of the charge density of the fine patch (on the same level).
+ *
+ * \param[in]  fine             fine patches to interpolate from
+ * \param[out] coarse           coarse patches to interpolate to
+ * \param[in]  refinement_ratio integer ratio between the two
+ */
+void
+interpolateDensityFineToCoarse (
+     const amrex::MultiFab& fine,
+     amrex::MultiFab& coarse,
+     int const refinement_ratio);
+
+#endif // WARPX_PARALLELIZATION_COMM_H_
diff --git a/Source/Parallelization/WarpXComm.cpp b/Source/Parallelization/WarpXComm.cpp
index 52df3dc25..529d52604 100644
--- a/Source/Parallelization/WarpXComm.cpp
+++ b/Source/Parallelization/WarpXComm.cpp
@@ -1,3 +1,4 @@
+#include <WarpXComm.H>
 #include <WarpXComm_K.H>
 #include <WarpX.H>
 #include <WarpX_f.H>
@@ -503,9 +504,9 @@ WarpX::SyncCurrent ()
 }
 
 void
-WarpX::interpolateCurrentFineToCoarse ( std::array< amrex::MultiFab const *, 3 > const & fine,
-                                        std::array< amrex::MultiFab       *, 3 > const & coarse,
-                                        int const refinement_ratio)
+interpolateCurrentFineToCoarse ( std::array< amrex::MultiFab const *, 3 > const & fine,
+                                 std::array< amrex::MultiFab       *, 3 > const & coarse,
+                                 int const refinement_ratio)
 {
     BL_PROFILE("interpolateCurrentFineToCoarse()");
     BL_ASSERT(refinement_ratio == 2);
@@ -563,7 +564,7 @@ WarpX::SyncRho ()
 }
 
 void
-WarpX::interpolateDensityFineToCoarse (const MultiFab& fine, MultiFab& coarse, int const refinement_ratio)
+interpolateDensityFineToCoarse (const MultiFab& fine, MultiFab& coarse, int const refinement_ratio)
 {
     BL_PROFILE("interpolateDensityFineToCoarse()");
     BL_ASSERT(refinement_ratio == 2);
diff --git a/Source/Particles/Make.package b/Source/Particles/Make.package
index a6c124ddd..6d34fa0ef 100644
--- a/Source/Particles/Make.package
+++ b/Source/Particles/Make.package
@@ -1,4 +1,3 @@
-F90EXE_sources += deposit_cic.F90
 F90EXE_sources += interpolate_cic.F90
 F90EXE_sources += push_particles_ES.F90
 
diff --git a/Source/Particles/MultiParticleContainer.cpp b/Source/Particles/MultiParticleContainer.cpp
index fca22daa2..55768c6fc 100644
--- a/Source/Particles/MultiParticleContainer.cpp
+++ b/Source/Particles/MultiParticleContainer.cpp
@@ -321,11 +321,7 @@ void
 MultiParticleContainer::Redistribute ()
 {
     for (auto& pc : allcontainers) {
-        if ( (pc->NumRuntimeRealComps()>0) || (pc->NumRuntimeIntComps()>0) ) {
-            pc->RedistributeCPU();
-        } else {
-            pc->Redistribute();
-        }
+        pc->Redistribute();
     }
 }
 
@@ -333,11 +329,7 @@ void
 MultiParticleContainer::RedistributeLocal (const int num_ghost)
 {
     for (auto& pc : allcontainers) {
-        if ( (pc->NumRuntimeRealComps()>0) || (pc->NumRuntimeIntComps()>0) ) {
-            pc->RedistributeCPU(0, 0, 0, num_ghost);
-        } else {
-            pc->Redistribute(0, 0, 0, num_ghost);
-        }
+        pc->Redistribute(0, 0, 0, num_ghost);
     }
 }
 
diff --git a/Source/Particles/PhotonParticleContainer.H b/Source/Particles/PhotonParticleContainer.H
index 9132cf4a9..851726d57 100644
--- a/Source/Particles/PhotonParticleContainer.H
+++ b/Source/Particles/PhotonParticleContainer.H
@@ -41,9 +41,9 @@ public:
                          DtType a_dt_type=DtType::Full) override;
 
     virtual void PushPX(WarpXParIter& pti,
-                        amrex::Cuda::ManagedDeviceVector<amrex::ParticleReal>& xp,
-                        amrex::Cuda::ManagedDeviceVector<amrex::ParticleReal>& yp,
-                        amrex::Cuda::ManagedDeviceVector<amrex::ParticleReal>& zp,
+                        amrex::Gpu::ManagedDeviceVector<amrex::ParticleReal>& xp,
+                        amrex::Gpu::ManagedDeviceVector<amrex::ParticleReal>& yp,
+                        amrex::Gpu::ManagedDeviceVector<amrex::ParticleReal>& zp,
                         amrex::Real dt, DtType a_dt_type=DtType::Full) override;
 
     // Don't push momenta for photons
diff --git a/Source/Particles/PhotonParticleContainer.cpp b/Source/Particles/PhotonParticleContainer.cpp
index 7e52b52e1..fd47ac8a0 100644
--- a/Source/Particles/PhotonParticleContainer.cpp
+++ b/Source/Particles/PhotonParticleContainer.cpp
@@ -51,9 +51,9 @@ void PhotonParticleContainer::InitData()
 
 void
 PhotonParticleContainer::PushPX(WarpXParIter& pti,
-                                Cuda::ManagedDeviceVector<ParticleReal>& xp,
-                                Cuda::ManagedDeviceVector<ParticleReal>& yp,
-                                Cuda::ManagedDeviceVector<ParticleReal>& zp,
+                                Gpu::ManagedDeviceVector<ParticleReal>& xp,
+                                Gpu::ManagedDeviceVector<ParticleReal>& yp,
+                                Gpu::ManagedDeviceVector<ParticleReal>& zp,
                                 Real dt, DtType a_dt_type)
 {
 
diff --git a/Source/Particles/PhysicalParticleContainer.H b/Source/Particles/PhysicalParticleContainer.H
index 17a504719..174488eb9 100644
--- a/Source/Particles/PhysicalParticleContainer.H
+++ b/Source/Particles/PhysicalParticleContainer.H
@@ -94,9 +94,9 @@ public:
                          DtType a_dt_type=DtType::Full) override;
 
     virtual void PushPX(WarpXParIter& pti,
-                        amrex::Cuda::ManagedDeviceVector<amrex::ParticleReal>& xp,
-                        amrex::Cuda::ManagedDeviceVector<amrex::ParticleReal>& yp,
-                        amrex::Cuda::ManagedDeviceVector<amrex::ParticleReal>& zp,
+                        amrex::Gpu::ManagedDeviceVector<amrex::ParticleReal>& xp,
+                        amrex::Gpu::ManagedDeviceVector<amrex::ParticleReal>& yp,
+                        amrex::Gpu::ManagedDeviceVector<amrex::ParticleReal>& zp,
                         amrex::Real dt, DtType a_dt_type=DtType::Full);
 
     virtual void PushP (int lev, amrex::Real dt,
diff --git a/Source/Particles/PhysicalParticleContainer.cpp b/Source/Particles/PhysicalParticleContainer.cpp
index 51690d659..938c80de5 100644
--- a/Source/Particles/PhysicalParticleContainer.cpp
+++ b/Source/Particles/PhysicalParticleContainer.cpp
@@ -62,16 +62,6 @@ PhysicalParticleContainer::PhysicalParticleContainer (AmrCore* amr_core, int isp
         "Radiation reaction can be enabled only if Boris pusher is used");
     //_____________________________
 
-#ifdef AMREX_USE_GPU
-    Print()<<"\n-----------------------------------------------------\n";
-    Print()<<"WARNING: field ionization on GPU uses RedistributeCPU\n";
-    Print()<<"-----------------------------------------------------\n\n";
-    //AMREX_ALWAYS_ASSERT_WITH_MESSAGE(
-        //do_field_ionization == 0,
-        //"Field ionization does not work on GPU so far, because the current "
-        //"version of Redistribute in AMReX does not work with runtime parameters");
-#endif
-
 #ifdef WARPX_QED
     //Add real component if QED is enabled
     pp.query("do_qed", m_do_qed);
@@ -1406,7 +1396,7 @@ PhysicalParticleContainer::SplitParticles(int lev)
 {
     auto& mypc = WarpX::GetInstance().GetPartContainer();
     auto& pctmp_split = mypc.GetPCtmp();
-    Cuda::ManagedDeviceVector<ParticleReal> xp, yp, zp;
+    Gpu::ManagedDeviceVector<ParticleReal> xp, yp, zp;
     RealVector psplit_x, psplit_y, psplit_z, psplit_w;
     RealVector psplit_ux, psplit_uy, psplit_uz;
     long np_split_to_add = 0;
@@ -1564,9 +1554,9 @@ PhysicalParticleContainer::SplitParticles(int lev)
 
 void
 PhysicalParticleContainer::PushPX(WarpXParIter& pti,
-                                  Cuda::ManagedDeviceVector<ParticleReal>& xp,
-                                  Cuda::ManagedDeviceVector<ParticleReal>& yp,
-                                  Cuda::ManagedDeviceVector<ParticleReal>& zp,
+                                  Gpu::ManagedDeviceVector<ParticleReal>& xp,
+                                  Gpu::ManagedDeviceVector<ParticleReal>& yp,
+                                  Gpu::ManagedDeviceVector<ParticleReal>& zp,
                                   Real dt, DtType a_dt_type)
 {
 
diff --git a/Source/Particles/RigidInjectedParticleContainer.H b/Source/Particles/RigidInjectedParticleContainer.H
index 3abbb4afe..a2473c5ad 100644
--- a/Source/Particles/RigidInjectedParticleContainer.H
+++ b/Source/Particles/RigidInjectedParticleContainer.H
@@ -44,9 +44,9 @@ public:
                          DtType a_dt_type=DtType::Full) override;
 
     virtual void PushPX(WarpXParIter& pti,
-                        amrex::Cuda::ManagedDeviceVector<amrex::ParticleReal>& xp,
-                        amrex::Cuda::ManagedDeviceVector<amrex::ParticleReal>& yp,
-                        amrex::Cuda::ManagedDeviceVector<amrex::ParticleReal>& zp,
+                        amrex::Gpu::ManagedDeviceVector<amrex::ParticleReal>& xp,
+                        amrex::Gpu::ManagedDeviceVector<amrex::ParticleReal>& yp,
+                        amrex::Gpu::ManagedDeviceVector<amrex::ParticleReal>& zp,
                         amrex::Real dt, DtType a_dt_type=DtType::Full) override;
 
     virtual void PushP (int lev, amrex::Real dt,
diff --git a/Source/Particles/RigidInjectedParticleContainer.cpp b/Source/Particles/RigidInjectedParticleContainer.cpp
index 507206041..bee71fba1 100644
--- a/Source/Particles/RigidInjectedParticleContainer.cpp
+++ b/Source/Particles/RigidInjectedParticleContainer.cpp
@@ -78,7 +78,7 @@ RigidInjectedParticleContainer::RemapParticles()
                 // Note that the particles are already in the boosted frame.
                 // This value is saved to advance the particles not injected yet
 
-                Cuda::ManagedDeviceVector<ParticleReal> xp, yp, zp;
+                Gpu::ManagedDeviceVector<ParticleReal> xp, yp, zp;
 
                 for (WarpXParIter pti(*this, lev); pti.isValid(); ++pti)
                 {
@@ -138,7 +138,7 @@ RigidInjectedParticleContainer::BoostandRemapParticles()
 #pragma omp parallel
 #endif
     {
-        Cuda::ManagedDeviceVector<ParticleReal> xp, yp, zp;
+        Gpu::ManagedDeviceVector<ParticleReal> xp, yp, zp;
 
         for (WarpXParIter pti(*this, 0); pti.isValid(); ++pti)
         {
@@ -209,9 +209,9 @@ RigidInjectedParticleContainer::BoostandRemapParticles()
 
 void
 RigidInjectedParticleContainer::PushPX(WarpXParIter& pti,
-                                       Cuda::ManagedDeviceVector<ParticleReal>& xp,
-                                       Cuda::ManagedDeviceVector<ParticleReal>& yp,
-                                       Cuda::ManagedDeviceVector<ParticleReal>& zp,
+                                       Gpu::ManagedDeviceVector<ParticleReal>& xp,
+                                       Gpu::ManagedDeviceVector<ParticleReal>& yp,
+                                       Gpu::ManagedDeviceVector<ParticleReal>& zp,
                                        Real dt, DtType a_dt_type)
 {
 
@@ -222,7 +222,7 @@ RigidInjectedParticleContainer::PushPX(WarpXParIter& pti,
     auto& uzp = attribs[PIdx::uz];
 
     // Save the position and momenta, making copies
-    Cuda::ManagedDeviceVector<ParticleReal> xp_save, yp_save, zp_save;
+    Gpu::ManagedDeviceVector<ParticleReal> xp_save, yp_save, zp_save;
     RealVector uxp_save, uyp_save, uzp_save;
 
     ParticleReal* const AMREX_RESTRICT x = xp.dataPtr();
diff --git a/Source/Particles/Sorting/SortingUtils.H b/Source/Particles/Sorting/SortingUtils.H
index 28a1b73b7..80eaaf9cb 100644
--- a/Source/Particles/Sorting/SortingUtils.H
+++ b/Source/Particles/Sorting/SortingUtils.H
@@ -2,7 +2,6 @@
 #define WARPX_PARTICLES_SORTING_SORTINGUTILS_H_
 
 #include <WarpXParticleContainer.H>
-#include <AMReX_CudaContainers.H>
 #include <AMReX_Gpu.H>
 #ifdef AMREX_USE_GPU
     #include <thrust/partition.h>
@@ -43,7 +42,7 @@ ForwardIterator stablePartition(ForwardIterator const index_begin,
     // On GPU: Use thrust
     int const* AMREX_RESTRICT predicate_ptr = predicate.dataPtr();
     ForwardIterator const sep = thrust::stable_partition(
-        thrust::cuda::par(amrex::Cuda::The_ThrustCachedAllocator()),
+        thrust::cuda::par(amrex::Gpu::The_ThrustCachedAllocator()),
         index_begin, index_end,
         [predicate_ptr] AMREX_GPU_DEVICE (long i) { return predicate_ptr[i]; }
     );
diff --git a/Source/Particles/WarpXParticleContainer.H b/Source/Particles/WarpXParticleContainer.H
index dbd913c5b..7f86930b2 100644
--- a/Source/Particles/WarpXParticleContainer.H
+++ b/Source/Particles/WarpXParticleContainer.H
@@ -73,12 +73,12 @@ public:
     WarpXParIter (ContainerType& pc, int level);
 
 #if (AMREX_SPACEDIM == 2)
-    void GetPosition (amrex::Cuda::ManagedDeviceVector<amrex::ParticleReal>& x,
-                      amrex::Cuda::ManagedDeviceVector<amrex::ParticleReal>& y,
-                      amrex::Cuda::ManagedDeviceVector<amrex::ParticleReal>& z) const;
-    void SetPosition (const amrex::Cuda::ManagedDeviceVector<amrex::ParticleReal>& x,
-                      const amrex::Cuda::ManagedDeviceVector<amrex::ParticleReal>& y,
-                      const amrex::Cuda::ManagedDeviceVector<amrex::ParticleReal>& z);
+    void GetPosition (amrex::Gpu::ManagedDeviceVector<amrex::ParticleReal>& x,
+                      amrex::Gpu::ManagedDeviceVector<amrex::ParticleReal>& y,
+                      amrex::Gpu::ManagedDeviceVector<amrex::ParticleReal>& z) const;
+    void SetPosition (const amrex::Gpu::ManagedDeviceVector<amrex::ParticleReal>& x,
+                      const amrex::Gpu::ManagedDeviceVector<amrex::ParticleReal>& y,
+                      const amrex::Gpu::ManagedDeviceVector<amrex::ParticleReal>& z);
 #endif
     const std::array<RealVector, PIdx::nattribs>& GetAttribs () const {
         return GetStructOfArrays().GetRealData();
diff --git a/Source/Particles/WarpXParticleContainer.cpp b/Source/Particles/WarpXParticleContainer.cpp
index d5520ba06..d22de00e0 100644
--- a/Source/Particles/WarpXParticleContainer.cpp
+++ b/Source/Particles/WarpXParticleContainer.cpp
@@ -4,6 +4,7 @@
 #include <MultiParticleContainer.H>
 #include <WarpXParticleContainer.H>
 #include <AMReX_AmrParGDB.H>
+#include <WarpXComm.H>
 #include <WarpX_f.H>
 #include <WarpX.H>
 #include <WarpXAlgorithmSelection.H>
@@ -502,66 +503,54 @@ WarpXParticleContainer::DepositCharge (WarpXParIter& pti, RealVector& wp,
 void
 WarpXParticleContainer::DepositCharge (Vector<std::unique_ptr<MultiFab> >& rho, bool local)
 {
+    // Loop over the refinement levels
+    int const finest_level = rho.size() - 1;
+    for (int lev = 0; lev < finest_level; ++lev) {
 
-    int num_levels = rho.size();
-    int finest_level = num_levels - 1;
-
-    // each level deposits it's own particles
-    const int ng = rho[0]->nGrow();
-    for (int lev = 0; lev < num_levels; ++lev) {
-
-        rho[lev]->setVal(0.0, ng);
-
-        const auto& gm = m_gdb->Geom(lev);
-        const auto& ba = m_gdb->ParticleBoxArray(lev);
-
-        const Real* dx  = gm.CellSize();
-        const Real* plo = gm.ProbLo();
-        BoxArray nba = ba;
-        nba.surroundingNodes();
-
-        for (WarpXParIter pti(*this, lev); pti.isValid(); ++pti) {
-            const Box& box = nba[pti];
-
+        // Loop over particle tiles and deposit charge on each level
+#ifdef _OPENMP
+        #pragma omp parallel
+        {
+        int thread_num = omp_get_thread_num();
+#else
+        int thread_num = 0;
+#endif
+        for (WarpXParIter pti(*this, lev); pti.isValid(); ++pti)
+        {
+            const long np = pti.numParticles();
             auto& wp = pti.GetAttribs(PIdx::w);
-            const auto& particles = pti.GetArrayOfStructs();
-            int nstride = particles.dataShape().first;
-            const long np  = pti.numParticles();
 
-            FArrayBox& rhofab = (*rho[lev])[pti];
+            pti.GetPosition(m_xp[thread_num], m_yp[thread_num], m_zp[thread_num]);
 
-            WRPX_DEPOSIT_CIC(particles.dataPtr(), nstride, np,
-                             wp.dataPtr(), &this->charge,
-                             rhofab.dataPtr(), box.loVect(), box.hiVect(),
-                             plo, dx, &ng);
-        }
+            int* AMREX_RESTRICT ion_lev;
+            if (do_field_ionization){
+                ion_lev = pti.GetiAttribs(particle_icomps["ionization_level"]).dataPtr();
+            } else {
+                ion_lev = nullptr;
+            }
 
-        if (!local) rho[lev]->SumBoundary(gm.periodicity());
+            DepositCharge(pti, wp, ion_lev, rho[lev].get(), 0, 0, np, thread_num, lev, lev);
+        }
+#ifdef _OPENMP
+        }
+#endif
+        // Exchange guard cells
+        if (!local) rho[lev]->SumBoundary( m_gdb->Geom(lev).periodicity() );
     }
 
-    // now we average down fine to crse
-    std::unique_ptr<MultiFab> crse;
+    // Now that the charge has been deposited at each level,
+    // we average down from fine to crse
     for (int lev = finest_level - 1; lev >= 0; --lev) {
-        const BoxArray& fine_BA = rho[lev+1]->boxArray();
         const DistributionMapping& fine_dm = rho[lev+1]->DistributionMap();
-        BoxArray coarsened_fine_BA = fine_BA;
+        BoxArray coarsened_fine_BA = rho[lev+1]->boxArray();
         coarsened_fine_BA.coarsen(m_gdb->refRatio(lev));
-
         MultiFab coarsened_fine_data(coarsened_fine_BA, fine_dm, rho[lev+1]->nComp(), 0);
         coarsened_fine_data.setVal(0.0);
 
-        IntVect ratio(AMREX_D_DECL(2, 2, 2));  // FIXME
+        int const refinement_ratio = 2;
 
-        for (MFIter mfi(coarsened_fine_data); mfi.isValid(); ++mfi) {
-            const Box& bx = mfi.validbox();
-            const Box& crse_box = coarsened_fine_data[mfi].box();
-            const Box& fine_box = (*rho[lev+1])[mfi].box();
-            WRPX_SUM_FINE_TO_CRSE_NODAL(bx.loVect(), bx.hiVect(), ratio.getVect(),
-                                        coarsened_fine_data[mfi].dataPtr(), crse_box.loVect(), crse_box.hiVect(),
-                                        (*rho[lev+1])[mfi].dataPtr(), fine_box.loVect(), fine_box.hiVect());
-        }
-
-        rho[lev]->copy(coarsened_fine_data, m_gdb->Geom(lev).periodicity(), FabArrayBase::ADD);
+        interpolateDensityFineToCoarse( *rho[lev+1], coarsened_fine_data, refinement_ratio );
+        rho[lev]->ParallelAdd( coarsened_fine_data, m_gdb->Geom(lev).periodicity() );
     }
 }
 
@@ -840,5 +829,3 @@ WarpXParticleContainer::particlePostLocate(ParticleType& p,
     if (pld.m_lev == lev-1){
     }
 }
-
-
diff --git a/Source/Particles/deposit_cic.F90 b/Source/Particles/deposit_cic.F90
deleted file mode 100644
index 2831ce96b..000000000
--- a/Source/Particles/deposit_cic.F90
+++ /dev/null
@@ -1,134 +0,0 @@
-module warpx_ES_deposit_cic
-
-  use iso_c_binding
-  use amrex_fort_module, only : amrex_real, amrex_particle_real
-
-  implicit none
-
-contains
-
-! This routine computes the charge density due to the particles using cloud-in-cell
-! deposition. The Fab rho is assumed to be node-centered.
-!
-! Arguments:
-!     particles : a pointer to the particle array-of-structs
-!     ns        : the stride length of particle struct (the size of the struct in number of reals)
-!     np        : the number of particles
-!     weights   : the particle weights
-!     charge    : the charge of this particle species
-!     rho       : a Fab that will contain the charge density on exit
-!     lo        : a pointer to the lo corner of this valid box for rho, in index space
-!     hi        : a pointer to the hi corner of this valid box for rho, in index space
-!     plo       : the real position of the left-hand corner of the problem domain
-!     dx        : the mesh spacing
-!     ng        : the number of ghost cells in rho
-!
-  subroutine warpx_deposit_cic_3d(particles, ns, np,                     &
-                                  weights, charge, rho, lo, hi, plo, dx, &
-                                  ng)                                    &
-       bind(c,name='warpx_deposit_cic_3d')
-    integer, value,   intent(in)     :: ns, np
-    real(amrex_particle_real), intent(in)     :: particles(ns,np)
-    real(amrex_particle_real), intent(in)     :: weights(np)
-    real(amrex_real), intent(in)     :: charge
-    integer,          intent(in)     :: lo(3)
-    integer,          intent(in)     :: hi(3)
-    integer,          intent(in)     :: ng
-    real(amrex_real), intent(inout)  :: rho(lo(1)-ng:hi(1)+ng, lo(2)-ng:hi(2)+ng, lo(3)-ng:hi(3)+ng)
-    real(amrex_real), intent(in)     :: plo(3)
-    real(amrex_real), intent(in)     :: dx(3)
-
-    integer i, j, k, n
-    real(amrex_real) wx_lo, wy_lo, wz_lo, wx_hi, wy_hi, wz_hi
-    real(amrex_real) lx, ly, lz
-    real(amrex_real) inv_dx(3)
-    real(amrex_real) qp, inv_vol
-
-    inv_dx = 1.0d0/dx
-
-    inv_vol = inv_dx(1) * inv_dx(2) * inv_dx(3)
-
-    do n = 1, np
-
-       qp = weights(n) * charge * inv_vol
-
-       lx = (particles(1, n) - plo(1))*inv_dx(1)
-       ly = (particles(2, n) - plo(2))*inv_dx(2)
-       lz = (particles(3, n) - plo(3))*inv_dx(3)
-
-       i = floor(lx)
-       j = floor(ly)
-       k = floor(lz)
-
-       wx_hi = lx - i
-       wy_hi = ly - j
-       wz_hi = lz - k
-
-       wx_lo = 1.0d0 - wx_hi
-       wy_lo = 1.0d0 - wy_hi
-       wz_lo = 1.0d0 - wz_hi
-
-       rho(i,   j,   k  ) = rho(i,   j,   k  ) + wx_lo*wy_lo*wz_lo*qp
-       rho(i,   j,   k+1) = rho(i,   j,   k+1) + wx_lo*wy_lo*wz_hi*qp
-       rho(i,   j+1, k  ) = rho(i,   j+1, k  ) + wx_lo*wy_hi*wz_lo*qp
-       rho(i,   j+1, k+1) = rho(i,   j+1, k+1) + wx_lo*wy_hi*wz_hi*qp
-       rho(i+1, j,   k  ) = rho(i+1, j,   k  ) + wx_hi*wy_lo*wz_lo*qp
-       rho(i+1, j,   k+1) = rho(i+1, j,   k+1) + wx_hi*wy_lo*wz_hi*qp
-       rho(i+1, j+1, k  ) = rho(i+1, j+1, k  ) + wx_hi*wy_hi*wz_lo*qp
-       rho(i+1, j+1, k+1) = rho(i+1, j+1, k+1) + wx_hi*wy_hi*wz_hi*qp
-
-    end do
-
-  end subroutine warpx_deposit_cic_3d
-
-  subroutine warpx_deposit_cic_2d(particles, ns, np,                     &
-                                  weights, charge, rho, lo, hi, plo, dx, &
-                                  ng)                                    &
-       bind(c,name='warpx_deposit_cic_2d')
-    integer, value,   intent(in)     :: ns, np
-    real(amrex_particle_real), intent(in)     :: particles(ns,np)
-    real(amrex_particle_real), intent(in)     :: weights(np)
-    real(amrex_real), intent(in)     :: charge
-    integer,          intent(in)     :: lo(2)
-    integer,          intent(in)     :: hi(2)
-    integer,          intent(in)     :: ng
-    real(amrex_real), intent(inout)  :: rho(lo(1)-ng:hi(1)+ng, lo(2)-ng:hi(2)+ng)
-    real(amrex_real), intent(in)     :: plo(2)
-    real(amrex_real), intent(in)     :: dx(2)
-
-    integer i, j, n
-    real(amrex_real) wx_lo, wy_lo, wx_hi, wy_hi
-    real(amrex_real) lx, ly
-    real(amrex_real) inv_dx(2)
-    real(amrex_real) qp, inv_vol
-
-    inv_dx = 1.0d0/dx
-
-    inv_vol = inv_dx(1) * inv_dx(2)
-
-    do n = 1, np
-
-       qp = weights(n) * charge * inv_vol
-
-       lx = (particles(1, n) - plo(1))*inv_dx(1)
-       ly = (particles(2, n) - plo(2))*inv_dx(2)
-
-       i = floor(lx)
-       j = floor(ly)
-
-       wx_hi = lx - i
-       wy_hi = ly - j
-
-       wx_lo = 1.0d0 - wx_hi
-       wy_lo = 1.0d0 - wy_hi
-
-       rho(i,   j  ) = rho(i,   j  ) + wx_lo*wy_lo*qp
-       rho(i,   j+1) = rho(i,   j+1) + wx_lo*wy_hi*qp
-       rho(i+1, j  ) = rho(i+1, j  ) + wx_hi*wy_lo*qp
-       rho(i+1, j+1) = rho(i+1, j+1) + wx_hi*wy_hi*qp
-
-    end do
-
-  end subroutine warpx_deposit_cic_2d
-
-end module warpx_ES_deposit_cic
diff --git a/Source/Utils/utils_ES.F90 b/Source/Utils/utils_ES.F90
index ce143bb94..baadeb7af 100644
--- a/Source/Utils/utils_ES.F90
+++ b/Source/Utils/utils_ES.F90
@@ -7,79 +7,6 @@ module warpx_ES_utils
 
 contains
 
-  subroutine warpx_sum_fine_to_crse_nodal_3d (lo, hi, lrat, crse, clo, chi, fine, flo, fhi) &
-       bind(c, name="warpx_sum_fine_to_crse_nodal_3d")
-
-    integer, intent(in)             ::   lo(3),  hi(3)
-    integer, intent(in)             ::  clo(3), chi(3)
-    integer, intent(in)             ::  flo(3), fhi(3)
-    integer, intent(in)             ::  lrat(3)
-    real(amrex_real), intent(inout) :: crse(clo(1):chi(1),clo(2):chi(2),clo(3):chi(3))
-    real(amrex_real), intent(in)    :: fine(flo(1):fhi(1),flo(2):fhi(2),flo(3):fhi(3))
-
-    integer :: i, j, k, ii, jj, kk
-
-    do k        = lo(3), hi(3)
-       kk       = k * lrat(3)
-       do j     = lo(2), hi(2)
-          jj    = j * lrat(2)
-          do i  = lo(1), hi(1)
-             ii = i * lrat(1)
-             crse(i,j,k)  =  fine(ii,jj,kk)                              + &
-! These six fine nodes are shared by two coarse nodes...
-                  0.5d0   * (fine(ii-1,jj,kk)     + fine(ii+1,jj,kk)     + &
-                             fine(ii,jj-1,kk)     + fine(ii,jj+1,kk)     + &
-                             fine(ii,jj,kk-1)     + fine(ii,jj,kk+1))    + &
-! ... these twelve are shared by four...
-                  0.25d0  * (fine(ii,jj-1,kk-1)   + fine(ii,jj+1,kk-1)   + &
-                             fine(ii,jj-1,kk+1)   + fine(ii,jj+1,kk+1)   + &
-                             fine(ii-1,jj,kk-1)   + fine(ii+1,jj,kk-1)   + &
-                             fine(ii-1,jj,kk+1)   + fine(ii+1,jj,kk+1)   + &
-                             fine(ii-1,jj-1,kk)   + fine(ii+1,jj-1,kk)   + &
-                             fine(ii-1,jj+1,kk)   + fine(ii+1,jj+1,kk))  + &
-! ... and these eight are shared by eight
-                  0.125d0 * (fine(ii-1,jj-1,kk-1) + fine(ii-1,jj-1,kk+1) + &
-                             fine(ii-1,jj+1,kk-1) + fine(ii-1,jj+1,kk+1) + &
-                             fine(ii+1,jj-1,kk-1) + fine(ii+1,jj-1,kk+1) + &
-                             fine(ii+1,jj+1,kk-1) + fine(ii+1,jj+1,kk+1))
-! ... note that we have 27 nodes in total...
-             crse(i,j,k) = crse(i,j,k) / 8.d0
-          end do
-       end do
-    end do
-
-  end subroutine warpx_sum_fine_to_crse_nodal_3d
-
-  subroutine warpx_sum_fine_to_crse_nodal_2d (lo, hi, lrat, crse, clo, chi, fine, flo, fhi) &
-       bind(c, name="warpx_sum_fine_to_crse_nodal_2d")
-
-    integer, intent(in)             ::   lo(2),  hi(2)
-    integer, intent(in)             ::  clo(2), chi(2)
-    integer, intent(in)             ::  flo(2), fhi(2)
-    integer, intent(in)             ::  lrat(2)
-    real(amrex_real), intent(inout) :: crse(clo(1):chi(1),clo(2):chi(2))
-    real(amrex_real), intent(in)    :: fine(flo(1):fhi(1),flo(2):fhi(2))
-
-    integer :: i, j, ii, jj
-
-    do j     = lo(2), hi(2)
-       jj    = j * lrat(2)
-       do i  = lo(1), hi(1)
-          ii = i * lrat(1)
-          crse(i,j)  =  fine(ii,jj)                             + &
-! These four fine nodes are shared by two coarse nodes...
-               0.5d0   * (fine(ii-1,jj)     + fine(ii+1,jj)     + &
-               fine(ii,jj-1)     + fine(ii,jj+1))               + &
-! ... and these four are shared by four...
-               0.25d0  * (fine(ii-1,jj-1)   + fine(ii-1,jj+1)   + &
-               fine(ii-1,jj+1)   + fine(ii+1,jj+1))
-! ... note that we have 9 nodes in total...
-             crse(i,j) = crse(i,j) / 4.d0
-       end do
-    end do
-
-  end subroutine warpx_sum_fine_to_crse_nodal_2d
-
   subroutine warpx_zero_out_bndry_3d (lo, hi, input_data, bndry_data, mask) &
        bind(c,name='warpx_zero_out_bndry_3d')
 
diff --git a/Source/WarpX.H b/Source/WarpX.H
index 216c8f6a8..09901dcb1 100644
--- a/Source/WarpX.H
+++ b/Source/WarpX.H
@@ -386,9 +386,6 @@ private:
     void zeroOutBoundary(amrex::MultiFab& input_data, amrex::MultiFab& bndry_data,
                          const amrex::FabArray<amrex::BaseFab<int> >& mask) const;
 
-    void sumFineToCrseNodal(const amrex::MultiFab& fine, amrex::MultiFab& crse,
-                            const amrex::Geometry& cgeom, const amrex::IntVect& ratio);
-
     void fixRHSForSolve(amrex::Vector<std::unique_ptr<amrex::MultiFab> >& rhs,
                         const amrex::Vector<std::unique_ptr<amrex::FabArray<amrex::BaseFab<int> > > >& masks) const ;
 
@@ -434,29 +431,6 @@ private:
 
     std::array<std::unique_ptr<amrex::MultiFab>, 3> getInterpolatedB(int lev) const;
 
-    /** \brief Fills the values of the current on the coarse patch by
-     *  averaging the values of the current of the fine patch (on the same level).
-     *  Also fills the guards of the coarse patch.
-     *
-     * \param[in]  fine             fine patches to interpolate from
-     * \param[out] coarse           coarse patches to interpolate to
-     * \param[in]  refinement_ratio integer ratio between the two
-     */
-    void interpolateCurrentFineToCoarse (std::array< amrex::MultiFab const *, 3 > const & fine,
-                                         std::array< amrex::MultiFab       *, 3 > const & coarse,
-                                         int const refinement_ratio);
-
-    /** \brief Fills the values of the charge density on the coarse patch by
-     *  averaging the values of the charge density of the fine patch (on the same level).
-     *
-     * \param[in]  fine             fine patches to interpolate from
-     * \param[out] coarse           coarse patches to interpolate to
-     * \param[in]  refinement_ratio integer ratio between the two
-     */
-    void interpolateDensityFineToCoarse (const amrex::MultiFab& fine,
-                                               amrex::MultiFab& coarse,
-                                         int const refinement_ratio);
-
     void ExchangeWithPmlB (int lev);
     void ExchangeWithPmlE (int lev);
     void ExchangeWithPmlF (int lev);