26 files changed, 215 insertions, 374 deletions
diff --git a/Source/Diagnostics/BackTransformedDiagnostic.H b/Source/Diagnostics/BackTransformedDiagnostic.H
index 9e24caa1b..94d557605 100644
--- a/Source/Diagnostics/BackTransformedDiagnostic.H
+++ b/Source/Diagnostics/BackTransformedDiagnostic.H
@@ -8,7 +8,6 @@
 #include <AMReX_PlotFileUtil.H>
 #include <AMReX_ParallelDescriptor.H>
 #include <AMReX_Geometry.H>
-#include <AMReX_CudaContainers.H>
 
 #include "MultiParticleContainer.H"
 #include "WarpXConst.H"
diff --git a/Source/Evolve/WarpXEvolveES.cpp b/Source/Evolve/WarpXEvolveES.cpp
index effd6ec96..7a57dfa80 100644
--- a/Source/Evolve/WarpXEvolveES.cpp
+++ b/Source/Evolve/WarpXEvolveES.cpp
@@ -179,30 +179,6 @@ void WarpX::zeroOutBoundary(amrex::MultiFab& input_data,
     bndry_data.FillBoundary();
 }
 
-void WarpX::sumFineToCrseNodal(const amrex::MultiFab& fine,
-                               amrex::MultiFab& crse,
-                               const amrex::Geometry& cgeom,
-                               const amrex::IntVect& ratio) {
-    const BoxArray& fine_BA = fine.boxArray();
-    const DistributionMapping& fine_dm = fine.DistributionMap();
-    BoxArray coarsened_fine_BA = fine_BA;
-    coarsened_fine_BA.coarsen(ratio);
-
-    MultiFab coarsened_fine_data(coarsened_fine_BA, fine_dm, 1, 0);
-    coarsened_fine_data.setVal(0.0);
-
-    for (MFIter mfi(coarsened_fine_data); mfi.isValid(); ++mfi) {
-        const Box& bx = mfi.validbox();
-        const Box& crse_box = coarsened_fine_data[mfi].box();
-        const Box& fine_box = fine[mfi].box();
-        WRPX_SUM_FINE_TO_CRSE_NODAL(bx.loVect(), bx.hiVect(), ratio.getVect(),
-                                    coarsened_fine_data[mfi].dataPtr(), crse_box.loVect(), crse_box.hiVect(),
-                                    fine[mfi].dataPtr(), fine_box.loVect(), fine_box.hiVect());
-    }
-
-    crse.copy(coarsened_fine_data, cgeom.periodicity(), FabArrayBase::ADD);
-}
-
 void
 WarpX::fixRHSForSolve(Vector<std::unique_ptr<MultiFab> >& rhs,
                       const Vector<std::unique_ptr<FabArray<BaseFab<int> > > >& masks) const {
diff --git a/Source/Filter/Filter.H b/Source/Filter/Filter.H
index 5eb84b96a..1ecb1bff4 100644
--- a/Source/Filter/Filter.H
+++ b/Source/Filter/Filter.H
@@ -1,5 +1,4 @@
 #include <AMReX_MultiFab.H>
-#include <AMReX_CudaContainers.H>
 
 #ifndef WARPX_FILTER_H_
 #define WARPX_FILTER_H_
diff --git a/Source/FortranInterface/WarpX_f.H b/Source/FortranInterface/WarpX_f.H
index b51a83387..e7f02197f 100644
--- a/Source/FortranInterface/WarpX_f.H
+++ b/Source/FortranInterface/WarpX_f.H
@@ -16,11 +16,9 @@
 
 #if (AMREX_SPACEDIM == 3)
 
-#define WRPX_SUM_FINE_TO_CRSE_NODAL      warpx_sum_fine_to_crse_nodal_3d
 #define WRPX_ZERO_OUT_BNDRY              warpx_zero_out_bndry_3d
 #define WRPX_BUILD_MASK                  warpx_build_mask_3d
 #define WRPX_COMPUTE_E_NODAL             warpx_compute_E_nodal_3d
-#define WRPX_DEPOSIT_CIC                 warpx_deposit_cic_3d
 #define WRPX_INTERPOLATE_CIC             warpx_interpolate_cic_3d
 #define WRPX_INTERPOLATE_CIC_TWO_LEVELS  warpx_interpolate_cic_two_levels_3d
 #define WRPX_PUSH_LEAPFROG               warpx_push_leapfrog_3d
@@ -28,11 +26,9 @@
 
 #elif (AMREX_SPACEDIM == 2)
 
-#define WRPX_SUM_FINE_TO_CRSE_NODAL      warpx_sum_fine_to_crse_nodal_2d
 #define WRPX_ZERO_OUT_BNDRY              warpx_zero_out_bndry_2d
 #define WRPX_BUILD_MASK                  warpx_build_mask_2d
 #define WRPX_COMPUTE_E_NODAL             warpx_compute_E_nodal_2d
-#define WRPX_DEPOSIT_CIC                 warpx_deposit_cic_2d
 #define WRPX_INTERPOLATE_CIC             warpx_interpolate_cic_2d
 #define WRPX_INTERPOLATE_CIC_TWO_LEVELS  warpx_interpolate_cic_two_levels_2d
 #define WRPX_PUSH_LEAPFROG               warpx_push_leapfrog_2d
diff --git a/Source/Laser/LaserParticleContainer.cpp b/Source/Laser/LaserParticleContainer.cpp
index 9493672e0..35eadf064 100644
--- a/Source/Laser/LaserParticleContainer.cpp
+++ b/Source/Laser/LaserParticleContainer.cpp
@@ -454,7 +454,7 @@ LaserParticleContainer::Evolve (int lev,
         int const thread_num = 0;
 #endif
 
-        Cuda::ManagedDeviceVector<Real> plane_Xp, plane_Yp, amplitude_E;
+        Gpu::ManagedDeviceVector<Real> plane_Xp, plane_Yp, amplitude_E;
 
         for (WarpXParIter pti(*this, lev); pti.isValid(); ++pti)
         {
diff --git a/Source/Parallelization/WarpXComm.H b/Source/Parallelization/WarpXComm.H
new file mode 100644
index 000000000..81f00088e
--- /dev/null
+++ b/Source/Parallelization/WarpXComm.H
@@ -0,0 +1,33 @@
+#ifndef WARPX_PARALLELIZATION_COMM_H_
+#define WARPX_PARALLELIZATION_COMM_H_
+
+#include <AMReX_MultiFab.H>
+
+/** \brief Fills the values of the current on the coarse patch by
+ *  averaging the values of the current of the fine patch (on the same level).
+ *  Also fills the guards of the coarse patch.
+ *
+ * \param[in]  fine             fine patches to interpolate from
+ * \param[out] coarse           coarse patches to interpolate to
+ * \param[in]  refinement_ratio integer ratio between the two
+ */
+void
+interpolateCurrentFineToCoarse (
+      std::array< amrex::MultiFab const *, 3 > const & fine,
+      std::array< amrex::MultiFab       *, 3 > const & coarse,
+      int const refinement_ratio);
+
+/** \brief Fills the values of the charge density on the coarse patch by
+ *  averaging the values of the charge density of the fine patch (on the same level).
+ *
+ * \param[in]  fine             fine patches to interpolate from
+ * \param[out] coarse           coarse patches to interpolate to
+ * \param[in]  refinement_ratio integer ratio between the two
+ */
+void
+interpolateDensityFineToCoarse (
+     const amrex::MultiFab& fine,
+     amrex::MultiFab& coarse,
+     int const refinement_ratio);
+
+#endif // WARPX_PARALLELIZATION_COMM_H_
diff --git a/Source/Parallelization/WarpXComm.cpp b/Source/Parallelization/WarpXComm.cpp
index 851b78748..b61ae4fc7 100644
--- a/Source/Parallelization/WarpXComm.cpp
+++ b/Source/Parallelization/WarpXComm.cpp
@@ -1,4 +1,4 @@
-#include "WarpXComm.H"
+#include <WarpXComm.H>
 #include <WarpXComm_K.H>
 #include <WarpX.H>
 #include <WarpX_f.H>
diff --git a/Source/Particles/Deposition/CurrentDeposition.H b/Source/Particles/Deposition/CurrentDeposition.H
index 2737eb008..c1502e311 100644
--- a/Source/Particles/Deposition/CurrentDeposition.H
+++ b/Source/Particles/Deposition/CurrentDeposition.H
@@ -15,15 +15,14 @@
                          required to have the charge of each macroparticle
                          since q is a scalar. For non-ionizable species,
                          ion_lev is a null pointer.
- * \param jx_arr       : Array4 of current density, either full array or tile.
- * \param jy_arr       : Array4 of current density, either full array or tile.
- * \param jz_arr       : Array4 of current density, either full array or tile.
+ * \param jx_fab       : FArrayBox of current density, either full array or tile.
+ * \param jy_fab       : FArrayBox of current density, either full array or tile.
+ * \param jz_fab       : FArrayBox of current density, either full array or tile.
  * \param np_to_depose : Number of particles for which current is deposited.
  * \param dt           : Time step for particle level
  * \param dx           : 3D cell size
  * \param xyzmin       : Physical lower bounds of domain.
  * \param lo           : Index lower bounds of domain.
- * \param stagger_shift: 0 if nodal, 0.5 if staggered.
  * /param q            : species charge.
  */
 template <int depos_order>
@@ -35,14 +34,13 @@ void doDepositionShapeN(const amrex::ParticleReal * const xp,
                         const amrex::ParticleReal * const uyp,
                         const amrex::ParticleReal * const uzp,
                         const int * const ion_lev,
-                        const amrex::Array4<amrex::Real>& jx_arr,
-                        const amrex::Array4<amrex::Real>& jy_arr,
-                        const amrex::Array4<amrex::Real>& jz_arr,
+                        amrex::FArrayBox& jx_fab,
+                        amrex::FArrayBox& jy_fab,
+                        amrex::FArrayBox& jz_fab,
                         const long np_to_depose, const amrex::Real dt,
                         const std::array<amrex::Real,3>& dx,
-                        const std::array<amrex::Real, 3> xyzmin,
+                        const std::array<amrex::Real,3>& xyzmin,
                         const amrex::Dim3 lo,
-                        const amrex::Real stagger_shift,
                         const amrex::Real q)
 {
     // Whether ion_lev is a null pointer (do_ionization=0) or a real pointer
@@ -63,16 +61,28 @@ void doDepositionShapeN(const amrex::ParticleReal * const xp,
     const amrex::Real xmin = xyzmin[0];
     const amrex::Real ymin = xyzmin[1];
     const amrex::Real zmin = xyzmin[2];
+
     const amrex::Real clightsq = 1.0/PhysConst::c/PhysConst::c;
 
-    // Loop over particles and deposit into jx_arr, jy_arr and jz_arr
+    amrex::Array4<amrex::Real> const& jx_arr = jx_fab.array();
+    amrex::Array4<amrex::Real> const& jy_arr = jy_fab.array();
+    amrex::Array4<amrex::Real> const& jz_arr = jz_fab.array();
+    amrex::IntVect const jx_type = jx_fab.box().type();
+    amrex::IntVect const jy_type = jy_fab.box().type();
+    amrex::IntVect const jz_type = jz_fab.box().type();
+
+    constexpr int zdir = (AMREX_SPACEDIM - 1);
+    constexpr int NODE = amrex::IndexType::NODE;
+    constexpr int CELL = amrex::IndexType::CELL;
+
+    // Loop over particles and deposit into jx_fab, jy_fab and jz_fab
     amrex::ParallelFor(
         np_to_depose,
         [=] AMREX_GPU_DEVICE (long ip) {
             // --- Get particle quantities
             const amrex::Real gaminv = 1.0/std::sqrt(1.0 + uxp[ip]*uxp[ip]*clightsq
-                                                     + uyp[ip]*uyp[ip]*clightsq
-                                                     + uzp[ip]*uzp[ip]*clightsq);
+                                                         + uyp[ip]*uyp[ip]*clightsq
+                                                         + uzp[ip]*uzp[ip]*clightsq);
             amrex::Real wq  = q*wp[ip];
             if (do_ionization){
                 wq *= ion_lev[ip];
@@ -108,47 +118,84 @@ void doDepositionShapeN(const amrex::ParticleReal * const xp,
             // x direction
             // Get particle position after 1/2 push back in position
 #if (defined WARPX_DIM_RZ)
-            const amrex::Real xmid = (rpmid-xmin)*dxi;
+            const amrex::Real xmid = (rpmid - xmin)*dxi;
 #else
-            const amrex::Real xmid = (xp[ip]-xmin)*dxi-dts2dx*vx;
+            const amrex::Real xmid = (xp[ip] - xmin)*dxi - dts2dx*vx;
 #endif
-            // Compute shape factors for node-centered quantities
-            amrex::Real sx [depos_order + 1];
-            // j: leftmost grid point (node-centered) that the particle touches
-            const int j  = compute_shape_factor<depos_order>(sx,  xmid);
-            // Compute shape factors for cell-centered quantities
-            amrex::Real sx0[depos_order + 1];
-            // j0: leftmost grid point (cell-centered) that the particle touches
-            const int j0 = compute_shape_factor<depos_order>(sx0, xmid-stagger_shift);
+            // j_j[xyz] leftmost grid point in x that the particle touches for the centering of each current
+            // sx_j[xyz] shape factor along x for the centering of each current
+            // There are only two possible centerings, node or cell centered, so at most only two shape factor
+            // arrays will be needed.
+            amrex::Real sx_node[depos_order + 1];
+            amrex::Real sx_cell[depos_order + 1];
+            int j_node;
+            int j_cell;
+            if (jx_type[0] == NODE || jy_type[0] == NODE || jz_type[0] == NODE) {
+                j_node = compute_shape_factor<depos_order>(sx_node, xmid);
+            }
+            if (jx_type[0] == CELL || jy_type[0] == CELL || jz_type[0] == CELL) {
+                j_cell = compute_shape_factor<depos_order>(sx_cell, xmid - 0.5);
+            }
+            const amrex::Real (&sx_jx)[depos_order + 1] = ((jx_type[0] == NODE) ? sx_node : sx_cell);
+            const amrex::Real (&sx_jy)[depos_order + 1] = ((jy_type[0] == NODE) ? sx_node : sx_cell);
+            const amrex::Real (&sx_jz)[depos_order + 1] = ((jz_type[0] == NODE) ? sx_node : sx_cell);
+            int const j_jx = ((jx_type[0] == NODE) ? j_node : j_cell);
+            int const j_jy = ((jy_type[0] == NODE) ? j_node : j_cell);
+            int const j_jz = ((jz_type[0] == NODE) ? j_node : j_cell);
 
 #if (defined WARPX_DIM_3D)
             // y direction
-            const amrex::Real ymid= (yp[ip]-ymin)*dyi-dts2dy*vy;
-            amrex::Real sy [depos_order + 1];
-            const int k  = compute_shape_factor<depos_order>(sy,  ymid);
-            amrex::Real sy0[depos_order + 1];
-            const int k0 = compute_shape_factor<depos_order>(sy0, ymid-stagger_shift);
+            const amrex::Real ymid = (yp[ip] - ymin)*dyi - dts2dy*vy;
+            amrex::Real sy_node[depos_order + 1];
+            amrex::Real sy_cell[depos_order + 1];
+            int k_node;
+            int k_cell;
+            if (jx_type[1] == NODE || jy_type[1] == NODE || jz_type[1] == NODE) {
+                k_node = compute_shape_factor<depos_order>(sy_node, ymid);
+            }
+            if (jx_type[1] == CELL || jy_type[1] == CELL || jz_type[1] == CELL) {
+                k_cell = compute_shape_factor<depos_order>(sy_cell, ymid - 0.5);
+            }
+            const amrex::Real (&sy_jx)[depos_order + 1] = ((jx_type[1] == NODE) ? sy_node : sy_cell);
+            const amrex::Real (&sy_jy)[depos_order + 1] = ((jy_type[1] == NODE) ? sy_node : sy_cell);
+            const amrex::Real (&sy_jz)[depos_order + 1] = ((jz_type[1] == NODE) ? sy_node : sy_cell);
+            int const k_jx = ((jx_type[1] == NODE) ? k_node : k_cell);
+            int const k_jy = ((jy_type[1] == NODE) ? k_node : k_cell);
+            int const k_jz = ((jz_type[1] == NODE) ? k_node : k_cell);
 #endif
+
             // z direction
-            const amrex::Real zmid= (zp[ip]-zmin)*dzi-dts2dz*vz;
-            amrex::Real sz [depos_order + 1];
-            const int l  = compute_shape_factor<depos_order>(sz,  zmid);
-            amrex::Real sz0[depos_order + 1];
-            const int l0 = compute_shape_factor<depos_order>(sz0, zmid-stagger_shift);
+            const amrex::Real zmid = (zp[ip] - zmin)*dzi - dts2dz*vz;
+            amrex::Real sz_node[depos_order + 1];
+            amrex::Real sz_cell[depos_order + 1];
+            int l_node;
+            int l_cell;
+            if (jx_type[zdir] == NODE || jy_type[zdir] == NODE || jz_type[zdir] == NODE) {
+                l_node = compute_shape_factor<depos_order>(sz_node, zmid);
+            }
+            if (jx_type[zdir] == CELL || jy_type[zdir] == CELL || jz_type[zdir] == CELL) {
+                l_cell = compute_shape_factor<depos_order>(sz_cell, zmid - 0.5);
+            }
+            const amrex::Real (&sz_jx)[depos_order + 1] = ((jx_type[zdir] == NODE) ? sz_node : sz_cell);
+            const amrex::Real (&sz_jy)[depos_order + 1] = ((jy_type[zdir] == NODE) ? sz_node : sz_cell);
+            const amrex::Real (&sz_jz)[depos_order + 1] = ((jz_type[zdir] == NODE) ? sz_node : sz_cell);
+            int const l_jx = ((jx_type[zdir] == NODE) ? l_node : l_cell);
+            int const l_jy = ((jy_type[zdir] == NODE) ? l_node : l_cell);
+            int const l_jz = ((jz_type[zdir] == NODE) ? l_node : l_cell);
 
             // Deposit current into jx_arr, jy_arr and jz_arr
 #if (defined WARPX_DIM_XZ) || (defined WARPX_DIM_RZ)
             for (int iz=0; iz<=depos_order; iz++){
                 for (int ix=0; ix<=depos_order; ix++){
                     amrex::Gpu::Atomic::Add(
-                        &jx_arr(lo.x+j0+ix, lo.y+l +iz, 0),
-                        sx0[ix]*sz [iz]*wqx);
+                        &jx_arr(lo.x+j_jx+ix, lo.y+l_jx+iz, 0, 0),
+                        sx_jx[ix]*sz_jx[iz]*wqx);
                     amrex::Gpu::Atomic::Add(
-                        &jy_arr(lo.x+j +ix, lo.y+l +iz, 0),
-                        sx [ix]*sz [iz]*wqy);
+                        &jy_arr(lo.x+j_jy+ix, lo.y+l_jy+iz, 0, 0),
+                        sx_jy[ix]*sz_jy[iz]*wqy);
                     amrex::Gpu::Atomic::Add(
-                        &jz_arr(lo.x+j +ix, lo.y+l0+iz, 0),
-                        sx [ix]*sz0[iz]*wqz);
+                        &jz_arr(lo.x+j_jz+ix, lo.y+l_jz+iz, 0, 0),
+                        sx_jz[ix]*sz_jz[iz]*wqz);
                 }
             }
 #elif (defined WARPX_DIM_3D)
@@ -156,14 +203,14 @@ void doDepositionShapeN(const amrex::ParticleReal * const xp,
                 for (int iy=0; iy<=depos_order; iy++){
                     for (int ix=0; ix<=depos_order; ix++){
                         amrex::Gpu::Atomic::Add(
-                            &jx_arr(lo.x+j0+ix, lo.y+k +iy, lo.z+l +iz),
-                            sx0[ix]*sy [iy]*sz [iz]*wqx);
+                            &jx_arr(lo.x+j_jx+ix, lo.y+k_jx+iy, lo.z+l_jx+iz),
+                            sx_jx[ix]*sy_jx[iy]*sz_jx[iz]*wqx);
                         amrex::Gpu::Atomic::Add(
-                            &jy_arr(lo.x+j +ix, lo.y+k0+iy, lo.z+l +iz),
-                            sx [ix]*sy0[iy]*sz [iz]*wqy);
+                            &jy_arr(lo.x+j_jy+ix, lo.y+k_jy+iy, lo.z+l_jy+iz),
+                            sx_jy[ix]*sy_jy[iy]*sz_jy[iz]*wqy);
                         amrex::Gpu::Atomic::Add(
-                            &jz_arr(lo.x+j +ix, lo.y+k +iy, lo.z+l0+iz),
-                            sx [ix]*sy [iy]*sz0[iz]*wqz);
+                            &jz_arr(lo.x+j_jz+ix, lo.y+k_jz+iy, lo.z+l_jz+iz),
+                            sx_jz[ix]*sy_jz[iy]*sz_jz[iz]*wqz);
                     }
                 }
             }
diff --git a/Source/Particles/Make.package b/Source/Particles/Make.package
index a6c124ddd..6d34fa0ef 100644
--- a/Source/Particles/Make.package
+++ b/Source/Particles/Make.package
@@ -1,4 +1,3 @@
-F90EXE_sources += deposit_cic.F90
 F90EXE_sources += interpolate_cic.F90
 F90EXE_sources += push_particles_ES.F90
 
diff --git a/Source/Particles/MultiParticleContainer.cpp b/Source/Particles/MultiParticleContainer.cpp
index fca22daa2..55768c6fc 100644
--- a/Source/Particles/MultiParticleContainer.cpp
+++ b/Source/Particles/MultiParticleContainer.cpp
@@ -321,11 +321,7 @@ void
 MultiParticleContainer::Redistribute ()
 {
     for (auto& pc : allcontainers) {
-        if ( (pc->NumRuntimeRealComps()>0) || (pc->NumRuntimeIntComps()>0) ) {
-            pc->RedistributeCPU();
-        } else {
-            pc->Redistribute();
-        }
+        pc->Redistribute();
     }
 }
 
@@ -333,11 +329,7 @@ void
 MultiParticleContainer::RedistributeLocal (const int num_ghost)
 {
     for (auto& pc : allcontainers) {
-        if ( (pc->NumRuntimeRealComps()>0) || (pc->NumRuntimeIntComps()>0) ) {
-            pc->RedistributeCPU(0, 0, 0, num_ghost);
-        } else {
-            pc->Redistribute(0, 0, 0, num_ghost);
-        }
+        pc->Redistribute(0, 0, 0, num_ghost);
     }
 }
 
diff --git a/Source/Particles/ParticleCreation/CopyParticle.H b/Source/Particles/ParticleCreation/CopyParticle.H
index bd2d530fb..5e51c5283 100644
--- a/Source/Particles/ParticleCreation/CopyParticle.H
+++ b/Source/Particles/ParticleCreation/CopyParticle.H
@@ -13,7 +13,7 @@
  * Pointers to SoA data are saved when constructor is called.
  * AoS data is passed as argument of operator ().
  */
-class copyParticle
+class CopyParticle
 {
 
 public:
@@ -33,7 +33,7 @@ public:
 
     // Simple constructor
     AMREX_GPU_HOST_DEVICE
-    copyParticle(
+    CopyParticle(
         const int cpuid, const int do_back_transformed_product,
         const amrex::GpuArray<amrex::ParticleReal*,3> runtime_uold_source,
         const amrex::GpuArray<amrex::ParticleReal*,PIdx::nattribs> attribs_source,
@@ -54,7 +54,7 @@ public:
      * \param ip index of product particle
      * \param pid_product ID of first product particle
      * \param p_source Struct with data for 1 source particle (position etc.)
-     * \param p_source Struct with data for 1 product particle (position etc.)
+     * \param p_product Struct with data for 1 product particle (position etc.)
      */
     AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
     void operator () (int is, int ip, int pid_product,
diff --git a/Source/Particles/ParticleCreation/ElementaryProcess.H b/Source/Particles/ParticleCreation/ElementaryProcess.H
index fa791c244..75443cb38 100644
--- a/Source/Particles/ParticleCreation/ElementaryProcess.H
+++ b/Source/Particles/ParticleCreation/ElementaryProcess.H
@@ -21,8 +21,8 @@
  * The class is templated on the process type. This gives flexibility
  * on source and product particle transformations.
  */
-template<elementaryProcessType ProcessT>
-class elementaryProcess
+template<ElementaryProcessType ProcessT>
+class ElementaryProcess
 {
 public:
 
@@ -36,16 +36,17 @@ public:
      * \param attribs_product attribs of product particles
      * \param runtime_attribs_product runtime attribs for product, to store old attribs
      */
-    copyParticle initialize_copy(
+    CopyParticle initialize_copy(
         const int cpuid, const int do_back_transformed_product,
         const amrex::GpuArray<amrex::ParticleReal*,3> runtime_uold_source,
         const amrex::GpuArray<amrex::ParticleReal*,PIdx::nattribs> attribs_source,
         const amrex::GpuArray<amrex::ParticleReal*,PIdx::nattribs> attribs_product,
         const amrex::GpuArray<amrex::ParticleReal*,6> runtime_attribs_product) const noexcept
     {
-        return copyParticle (
+        return CopyParticle(
             cpuid, do_back_transformed_product, runtime_uold_source,
-            attribs_source, attribs_product, runtime_attribs_product);
+            attribs_source, attribs_product, runtime_attribs_product
+        );
     };
 
     /**
@@ -123,7 +124,8 @@ public:
         int np_flagged = i_product[np_source-1];
         if (np_flagged == 0) return;
 
-        amrex::Vector<copyParticle> v_copy_functor;
+        amrex::Gpu::ManagedDeviceVector<CopyParticle> v_copy_functor;
+        v_copy_functor.reserve(nproducts);
         amrex::Gpu::ManagedDeviceVector<int> v_pid_product(nproducts);
         amrex::Gpu::ManagedDeviceVector<WarpXParticleContainer::ParticleType*> v_particles_product(nproducts);
         for (int iproduct=0; iproduct<nproducts; iproduct++){
@@ -172,12 +174,13 @@ public:
             const int cpuid = amrex::ParallelDescriptor::MyProc();
 
             // Create instance of copy functor, and add it to the vector
-            v_copy_functor.push_back (initialize_copy(
-                                          cpuid, v_do_back_transformed_product[iproduct],
-                                          runtime_uold_source,
-                                          attribs_source,
-                                          attribs_product,
-                                          runtime_attribs_product) );
+            v_copy_functor.push_back( initialize_copy(
+                cpuid, v_do_back_transformed_product[iproduct],
+                runtime_uold_source,
+                attribs_source,
+                attribs_product,
+                runtime_attribs_product
+            ) );
             v_pid_product[iproduct] = pid_product;
 
         }
@@ -212,12 +215,12 @@ public:
         amrex::Gpu::ManagedDeviceVector<int> v_pid_product,
         amrex::Gpu::ManagedDeviceVector<WarpXParticleContainer::ParticleType*> v_particles_product,
         WarpXParticleContainer::ParticleType* particles_source,
-        const amrex::Vector<copyParticle>& v_copy_functor,
+        const amrex::Gpu::ManagedDeviceVector<CopyParticle>& v_copy_functor,
         amrex::GpuArray<int*,1> runtime_iattribs_source)
     {
         int const * const AMREX_RESTRICT p_is_flagged = is_flagged.dataPtr();
         int const * const AMREX_RESTRICT p_i_product = i_product.dataPtr();
-        copyParticle const * const p_copy_functor = v_copy_functor.data();
+        CopyParticle const * const AMREX_RESTRICT p_copy_functor = v_copy_functor.dataPtr();
         WarpXParticleContainer::ParticleType * * p_particles_product = v_particles_product.data();
         int const * const p_pid_product = v_pid_product.data();
 
@@ -251,7 +254,7 @@ public:
 };
 
 // Derived class for ionization process
-class IonizationProcess: public elementaryProcess<elementaryProcessType::Ionization>
+class IonizationProcess: public ElementaryProcess<ElementaryProcessType::Ionization>
 {};
 
 #endif // ELEMENTARYPROCESS_H_
diff --git a/Source/Particles/ParticleCreation/TransformParticle.H b/Source/Particles/ParticleCreation/TransformParticle.H
index 14e534d0e..1b71df723 100644
--- a/Source/Particles/ParticleCreation/TransformParticle.H
+++ b/Source/Particles/ParticleCreation/TransformParticle.H
@@ -3,7 +3,7 @@
 
 #include "WarpXParticleContainer.H"
 
-enum struct elementaryProcessType { Ionization };
+enum struct ElementaryProcessType { Ionization };
 
 /**
  * \brief small modifications on source particle
@@ -11,7 +11,7 @@ enum struct elementaryProcessType { Ionization };
  * \param particle particle struct
  * \param runtime_iattribs integer attribs
  */
-template < elementaryProcessType ProcessT >
+template < ElementaryProcessType ProcessT >
 AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
 void transformSourceParticle(
     int i,
@@ -23,7 +23,7 @@ void transformSourceParticle(
  * \param i index of particle
  * \param particle particle struct
  */
-template < elementaryProcessType ProcessT >
+template < ElementaryProcessType ProcessT >
 AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
 void transformProductParticle(
     int i,
@@ -32,7 +32,7 @@ void transformProductParticle(
 // For ionization, increase ionization level of source particle
 template <>
 AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
-void transformSourceParticle < elementaryProcessType::Ionization > (
+void transformSourceParticle < ElementaryProcessType::Ionization > (
     int i,
     WarpXParticleContainer::ParticleType& particle,
     amrex::GpuArray<int*,1> runtime_iattribs)
diff --git a/Source/Particles/PhotonParticleContainer.H b/Source/Particles/PhotonParticleContainer.H
index 9132cf4a9..851726d57 100644
--- a/Source/Particles/PhotonParticleContainer.H
+++ b/Source/Particles/PhotonParticleContainer.H
@@ -41,9 +41,9 @@ public:
                          DtType a_dt_type=DtType::Full) override;
 
     virtual void PushPX(WarpXParIter& pti,
-                        amrex::Cuda::ManagedDeviceVector<amrex::ParticleReal>& xp,
-                        amrex::Cuda::ManagedDeviceVector<amrex::ParticleReal>& yp,
-                        amrex::Cuda::ManagedDeviceVector<amrex::ParticleReal>& zp,
+                        amrex::Gpu::ManagedDeviceVector<amrex::ParticleReal>& xp,
+                        amrex::Gpu::ManagedDeviceVector<amrex::ParticleReal>& yp,
+                        amrex::Gpu::ManagedDeviceVector<amrex::ParticleReal>& zp,
                         amrex::Real dt, DtType a_dt_type=DtType::Full) override;
 
     // Don't push momenta for photons
diff --git a/Source/Particles/PhotonParticleContainer.cpp b/Source/Particles/PhotonParticleContainer.cpp
index 7e52b52e1..fd47ac8a0 100644
--- a/Source/Particles/PhotonParticleContainer.cpp
+++ b/Source/Particles/PhotonParticleContainer.cpp
@@ -51,9 +51,9 @@ void PhotonParticleContainer::InitData()
 
 void
 PhotonParticleContainer::PushPX(WarpXParIter& pti,
-                                Cuda::ManagedDeviceVector<ParticleReal>& xp,
-                                Cuda::ManagedDeviceVector<ParticleReal>& yp,
-                                Cuda::ManagedDeviceVector<ParticleReal>& zp,
+                                Gpu::ManagedDeviceVector<ParticleReal>& xp,
+                                Gpu::ManagedDeviceVector<ParticleReal>& yp,
+                                Gpu::ManagedDeviceVector<ParticleReal>& zp,
                                 Real dt, DtType a_dt_type)
 {
 
diff --git a/Source/Particles/PhysicalParticleContainer.H b/Source/Particles/PhysicalParticleContainer.H
index 17a504719..174488eb9 100644
--- a/Source/Particles/PhysicalParticleContainer.H
+++ b/Source/Particles/PhysicalParticleContainer.H
@@ -94,9 +94,9 @@ public:
                          DtType a_dt_type=DtType::Full) override;
 
     virtual void PushPX(WarpXParIter& pti,
-                        amrex::Cuda::ManagedDeviceVector<amrex::ParticleReal>& xp,
-                        amrex::Cuda::ManagedDeviceVector<amrex::ParticleReal>& yp,
-                        amrex::Cuda::ManagedDeviceVector<amrex::ParticleReal>& zp,
+                        amrex::Gpu::ManagedDeviceVector<amrex::ParticleReal>& xp,
+                        amrex::Gpu::ManagedDeviceVector<amrex::ParticleReal>& yp,
+                        amrex::Gpu::ManagedDeviceVector<amrex::ParticleReal>& zp,
                         amrex::Real dt, DtType a_dt_type=DtType::Full);
 
     virtual void PushP (int lev, amrex::Real dt,
diff --git a/Source/Particles/PhysicalParticleContainer.cpp b/Source/Particles/PhysicalParticleContainer.cpp
index 6c2aef8a6..cb2a7c0e8 100644
--- a/Source/Particles/PhysicalParticleContainer.cpp
+++ b/Source/Particles/PhysicalParticleContainer.cpp
@@ -63,16 +63,6 @@ PhysicalParticleContainer::PhysicalParticleContainer (AmrCore* amr_core, int isp
         "Radiation reaction can be enabled only if Boris pusher is used");
     //_____________________________
 
-#ifdef AMREX_USE_GPU
-    Print()<<"\n-----------------------------------------------------\n";
-    Print()<<"WARNING: field ionization on GPU uses RedistributeCPU\n";
-    Print()<<"-----------------------------------------------------\n\n";
-    //AMREX_ALWAYS_ASSERT_WITH_MESSAGE(
-        //do_field_ionization == 0,
-        //"Field ionization does not work on GPU so far, because the current "
-        //"version of Redistribute in AMReX does not work with runtime parameters");
-#endif
-
 #ifdef WARPX_QED
     //Add real component if QED is enabled
     pp.query("do_qed", m_do_qed);
@@ -1407,7 +1397,7 @@ PhysicalParticleContainer::SplitParticles(int lev)
 {
     auto& mypc = WarpX::GetInstance().GetPartContainer();
     auto& pctmp_split = mypc.GetPCtmp();
-    Cuda::ManagedDeviceVector<ParticleReal> xp, yp, zp;
+    Gpu::ManagedDeviceVector<ParticleReal> xp, yp, zp;
     RealVector psplit_x, psplit_y, psplit_z, psplit_w;
     RealVector psplit_ux, psplit_uy, psplit_uz;
     long np_split_to_add = 0;
@@ -1565,9 +1555,9 @@ PhysicalParticleContainer::SplitParticles(int lev)
 
 void
 PhysicalParticleContainer::PushPX(WarpXParIter& pti,
-                                  Cuda::ManagedDeviceVector<ParticleReal>& xp,
-                                  Cuda::ManagedDeviceVector<ParticleReal>& yp,
-                                  Cuda::ManagedDeviceVector<ParticleReal>& zp,
+                                  Gpu::ManagedDeviceVector<ParticleReal>& xp,
+                                  Gpu::ManagedDeviceVector<ParticleReal>& yp,
+                                  Gpu::ManagedDeviceVector<ParticleReal>& zp,
                                   Real dt, DtType a_dt_type)
 {
 
diff --git a/Source/Particles/RigidInjectedParticleContainer.H b/Source/Particles/RigidInjectedParticleContainer.H
index 3abbb4afe..a2473c5ad 100644
--- a/Source/Particles/RigidInjectedParticleContainer.H
+++ b/Source/Particles/RigidInjectedParticleContainer.H
@@ -44,9 +44,9 @@ public:
                          DtType a_dt_type=DtType::Full) override;
 
     virtual void PushPX(WarpXParIter& pti,
-                        amrex::Cuda::ManagedDeviceVector<amrex::ParticleReal>& xp,
-                        amrex::Cuda::ManagedDeviceVector<amrex::ParticleReal>& yp,
-                        amrex::Cuda::ManagedDeviceVector<amrex::ParticleReal>& zp,
+                        amrex::Gpu::ManagedDeviceVector<amrex::ParticleReal>& xp,
+                        amrex::Gpu::ManagedDeviceVector<amrex::ParticleReal>& yp,
+                        amrex::Gpu::ManagedDeviceVector<amrex::ParticleReal>& zp,
                         amrex::Real dt, DtType a_dt_type=DtType::Full) override;
 
     virtual void PushP (int lev, amrex::Real dt,
diff --git a/Source/Particles/RigidInjectedParticleContainer.cpp b/Source/Particles/RigidInjectedParticleContainer.cpp
index 507206041..bee71fba1 100644
--- a/Source/Particles/RigidInjectedParticleContainer.cpp
+++ b/Source/Particles/RigidInjectedParticleContainer.cpp
@@ -78,7 +78,7 @@ RigidInjectedParticleContainer::RemapParticles()
                 // Note that the particles are already in the boosted frame.
                 // This value is saved to advance the particles not injected yet
 
-                Cuda::ManagedDeviceVector<ParticleReal> xp, yp, zp;
+                Gpu::ManagedDeviceVector<ParticleReal> xp, yp, zp;
 
                 for (WarpXParIter pti(*this, lev); pti.isValid(); ++pti)
                 {
@@ -138,7 +138,7 @@ RigidInjectedParticleContainer::BoostandRemapParticles()
 #pragma omp parallel
 #endif
     {
-        Cuda::ManagedDeviceVector<ParticleReal> xp, yp, zp;
+        Gpu::ManagedDeviceVector<ParticleReal> xp, yp, zp;
 
         for (WarpXParIter pti(*this, 0); pti.isValid(); ++pti)
         {
@@ -209,9 +209,9 @@ RigidInjectedParticleContainer::BoostandRemapParticles()
 
 void
 RigidInjectedParticleContainer::PushPX(WarpXParIter& pti,
-                                       Cuda::ManagedDeviceVector<ParticleReal>& xp,
-                                       Cuda::ManagedDeviceVector<ParticleReal>& yp,
-                                       Cuda::ManagedDeviceVector<ParticleReal>& zp,
+                                       Gpu::ManagedDeviceVector<ParticleReal>& xp,
+                                       Gpu::ManagedDeviceVector<ParticleReal>& yp,
+                                       Gpu::ManagedDeviceVector<ParticleReal>& zp,
                                        Real dt, DtType a_dt_type)
 {
 
@@ -222,7 +222,7 @@ RigidInjectedParticleContainer::PushPX(WarpXParIter& pti,
     auto& uzp = attribs[PIdx::uz];
 
     // Save the position and momenta, making copies
-    Cuda::ManagedDeviceVector<ParticleReal> xp_save, yp_save, zp_save;
+    Gpu::ManagedDeviceVector<ParticleReal> xp_save, yp_save, zp_save;
     RealVector uxp_save, uyp_save, uzp_save;
 
     ParticleReal* const AMREX_RESTRICT x = xp.dataPtr();
diff --git a/Source/Particles/Sorting/SortingUtils.H b/Source/Particles/Sorting/SortingUtils.H
index 28a1b73b7..80eaaf9cb 100644
--- a/Source/Particles/Sorting/SortingUtils.H
+++ b/Source/Particles/Sorting/SortingUtils.H
@@ -2,7 +2,6 @@
 #define WARPX_PARTICLES_SORTING_SORTINGUTILS_H_
 
 #include <WarpXParticleContainer.H>
-#include <AMReX_CudaContainers.H>
 #include <AMReX_Gpu.H>
 #ifdef AMREX_USE_GPU
     #include <thrust/partition.h>
@@ -43,7 +42,7 @@ ForwardIterator stablePartition(ForwardIterator const index_begin,
     // On GPU: Use thrust
     int const* AMREX_RESTRICT predicate_ptr = predicate.dataPtr();
     ForwardIterator const sep = thrust::stable_partition(
-        thrust::cuda::par(amrex::Cuda::The_ThrustCachedAllocator()),
+        thrust::cuda::par(amrex::Gpu::The_ThrustCachedAllocator()),
         index_begin, index_end,
         [predicate_ptr] AMREX_GPU_DEVICE (long i) { return predicate_ptr[i]; }
     );
diff --git a/Source/Particles/WarpXParticleContainer.H b/Source/Particles/WarpXParticleContainer.H
index cdfe112af..7da25b452 100644
--- a/Source/Particles/WarpXParticleContainer.H
+++ b/Source/Particles/WarpXParticleContainer.H
@@ -73,12 +73,12 @@ public:
     WarpXParIter (ContainerType& pc, int level);
 
 #if (AMREX_SPACEDIM == 2)
-    void GetPosition (amrex::Cuda::ManagedDeviceVector<amrex::ParticleReal>& x,
-                      amrex::Cuda::ManagedDeviceVector<amrex::ParticleReal>& y,
-                      amrex::Cuda::ManagedDeviceVector<amrex::ParticleReal>& z) const;
-    void SetPosition (const amrex::Cuda::ManagedDeviceVector<amrex::ParticleReal>& x,
-                      const amrex::Cuda::ManagedDeviceVector<amrex::ParticleReal>& y,
-                      const amrex::Cuda::ManagedDeviceVector<amrex::ParticleReal>& z);
+    void GetPosition (amrex::Gpu::ManagedDeviceVector<amrex::ParticleReal>& x,
+                      amrex::Gpu::ManagedDeviceVector<amrex::ParticleReal>& y,
+                      amrex::Gpu::ManagedDeviceVector<amrex::ParticleReal>& z) const;
+    void SetPosition (const amrex::Gpu::ManagedDeviceVector<amrex::ParticleReal>& x,
+                      const amrex::Gpu::ManagedDeviceVector<amrex::ParticleReal>& y,
+                      const amrex::Gpu::ManagedDeviceVector<amrex::ParticleReal>& z);
 #endif
     const std::array<RealVector, PIdx::nattribs>& GetAttribs () const {
         return GetStructOfArrays().GetRealData();
diff --git a/Source/Particles/WarpXParticleContainer.cpp b/Source/Particles/WarpXParticleContainer.cpp
index 4be339707..01a2e5ccb 100644
--- a/Source/Particles/WarpXParticleContainer.cpp
+++ b/Source/Particles/WarpXParticleContainer.cpp
@@ -4,6 +4,7 @@
 #include <MultiParticleContainer.H>
 #include <WarpXParticleContainer.H>
 #include <AMReX_AmrParGDB.H>
+#include <WarpXComm.H>
 #include <WarpX_f.H>
 #include <WarpX.H>
 #include <WarpXAlgorithmSelection.H>
@@ -272,9 +273,7 @@ WarpXParticleContainer::DepositCurrent(WarpXParIter& pti,
 
     const long ngJ = jx->nGrow();
     const std::array<Real,3>& dx = WarpX::CellSize(std::max(depos_lev,0));
-    int j_is_nodal = jx->is_nodal() and jy->is_nodal() and jz->is_nodal();
     Real q = this->charge;
-    const Real stagger_shift = j_is_nodal ? 0.0 : 0.5;
 
     BL_PROFILE_VAR_NS("PPC::Evolve::Accumulate", blp_accumulate);
     BL_PROFILE_VAR_NS("PPC::CurrentDeposition", blp_deposit);
@@ -300,6 +299,9 @@ WarpXParticleContainer::DepositCurrent(WarpXParIter& pti,
 #ifdef AMREX_USE_GPU
     // No tiling on GPU: jx_ptr points to the full
     // jx array (same for jy_ptr and jz_ptr).
+    auto & jx_fab = jx->get(pti);
+    auto & jy_fab = jy->get(pti);
+    auto & jz_fab = jz->get(pti);
     Array4<Real> const& jx_arr = jx->array(pti);
     Array4<Real> const& jy_arr = jy->array(pti);
     Array4<Real> const& jz_arr = jz->array(pti);
@@ -319,6 +321,9 @@ WarpXParticleContainer::DepositCurrent(WarpXParIter& pti,
     local_jy[thread_num].setVal(0.0);
     local_jz[thread_num].setVal(0.0);
 
+    auto & jx_fab = local_jx[thread_num];
+    auto & jy_fab = local_jy[thread_num];
+    auto & jz_fab = local_jz[thread_num];
     Array4<Real> const& jx_arr = local_jx[thread_num].array();
     Array4<Real> const& jy_arr = local_jy[thread_num].array();
     Array4<Real> const& jz_arr = local_jz[thread_num].array();
@@ -333,13 +338,8 @@ WarpXParticleContainer::DepositCurrent(WarpXParIter& pti,
 
     // Lower corner of tile box physical domain
     // Note that this includes guard cells since it is after tilebox.ngrow
-    const std::array<Real, 3>& xyzmin = WarpX::LowerCorner(tilebox, depos_lev);
-    // xyzmin is built on pti.tilebox(), so it does
-    // not include staggering, so the stagger_shift has to be done by hand.
-    // Alternatively, we could define xyzminx from tbx (and the same for 3
-    // directions and for jx, jy, jz). This way, sx0 would not be needed.
-    // Better for memory? worth trying?
     const Dim3 lo = lbound(tilebox);
+    const std::array<Real, 3>& xyzmin = WarpX::LowerCorner(tilebox, depos_lev);
 
     BL_PROFILE_VAR_START(blp_deposit);
     if (WarpX::current_deposition_algo == CurrentDepositionAlgo::Esirkepov) {
@@ -367,20 +367,20 @@ WarpXParticleContainer::DepositCurrent(WarpXParIter& pti,
             doDepositionShapeN<1>(
                 xp, yp, zp, wp.dataPtr() + offset, uxp.dataPtr() + offset,
                 uyp.dataPtr() + offset, uzp.dataPtr() + offset, ion_lev,
-                jx_arr, jy_arr, jz_arr, np_to_depose, dt, dx, xyzmin, lo,
-                stagger_shift, q);
+                jx_fab, jy_fab, jz_fab, np_to_depose, dt, dx,
+                xyzmin, lo, q);
         } else if (WarpX::nox == 2){
             doDepositionShapeN<2>(
                 xp, yp, zp, wp.dataPtr() + offset, uxp.dataPtr() + offset,
                 uyp.dataPtr() + offset, uzp.dataPtr() + offset, ion_lev,
-                jx_arr, jy_arr, jz_arr, np_to_depose, dt, dx, xyzmin, lo,
-                stagger_shift, q);
+                jx_fab, jy_fab, jz_fab, np_to_depose, dt, dx,
+                xyzmin, lo, q);
         } else if (WarpX::nox == 3){
             doDepositionShapeN<3>(
                 xp, yp, zp, wp.dataPtr() + offset, uxp.dataPtr() + offset,
                 uyp.dataPtr() + offset, uzp.dataPtr() + offset, ion_lev,
-                jx_arr, jy_arr, jz_arr, np_to_depose, dt, dx, xyzmin, lo,
-                stagger_shift, q);
+                jx_fab, jy_fab, jz_fab, np_to_depose, dt, dx,
+                xyzmin, lo, q);
         }
     }
     BL_PROFILE_VAR_STOP(blp_deposit);
@@ -540,7 +540,7 @@ WarpXParticleContainer::DepositCharge (Vector<std::unique_ptr<MultiFab> >& rho,
 #endif
 
 #ifdef WARPX_DIM_RZ
-        WarpX::GetInstance().ApplyInverseVolumeScalingToChargeDensity(rho[lev].get(), lev);
+        if (reset) WarpX::GetInstance().ApplyInverseVolumeScalingToChargeDensity(rho[lev].get(), lev);
 #endif
 
         // Exchange guard cells
diff --git a/Source/Particles/deposit_cic.F90 b/Source/Particles/deposit_cic.F90
deleted file mode 100644
index 2831ce96b..000000000
--- a/Source/Particles/deposit_cic.F90
+++ /dev/null
@@ -1,134 +0,0 @@
-module warpx_ES_deposit_cic
-
-  use iso_c_binding
-  use amrex_fort_module, only : amrex_real, amrex_particle_real
-
-  implicit none
-
-contains
-
-! This routine computes the charge density due to the particles using cloud-in-cell
-! deposition. The Fab rho is assumed to be node-centered.
-!
-! Arguments:
-!     particles : a pointer to the particle array-of-structs
-!     ns        : the stride length of particle struct (the size of the struct in number of reals)
-!     np        : the number of particles
-!     weights   : the particle weights
-!     charge    : the charge of this particle species
-!     rho       : a Fab that will contain the charge density on exit
-!     lo        : a pointer to the lo corner of this valid box for rho, in index space
-!     hi        : a pointer to the hi corner of this valid box for rho, in index space
-!     plo       : the real position of the left-hand corner of the problem domain
-!     dx        : the mesh spacing
-!     ng        : the number of ghost cells in rho
-!
-  subroutine warpx_deposit_cic_3d(particles, ns, np,                     &
-                                  weights, charge, rho, lo, hi, plo, dx, &
-                                  ng)                                    &
-       bind(c,name='warpx_deposit_cic_3d')
-    integer, value,   intent(in)     :: ns, np
-    real(amrex_particle_real), intent(in)     :: particles(ns,np)
-    real(amrex_particle_real), intent(in)     :: weights(np)
-    real(amrex_real), intent(in)     :: charge
-    integer,          intent(in)     :: lo(3)
-    integer,          intent(in)     :: hi(3)
-    integer,          intent(in)     :: ng
-    real(amrex_real), intent(inout)  :: rho(lo(1)-ng:hi(1)+ng, lo(2)-ng:hi(2)+ng, lo(3)-ng:hi(3)+ng)
-    real(amrex_real), intent(in)     :: plo(3)
-    real(amrex_real), intent(in)     :: dx(3)
-
-    integer i, j, k, n
-    real(amrex_real) wx_lo, wy_lo, wz_lo, wx_hi, wy_hi, wz_hi
-    real(amrex_real) lx, ly, lz
-    real(amrex_real) inv_dx(3)
-    real(amrex_real) qp, inv_vol
-
-    inv_dx = 1.0d0/dx
-
-    inv_vol = inv_dx(1) * inv_dx(2) * inv_dx(3)
-
-    do n = 1, np
-
-       qp = weights(n) * charge * inv_vol
-
-       lx = (particles(1, n) - plo(1))*inv_dx(1)
-       ly = (particles(2, n) - plo(2))*inv_dx(2)
-       lz = (particles(3, n) - plo(3))*inv_dx(3)
-
-       i = floor(lx)
-       j = floor(ly)
-       k = floor(lz)
-
-       wx_hi = lx - i
-       wy_hi = ly - j
-       wz_hi = lz - k
-
-       wx_lo = 1.0d0 - wx_hi
-       wy_lo = 1.0d0 - wy_hi
-       wz_lo = 1.0d0 - wz_hi
-
-       rho(i,   j,   k  ) = rho(i,   j,   k  ) + wx_lo*wy_lo*wz_lo*qp
-       rho(i,   j,   k+1) = rho(i,   j,   k+1) + wx_lo*wy_lo*wz_hi*qp
-       rho(i,   j+1, k  ) = rho(i,   j+1, k  ) + wx_lo*wy_hi*wz_lo*qp
-       rho(i,   j+1, k+1) = rho(i,   j+1, k+1) + wx_lo*wy_hi*wz_hi*qp
-       rho(i+1, j,   k  ) = rho(i+1, j,   k  ) + wx_hi*wy_lo*wz_lo*qp
-       rho(i+1, j,   k+1) = rho(i+1, j,   k+1) + wx_hi*wy_lo*wz_hi*qp
-       rho(i+1, j+1, k  ) = rho(i+1, j+1, k  ) + wx_hi*wy_hi*wz_lo*qp
-       rho(i+1, j+1, k+1) = rho(i+1, j+1, k+1) + wx_hi*wy_hi*wz_hi*qp
-
-    end do
-
-  end subroutine warpx_deposit_cic_3d
-
-  subroutine warpx_deposit_cic_2d(particles, ns, np,                     &
-                                  weights, charge, rho, lo, hi, plo, dx, &
-                                  ng)                                    &
-       bind(c,name='warpx_deposit_cic_2d')
-    integer, value,   intent(in)     :: ns, np
-    real(amrex_particle_real), intent(in)     :: particles(ns,np)
-    real(amrex_particle_real), intent(in)     :: weights(np)
-    real(amrex_real), intent(in)     :: charge
-    integer,          intent(in)     :: lo(2)
-    integer,          intent(in)     :: hi(2)
-    integer,          intent(in)     :: ng
-    real(amrex_real), intent(inout)  :: rho(lo(1)-ng:hi(1)+ng, lo(2)-ng:hi(2)+ng)
-    real(amrex_real), intent(in)     :: plo(2)
-    real(amrex_real), intent(in)     :: dx(2)
-
-    integer i, j, n
-    real(amrex_real) wx_lo, wy_lo, wx_hi, wy_hi
-    real(amrex_real) lx, ly
-    real(amrex_real) inv_dx(2)
-    real(amrex_real) qp, inv_vol
-
-    inv_dx = 1.0d0/dx
-
-    inv_vol = inv_dx(1) * inv_dx(2)
-
-    do n = 1, np
-
-       qp = weights(n) * charge * inv_vol
-
-       lx = (particles(1, n) - plo(1))*inv_dx(1)
-       ly = (particles(2, n) - plo(2))*inv_dx(2)
-
-       i = floor(lx)
-       j = floor(ly)
-
-       wx_hi = lx - i
-       wy_hi = ly - j
-
-       wx_lo = 1.0d0 - wx_hi
-       wy_lo = 1.0d0 - wy_hi
-
-       rho(i,   j  ) = rho(i,   j  ) + wx_lo*wy_lo*qp
-       rho(i,   j+1) = rho(i,   j+1) + wx_lo*wy_hi*qp
-       rho(i+1, j  ) = rho(i+1, j  ) + wx_hi*wy_lo*qp
-       rho(i+1, j+1) = rho(i+1, j+1) + wx_hi*wy_hi*qp
-
-    end do
-
-  end subroutine warpx_deposit_cic_2d
-
-end module warpx_ES_deposit_cic
diff --git a/Source/Utils/utils_ES.F90 b/Source/Utils/utils_ES.F90
index ce143bb94..baadeb7af 100644
--- a/Source/Utils/utils_ES.F90
+++ b/Source/Utils/utils_ES.F90
@@ -7,79 +7,6 @@ module warpx_ES_utils
 
 contains
 
-  subroutine warpx_sum_fine_to_crse_nodal_3d (lo, hi, lrat, crse, clo, chi, fine, flo, fhi) &
-       bind(c, name="warpx_sum_fine_to_crse_nodal_3d")
-
-    integer, intent(in)             ::   lo(3),  hi(3)
-    integer, intent(in)             ::  clo(3), chi(3)
-    integer, intent(in)             ::  flo(3), fhi(3)
-    integer, intent(in)             ::  lrat(3)
-    real(amrex_real), intent(inout) :: crse(clo(1):chi(1),clo(2):chi(2),clo(3):chi(3))
-    real(amrex_real), intent(in)    :: fine(flo(1):fhi(1),flo(2):fhi(2),flo(3):fhi(3))
-
-    integer :: i, j, k, ii, jj, kk
-
-    do k        = lo(3), hi(3)
-       kk       = k * lrat(3)
-       do j     = lo(2), hi(2)
-          jj    = j * lrat(2)
-          do i  = lo(1), hi(1)
-             ii = i * lrat(1)
-             crse(i,j,k)  =  fine(ii,jj,kk)                              + &
-! These six fine nodes are shared by two coarse nodes...
-                  0.5d0   * (fine(ii-1,jj,kk)     + fine(ii+1,jj,kk)     + &
-                             fine(ii,jj-1,kk)     + fine(ii,jj+1,kk)     + &
-                             fine(ii,jj,kk-1)     + fine(ii,jj,kk+1))    + &
-! ... these twelve are shared by four...
-                  0.25d0  * (fine(ii,jj-1,kk-1)   + fine(ii,jj+1,kk-1)   + &
-                             fine(ii,jj-1,kk+1)   + fine(ii,jj+1,kk+1)   + &
-                             fine(ii-1,jj,kk-1)   + fine(ii+1,jj,kk-1)   + &
-                             fine(ii-1,jj,kk+1)   + fine(ii+1,jj,kk+1)   + &
-                             fine(ii-1,jj-1,kk)   + fine(ii+1,jj-1,kk)   + &
-                             fine(ii-1,jj+1,kk)   + fine(ii+1,jj+1,kk))  + &
-! ... and these eight are shared by eight
-                  0.125d0 * (fine(ii-1,jj-1,kk-1) + fine(ii-1,jj-1,kk+1) + &
-                             fine(ii-1,jj+1,kk-1) + fine(ii-1,jj+1,kk+1) + &
-                             fine(ii+1,jj-1,kk-1) + fine(ii+1,jj-1,kk+1) + &
-                             fine(ii+1,jj+1,kk-1) + fine(ii+1,jj+1,kk+1))
-! ... note that we have 27 nodes in total...
-             crse(i,j,k) = crse(i,j,k) / 8.d0
-          end do
-       end do
-    end do
-
-  end subroutine warpx_sum_fine_to_crse_nodal_3d
-
-  subroutine warpx_sum_fine_to_crse_nodal_2d (lo, hi, lrat, crse, clo, chi, fine, flo, fhi) &
-       bind(c, name="warpx_sum_fine_to_crse_nodal_2d")
-
-    integer, intent(in)             ::   lo(2),  hi(2)
-    integer, intent(in)             ::  clo(2), chi(2)
-    integer, intent(in)             ::  flo(2), fhi(2)
-    integer, intent(in)             ::  lrat(2)
-    real(amrex_real), intent(inout) :: crse(clo(1):chi(1),clo(2):chi(2))
-    real(amrex_real), intent(in)    :: fine(flo(1):fhi(1),flo(2):fhi(2))
-
-    integer :: i, j, ii, jj
-
-    do j     = lo(2), hi(2)
-       jj    = j * lrat(2)
-       do i  = lo(1), hi(1)
-          ii = i * lrat(1)
-          crse(i,j)  =  fine(ii,jj)                             + &
-! These four fine nodes are shared by two coarse nodes...
-               0.5d0   * (fine(ii-1,jj)     + fine(ii+1,jj)     + &
-               fine(ii,jj-1)     + fine(ii,jj+1))               + &
-! ... and these four are shared by four...
-               0.25d0  * (fine(ii-1,jj-1)   + fine(ii-1,jj+1)   + &
-               fine(ii-1,jj+1)   + fine(ii+1,jj+1))
-! ... note that we have 9 nodes in total...
-             crse(i,j) = crse(i,j) / 4.d0
-       end do
-    end do
-
-  end subroutine warpx_sum_fine_to_crse_nodal_2d
-
   subroutine warpx_zero_out_bndry_3d (lo, hi, input_data, bndry_data, mask) &
        bind(c,name='warpx_zero_out_bndry_3d')
 
diff --git a/Source/WarpX.H b/Source/WarpX.H
index d17787c57..2bf3a8479 100644
--- a/Source/WarpX.H
+++ b/Source/WarpX.H
@@ -250,6 +250,9 @@ public:
     static amrex::RealBox getRealBox(const amrex::Box& bx, int lev);
     static std::array<amrex::Real,3> LowerCorner (const amrex::Box& bx, int lev);
     static std::array<amrex::Real,3> UpperCorner (const amrex::Box& bx, int lev);
+    // Returns the locations of the lower corner of the box, shifted up
+    // a half cell if cell centered.
+    static std::array<amrex::Real,3> LowerCornerWithCentering (const amrex::Box& bx, int lev);
 
     static amrex::IntVect RefRatio (int lev);
 
@@ -367,9 +370,6 @@ private:
     void zeroOutBoundary(amrex::MultiFab& input_data, amrex::MultiFab& bndry_data,
                          const amrex::FabArray<amrex::BaseFab<int> >& mask) const;
 
-    void sumFineToCrseNodal(const amrex::MultiFab& fine, amrex::MultiFab& crse,
-                            const amrex::Geometry& cgeom, const amrex::IntVect& ratio);
-
     void fixRHSForSolve(amrex::Vector<std::unique_ptr<amrex::MultiFab> >& rhs,
                         const amrex::Vector<std::unique_ptr<amrex::FabArray<amrex::BaseFab<int> > > >& masks) const ;
 
diff --git a/Source/WarpX.cpp b/Source/WarpX.cpp
index eef033236..b04bbb380 100644
--- a/Source/WarpX.cpp
+++ b/Source/WarpX.cpp
@@ -1062,6 +1062,21 @@ WarpX::UpperCorner(const Box& bx, int lev)
 #endif
 }
 
+std::array<Real,3>
+WarpX::LowerCornerWithCentering(const Box& bx, int lev)
+{
+    std::array<Real,3> corner = LowerCorner(bx, lev);
+    std::array<Real,3> dx = CellSize(lev);
+    if (!bx.type(0)) corner[0] += 0.5*dx[0];
+#if (AMREX_SPACEDIM == 3)
+    if (!bx.type(1)) corner[1] += 0.5*dx[1];
+    if (!bx.type(2)) corner[2] += 0.5*dx[2];
+#else
+    if (!bx.type(1)) corner[2] += 0.5*dx[2];
+#endif
+    return corner;
+}
+
 IntVect
 WarpX::RefRatio (int lev)
 {