4 files changed, 122 insertions, 58 deletions
diff --git a/Source/Particles/Deposition/CurrentDeposition.H b/Source/Particles/Deposition/CurrentDeposition.H
index 2737eb008..c1502e311 100644
--- a/Source/Particles/Deposition/CurrentDeposition.H
+++ b/Source/Particles/Deposition/CurrentDeposition.H
@@ -15,15 +15,14 @@
                          required to have the charge of each macroparticle
                          since q is a scalar. For non-ionizable species,
                          ion_lev is a null pointer.
- * \param jx_arr       : Array4 of current density, either full array or tile.
- * \param jy_arr       : Array4 of current density, either full array or tile.
- * \param jz_arr       : Array4 of current density, either full array or tile.
+ * \param jx_fab       : FArrayBox of current density, either full array or tile.
+ * \param jy_fab       : FArrayBox of current density, either full array or tile.
+ * \param jz_fab       : FArrayBox of current density, either full array or tile.
  * \param np_to_depose : Number of particles for which current is deposited.
  * \param dt           : Time step for particle level
  * \param dx           : 3D cell size
  * \param xyzmin       : Physical lower bounds of domain.
  * \param lo           : Index lower bounds of domain.
- * \param stagger_shift: 0 if nodal, 0.5 if staggered.
  * /param q            : species charge.
  */
 template <int depos_order>
@@ -35,14 +34,13 @@ void doDepositionShapeN(const amrex::ParticleReal * const xp,
                         const amrex::ParticleReal * const uyp,
                         const amrex::ParticleReal * const uzp,
                         const int * const ion_lev,
-                        const amrex::Array4<amrex::Real>& jx_arr,
-                        const amrex::Array4<amrex::Real>& jy_arr,
-                        const amrex::Array4<amrex::Real>& jz_arr,
+                        amrex::FArrayBox& jx_fab,
+                        amrex::FArrayBox& jy_fab,
+                        amrex::FArrayBox& jz_fab,
                         const long np_to_depose, const amrex::Real dt,
                         const std::array<amrex::Real,3>& dx,
-                        const std::array<amrex::Real, 3> xyzmin,
+                        const std::array<amrex::Real,3>& xyzmin,
                         const amrex::Dim3 lo,
-                        const amrex::Real stagger_shift,
                         const amrex::Real q)
 {
     // Whether ion_lev is a null pointer (do_ionization=0) or a real pointer
@@ -63,16 +61,28 @@ void doDepositionShapeN(const amrex::ParticleReal * const xp,
     const amrex::Real xmin = xyzmin[0];
     const amrex::Real ymin = xyzmin[1];
     const amrex::Real zmin = xyzmin[2];
+
     const amrex::Real clightsq = 1.0/PhysConst::c/PhysConst::c;
 
-    // Loop over particles and deposit into jx_arr, jy_arr and jz_arr
+    amrex::Array4<amrex::Real> const& jx_arr = jx_fab.array();
+    amrex::Array4<amrex::Real> const& jy_arr = jy_fab.array();
+    amrex::Array4<amrex::Real> const& jz_arr = jz_fab.array();
+    amrex::IntVect const jx_type = jx_fab.box().type();
+    amrex::IntVect const jy_type = jy_fab.box().type();
+    amrex::IntVect const jz_type = jz_fab.box().type();
+
+    constexpr int zdir = (AMREX_SPACEDIM - 1);
+    constexpr int NODE = amrex::IndexType::NODE;
+    constexpr int CELL = amrex::IndexType::CELL;
+
+    // Loop over particles and deposit into jx_fab, jy_fab and jz_fab
     amrex::ParallelFor(
         np_to_depose,
         [=] AMREX_GPU_DEVICE (long ip) {
             // --- Get particle quantities
             const amrex::Real gaminv = 1.0/std::sqrt(1.0 + uxp[ip]*uxp[ip]*clightsq
-                                                     + uyp[ip]*uyp[ip]*clightsq
-                                                     + uzp[ip]*uzp[ip]*clightsq);
+                                                         + uyp[ip]*uyp[ip]*clightsq
+                                                         + uzp[ip]*uzp[ip]*clightsq);
             amrex::Real wq  = q*wp[ip];
             if (do_ionization){
                 wq *= ion_lev[ip];
@@ -108,47 +118,84 @@ void doDepositionShapeN(const amrex::ParticleReal * const xp,
             // x direction
             // Get particle position after 1/2 push back in position
 #if (defined WARPX_DIM_RZ)
-            const amrex::Real xmid = (rpmid-xmin)*dxi;
+            const amrex::Real xmid = (rpmid - xmin)*dxi;
 #else
-            const amrex::Real xmid = (xp[ip]-xmin)*dxi-dts2dx*vx;
+            const amrex::Real xmid = (xp[ip] - xmin)*dxi - dts2dx*vx;
 #endif
-            // Compute shape factors for node-centered quantities
-            amrex::Real sx [depos_order + 1];
-            // j: leftmost grid point (node-centered) that the particle touches
-            const int j  = compute_shape_factor<depos_order>(sx,  xmid);
-            // Compute shape factors for cell-centered quantities
-            amrex::Real sx0[depos_order + 1];
-            // j0: leftmost grid point (cell-centered) that the particle touches
-            const int j0 = compute_shape_factor<depos_order>(sx0, xmid-stagger_shift);
+            // j_j[xyz] leftmost grid point in x that the particle touches for the centering of each current
+            // sx_j[xyz] shape factor along x for the centering of each current
+            // There are only two possible centerings, node or cell centered, so at most only two shape factor
+            // arrays will be needed.
+            amrex::Real sx_node[depos_order + 1];
+            amrex::Real sx_cell[depos_order + 1];
+            int j_node;
+            int j_cell;
+            if (jx_type[0] == NODE || jy_type[0] == NODE || jz_type[0] == NODE) {
+                j_node = compute_shape_factor<depos_order>(sx_node, xmid);
+            }
+            if (jx_type[0] == CELL || jy_type[0] == CELL || jz_type[0] == CELL) {
+                j_cell = compute_shape_factor<depos_order>(sx_cell, xmid - 0.5);
+            }
+            const amrex::Real (&sx_jx)[depos_order + 1] = ((jx_type[0] == NODE) ? sx_node : sx_cell);
+            const amrex::Real (&sx_jy)[depos_order + 1] = ((jy_type[0] == NODE) ? sx_node : sx_cell);
+            const amrex::Real (&sx_jz)[depos_order + 1] = ((jz_type[0] == NODE) ? sx_node : sx_cell);
+            int const j_jx = ((jx_type[0] == NODE) ? j_node : j_cell);
+            int const j_jy = ((jy_type[0] == NODE) ? j_node : j_cell);
+            int const j_jz = ((jz_type[0] == NODE) ? j_node : j_cell);
 
 #if (defined WARPX_DIM_3D)
             // y direction
-            const amrex::Real ymid= (yp[ip]-ymin)*dyi-dts2dy*vy;
-            amrex::Real sy [depos_order + 1];
-            const int k  = compute_shape_factor<depos_order>(sy,  ymid);
-            amrex::Real sy0[depos_order + 1];
-            const int k0 = compute_shape_factor<depos_order>(sy0, ymid-stagger_shift);
+            const amrex::Real ymid = (yp[ip] - ymin)*dyi - dts2dy*vy;
+            amrex::Real sy_node[depos_order + 1];
+            amrex::Real sy_cell[depos_order + 1];
+            int k_node;
+            int k_cell;
+            if (jx_type[1] == NODE || jy_type[1] == NODE || jz_type[1] == NODE) {
+                k_node = compute_shape_factor<depos_order>(sy_node, ymid);
+            }
+            if (jx_type[1] == CELL || jy_type[1] == CELL || jz_type[1] == CELL) {
+                k_cell = compute_shape_factor<depos_order>(sy_cell, ymid - 0.5);
+            }
+            const amrex::Real (&sy_jx)[depos_order + 1] = ((jx_type[1] == NODE) ? sy_node : sy_cell);
+            const amrex::Real (&sy_jy)[depos_order + 1] = ((jy_type[1] == NODE) ? sy_node : sy_cell);
+            const amrex::Real (&sy_jz)[depos_order + 1] = ((jz_type[1] == NODE) ? sy_node : sy_cell);
+            int const k_jx = ((jx_type[1] == NODE) ? k_node : k_cell);
+            int const k_jy = ((jy_type[1] == NODE) ? k_node : k_cell);
+            int const k_jz = ((jz_type[1] == NODE) ? k_node : k_cell);
 #endif
+
             // z direction
-            const amrex::Real zmid= (zp[ip]-zmin)*dzi-dts2dz*vz;
-            amrex::Real sz [depos_order + 1];
-            const int l  = compute_shape_factor<depos_order>(sz,  zmid);
-            amrex::Real sz0[depos_order + 1];
-            const int l0 = compute_shape_factor<depos_order>(sz0, zmid-stagger_shift);
+            const amrex::Real zmid = (zp[ip] - zmin)*dzi - dts2dz*vz;
+            amrex::Real sz_node[depos_order + 1];
+            amrex::Real sz_cell[depos_order + 1];
+            int l_node;
+            int l_cell;
+            if (jx_type[zdir] == NODE || jy_type[zdir] == NODE || jz_type[zdir] == NODE) {
+                l_node = compute_shape_factor<depos_order>(sz_node, zmid);
+            }
+            if (jx_type[zdir] == CELL || jy_type[zdir] == CELL || jz_type[zdir] == CELL) {
+                l_cell = compute_shape_factor<depos_order>(sz_cell, zmid - 0.5);
+            }
+            const amrex::Real (&sz_jx)[depos_order + 1] = ((jx_type[zdir] == NODE) ? sz_node : sz_cell);
+            const amrex::Real (&sz_jy)[depos_order + 1] = ((jy_type[zdir] == NODE) ? sz_node : sz_cell);
+            const amrex::Real (&sz_jz)[depos_order + 1] = ((jz_type[zdir] == NODE) ? sz_node : sz_cell);
+            int const l_jx = ((jx_type[zdir] == NODE) ? l_node : l_cell);
+            int const l_jy = ((jy_type[zdir] == NODE) ? l_node : l_cell);
+            int const l_jz = ((jz_type[zdir] == NODE) ? l_node : l_cell);
 
             // Deposit current into jx_arr, jy_arr and jz_arr
 #if (defined WARPX_DIM_XZ) || (defined WARPX_DIM_RZ)
             for (int iz=0; iz<=depos_order; iz++){
                 for (int ix=0; ix<=depos_order; ix++){
                     amrex::Gpu::Atomic::Add(
-                        &jx_arr(lo.x+j0+ix, lo.y+l +iz, 0),
-                        sx0[ix]*sz [iz]*wqx);
+                        &jx_arr(lo.x+j_jx+ix, lo.y+l_jx+iz, 0, 0),
+                        sx_jx[ix]*sz_jx[iz]*wqx);
                     amrex::Gpu::Atomic::Add(
-                        &jy_arr(lo.x+j +ix, lo.y+l +iz, 0),
-                        sx [ix]*sz [iz]*wqy);
+                        &jy_arr(lo.x+j_jy+ix, lo.y+l_jy+iz, 0, 0),
+                        sx_jy[ix]*sz_jy[iz]*wqy);
                     amrex::Gpu::Atomic::Add(
-                        &jz_arr(lo.x+j +ix, lo.y+l0+iz, 0),
-                        sx [ix]*sz0[iz]*wqz);
+                        &jz_arr(lo.x+j_jz+ix, lo.y+l_jz+iz, 0, 0),
+                        sx_jz[ix]*sz_jz[iz]*wqz);
                 }
             }
 #elif (defined WARPX_DIM_3D)
@@ -156,14 +203,14 @@ void doDepositionShapeN(const amrex::ParticleReal * const xp,
                 for (int iy=0; iy<=depos_order; iy++){
                     for (int ix=0; ix<=depos_order; ix++){
                         amrex::Gpu::Atomic::Add(
-                            &jx_arr(lo.x+j0+ix, lo.y+k +iy, lo.z+l +iz),
-                            sx0[ix]*sy [iy]*sz [iz]*wqx);
+                            &jx_arr(lo.x+j_jx+ix, lo.y+k_jx+iy, lo.z+l_jx+iz),
+                            sx_jx[ix]*sy_jx[iy]*sz_jx[iz]*wqx);
                         amrex::Gpu::Atomic::Add(
-                            &jy_arr(lo.x+j +ix, lo.y+k0+iy, lo.z+l +iz),
-                            sx [ix]*sy0[iy]*sz [iz]*wqy);
+                            &jy_arr(lo.x+j_jy+ix, lo.y+k_jy+iy, lo.z+l_jy+iz),
+                            sx_jy[ix]*sy_jy[iy]*sz_jy[iz]*wqy);
                         amrex::Gpu::Atomic::Add(
-                            &jz_arr(lo.x+j +ix, lo.y+k +iy, lo.z+l0+iz),
-                            sx [ix]*sy [iy]*sz0[iz]*wqz);
+                            &jz_arr(lo.x+j_jz+ix, lo.y+k_jz+iy, lo.z+l_jz+iz),
+                            sx_jz[ix]*sy_jz[iy]*sz_jz[iz]*wqz);
                     }
                 }
             }
diff --git a/Source/Particles/WarpXParticleContainer.cpp b/Source/Particles/WarpXParticleContainer.cpp
index 7fb57500d..d5520ba06 100644
--- a/Source/Particles/WarpXParticleContainer.cpp
+++ b/Source/Particles/WarpXParticleContainer.cpp
@@ -272,9 +272,7 @@ WarpXParticleContainer::DepositCurrent(WarpXParIter& pti,
 
     const long ngJ = jx->nGrow();
     const std::array<Real,3>& dx = WarpX::CellSize(std::max(depos_lev,0));
-    int j_is_nodal = jx->is_nodal() and jy->is_nodal() and jz->is_nodal();
     Real q = this->charge;
-    const Real stagger_shift = j_is_nodal ? 0.0 : 0.5;
 
     BL_PROFILE_VAR_NS("PPC::Evolve::Accumulate", blp_accumulate);
     BL_PROFILE_VAR_NS("PPC::CurrentDeposition", blp_deposit);
@@ -300,6 +298,9 @@ WarpXParticleContainer::DepositCurrent(WarpXParIter& pti,
 #ifdef AMREX_USE_GPU
     // No tiling on GPU: jx_ptr points to the full
     // jx array (same for jy_ptr and jz_ptr).
+    auto & jx_fab = jx->get(pti);
+    auto & jy_fab = jy->get(pti);
+    auto & jz_fab = jz->get(pti);
     Array4<Real> const& jx_arr = jx->array(pti);
     Array4<Real> const& jy_arr = jy->array(pti);
     Array4<Real> const& jz_arr = jz->array(pti);
@@ -319,6 +320,9 @@ WarpXParticleContainer::DepositCurrent(WarpXParIter& pti,
     local_jy[thread_num].setVal(0.0);
     local_jz[thread_num].setVal(0.0);
 
+    auto & jx_fab = local_jx[thread_num];
+    auto & jy_fab = local_jy[thread_num];
+    auto & jz_fab = local_jz[thread_num];
     Array4<Real> const& jx_arr = local_jx[thread_num].array();
     Array4<Real> const& jy_arr = local_jy[thread_num].array();
     Array4<Real> const& jz_arr = local_jz[thread_num].array();
@@ -333,13 +337,8 @@ WarpXParticleContainer::DepositCurrent(WarpXParIter& pti,
 
     // Lower corner of tile box physical domain
     // Note that this includes guard cells since it is after tilebox.ngrow
-    const std::array<Real, 3>& xyzmin = WarpX::LowerCorner(tilebox, depos_lev);
-    // xyzmin is built on pti.tilebox(), so it does
-    // not include staggering, so the stagger_shift has to be done by hand.
-    // Alternatively, we could define xyzminx from tbx (and the same for 3
-    // directions and for jx, jy, jz). This way, sx0 would not be needed.
-    // Better for memory? worth trying?
     const Dim3 lo = lbound(tilebox);
+    const std::array<Real, 3>& xyzmin = WarpX::LowerCorner(tilebox, depos_lev);
 
     BL_PROFILE_VAR_START(blp_deposit);
     if (WarpX::current_deposition_algo == CurrentDepositionAlgo::Esirkepov) {
@@ -367,20 +366,20 @@ WarpXParticleContainer::DepositCurrent(WarpXParIter& pti,
             doDepositionShapeN<1>(
                 xp, yp, zp, wp.dataPtr() + offset, uxp.dataPtr() + offset,
                 uyp.dataPtr() + offset, uzp.dataPtr() + offset, ion_lev,
-                jx_arr, jy_arr, jz_arr, np_to_depose, dt, dx, xyzmin, lo,
-                stagger_shift, q);
+                jx_fab, jy_fab, jz_fab, np_to_depose, dt, dx,
+                xyzmin, lo, q);
         } else if (WarpX::nox == 2){
             doDepositionShapeN<2>(
                 xp, yp, zp, wp.dataPtr() + offset, uxp.dataPtr() + offset,
                 uyp.dataPtr() + offset, uzp.dataPtr() + offset, ion_lev,
-                jx_arr, jy_arr, jz_arr, np_to_depose, dt, dx, xyzmin, lo,
-                stagger_shift, q);
+                jx_fab, jy_fab, jz_fab, np_to_depose, dt, dx,
+                xyzmin, lo, q);
         } else if (WarpX::nox == 3){
             doDepositionShapeN<3>(
                 xp, yp, zp, wp.dataPtr() + offset, uxp.dataPtr() + offset,
                 uyp.dataPtr() + offset, uzp.dataPtr() + offset, ion_lev,
-                jx_arr, jy_arr, jz_arr, np_to_depose, dt, dx, xyzmin, lo,
-                stagger_shift, q);
+                jx_fab, jy_fab, jz_fab, np_to_depose, dt, dx,
+                xyzmin, lo, q);
         }
     }
     BL_PROFILE_VAR_STOP(blp_deposit);
diff --git a/Source/WarpX.H b/Source/WarpX.H
index f55670cfb..216c8f6a8 100644
--- a/Source/WarpX.H
+++ b/Source/WarpX.H
@@ -250,6 +250,9 @@ public:
     static amrex::RealBox getRealBox(const amrex::Box& bx, int lev);
     static std::array<amrex::Real,3> LowerCorner (const amrex::Box& bx, int lev);
     static std::array<amrex::Real,3> UpperCorner (const amrex::Box& bx, int lev);
+    // Returns the locations of the lower corner of the box, shifted up
+    // a half cell if cell centered.
+    static std::array<amrex::Real,3> LowerCornerWithCentering (const amrex::Box& bx, int lev);
 
     static amrex::IntVect RefRatio (int lev);
 
diff --git a/Source/WarpX.cpp b/Source/WarpX.cpp
index eef033236..b04bbb380 100644
--- a/Source/WarpX.cpp
+++ b/Source/WarpX.cpp
@@ -1062,6 +1062,21 @@ WarpX::UpperCorner(const Box& bx, int lev)
 #endif
 }
 
+std::array<Real,3>
+WarpX::LowerCornerWithCentering(const Box& bx, int lev)
+{
+    std::array<Real,3> corner = LowerCorner(bx, lev);
+    std::array<Real,3> dx = CellSize(lev);
+    if (!bx.type(0)) corner[0] += 0.5*dx[0];
+#if (AMREX_SPACEDIM == 3)
+    if (!bx.type(1)) corner[1] += 0.5*dx[1];
+    if (!bx.type(2)) corner[2] += 0.5*dx[2];
+#else
+    if (!bx.type(1)) corner[2] += 0.5*dx[2];
+#endif
+    return corner;
+}
+
 IntVect
 WarpX::RefRatio (int lev)
 {