4 files changed, 235 insertions, 303 deletions
diff --git a/Source/Parallelization/WarpXComm.cpp b/Source/Parallelization/WarpXComm.cpp
index 4f7a75f3f..cc7f4b7cc 100644
--- a/Source/Parallelization/WarpXComm.cpp
+++ b/Source/Parallelization/WarpXComm.cpp
@@ -41,43 +41,16 @@ WarpX::UpdateAuxilaryDataStagToNodal ()
             "WarpX::UpdateAuxilaryDataStagToNodal: PSATD solver requires "
             "WarpX build with spectral solver support.");
     }
-#else
-    amrex::Gpu::DeviceVector<Real> d_stencil_coef_x;
-    amrex::Gpu::DeviceVector<Real> d_stencil_coef_y;
-    amrex::Gpu::DeviceVector<Real> d_stencil_coef_z;
-    if (maxwell_solver_id == MaxwellSolverAlgo::PSATD) {
-        const int fg_nox = WarpX::field_gathering_nox;
-        const int fg_noy = WarpX::field_gathering_noy;
-        const int fg_noz = WarpX::field_gathering_noz;
-
-        // Compute real-space stencil coefficients along x
-        amrex::Vector<Real> h_stencil_coef_x = getFornbergStencilCoefficients(fg_nox, false);
-        d_stencil_coef_x.resize(h_stencil_coef_x.size());
-        amrex::Gpu::copyAsync(amrex::Gpu::hostToDevice,
-                              h_stencil_coef_x.begin(),
-                              h_stencil_coef_x.end(),
-                              d_stencil_coef_x.begin());
-
-        // Compute real-space stencil coefficients along y
-        amrex::Vector<Real> h_stencil_coef_y = getFornbergStencilCoefficients(fg_noy, false);
-        d_stencil_coef_y.resize(h_stencil_coef_y.size());
-        amrex::Gpu::copyAsync(amrex::Gpu::hostToDevice,
-                              h_stencil_coef_y.begin(),
-                              h_stencil_coef_y.end(),
-                              d_stencil_coef_y.begin());
-
-        // Compute real-space stencil coefficients along z
-        amrex::Vector<Real> h_stencil_coef_z = getFornbergStencilCoefficients(fg_noz, false);
-        d_stencil_coef_z.resize(h_stencil_coef_z.size());
-        amrex::Gpu::copyAsync(amrex::Gpu::hostToDevice,
-                              h_stencil_coef_z.begin(),
-                              h_stencil_coef_z.end(),
-                              d_stencil_coef_z.begin());
-
-        amrex::Gpu::synchronize();
-    }
 #endif
 
+    const amrex::IntVect& Bx_stag = Bfield_fp[0][0]->ixType().toIntVect();
+    const amrex::IntVect& By_stag = Bfield_fp[0][1]->ixType().toIntVect();
+    const amrex::IntVect& Bz_stag = Bfield_fp[0][2]->ixType().toIntVect();
+
+    const amrex::IntVect& Ex_stag = Efield_fp[0][0]->ixType().toIntVect();
+    const amrex::IntVect& Ey_stag = Efield_fp[0][1]->ixType().toIntVect();
+    const amrex::IntVect& Ez_stag = Efield_fp[0][2]->ixType().toIntVect();
+
     // For level 0, we only need to do the average.
 #ifdef AMREX_USE_OMP
 #pragma omp parallel if (Gpu::notInLaunchRegion())
@@ -109,28 +82,40 @@ WarpX::UpdateAuxilaryDataStagToNodal ()
             const int fg_nox = WarpX::field_gathering_nox;
             const int fg_noy = WarpX::field_gathering_noy;
             const int fg_noz = WarpX::field_gathering_noz;
-            amrex::Real const * r_stencil_coef_x = d_stencil_coef_x.data();
-            amrex::Real const * r_stencil_coef_y = d_stencil_coef_y.data();
-            amrex::Real const * r_stencil_coef_z = d_stencil_coef_z.data();
+            // Device vectors for Fornberg stencil coefficients used for finite-order centering
+            amrex::Real const * stencil_coeffs_x = WarpX::device_centering_stencil_coeffs_x.data();
+            amrex::Real const * stencil_coeffs_y = WarpX::device_centering_stencil_coeffs_y.data();
+            amrex::Real const * stencil_coeffs_z = WarpX::device_centering_stencil_coeffs_z.data();
             amrex::ParallelFor(bx, [=] AMREX_GPU_DEVICE (int j, int k, int l) noexcept
             {
-                warpx_interp_nd_bfield_x(j,k,l, bx_aux, bx_fp, fg_noy, fg_noz, r_stencil_coef_y, r_stencil_coef_z);
-                warpx_interp_nd_bfield_y(j,k,l, by_aux, by_fp, fg_nox, fg_noz, r_stencil_coef_x, r_stencil_coef_z);
-                warpx_interp_nd_bfield_z(j,k,l, bz_aux, bz_fp, fg_nox, fg_noy, r_stencil_coef_x, r_stencil_coef_y);
-                warpx_interp_nd_efield_x(j,k,l, ex_aux, ex_fp, fg_nox, r_stencil_coef_x);
-                warpx_interp_nd_efield_y(j,k,l, ey_aux, ey_fp, fg_noy, r_stencil_coef_y);
-                warpx_interp_nd_efield_z(j,k,l, ez_aux, ez_fp, fg_noz, r_stencil_coef_z);
+                warpx_interp(j, k, l, bx_aux, bx_fp, Bx_stag, fg_nox, fg_noy, fg_noz,
+                             stencil_coeffs_x, stencil_coeffs_y, stencil_coeffs_z);
+
+                warpx_interp(j, k, l, by_aux, by_fp, By_stag, fg_nox, fg_noy, fg_noz,
+                             stencil_coeffs_x, stencil_coeffs_y, stencil_coeffs_z);
+
+                warpx_interp(j, k, l, bz_aux, bz_fp, Bz_stag, fg_nox, fg_noy, fg_noz,
+                             stencil_coeffs_x, stencil_coeffs_y, stencil_coeffs_z);
+
+                warpx_interp(j, k, l, ex_aux, ex_fp, Ex_stag, fg_nox, fg_noy, fg_noz,
+                             stencil_coeffs_x, stencil_coeffs_y, stencil_coeffs_z);
+
+                warpx_interp(j, k, l, ey_aux, ey_fp, Ey_stag, fg_nox, fg_noy, fg_noz,
+                             stencil_coeffs_x, stencil_coeffs_y, stencil_coeffs_z);
+
+                warpx_interp(j, k, l, ez_aux, ez_fp, Ez_stag, fg_nox, fg_noy, fg_noz,
+                             stencil_coeffs_x, stencil_coeffs_y, stencil_coeffs_z);
             });
 #endif
         } else { // FDTD
             amrex::ParallelFor(bx, [=] AMREX_GPU_DEVICE (int j, int k, int l) noexcept
             {
-                warpx_interp_nd_bfield_x(j,k,l, bx_aux, bx_fp);
-                warpx_interp_nd_bfield_y(j,k,l, by_aux, by_fp);
-                warpx_interp_nd_bfield_z(j,k,l, bz_aux, bz_fp);
-                warpx_interp_nd_efield_x(j,k,l, ex_aux, ex_fp);
-                warpx_interp_nd_efield_y(j,k,l, ey_aux, ey_fp);
-                warpx_interp_nd_efield_z(j,k,l, ez_aux, ez_fp);
+                warpx_interp(j, k, l, bx_aux, bx_fp, Bx_stag);
+                warpx_interp(j, k, l, by_aux, by_fp, By_stag);
+                warpx_interp(j, k, l, bz_aux, bz_fp, Bz_stag);
+                warpx_interp(j, k, l, ex_aux, ex_fp, Ex_stag);
+                warpx_interp(j, k, l, ey_aux, ey_fp, Ey_stag);
+                warpx_interp(j, k, l, ez_aux, ez_fp, Ez_stag);
             });
         }
     }
diff --git a/Source/Parallelization/WarpXComm_K.H b/Source/Parallelization/WarpXComm_K.H
index 092a13565..0f183e422 100644
--- a/Source/Parallelization/WarpXComm_K.H
+++ b/Source/Parallelization/WarpXComm_K.H
@@ -277,161 +277,6 @@ void warpx_interp_nd_bfield_z (int j, int k, int l,
     Bza(j,k,l) = bg + (bf-bc);
 }
 
-// With the FDTD Maxwell solver, this is the linear interpolation function used to
-// interpolate the Bx field from a staggered grid to a nodal grid, before gathering
-// the field from the nodes to the particle positions (momentum-conserving gathering)
-AMREX_GPU_DEVICE AMREX_FORCE_INLINE
-void warpx_interp_nd_bfield_x (int j, int k, int l,
-                               amrex::Array4<amrex::Real> const& Bxa,
-                               amrex::Array4<amrex::Real const> const& Bxf)
-{
-    using namespace amrex;
-#if (AMREX_SPACEDIM == 2)
-    Bxa(j,k,0) = 0.5_rt*(Bxf(j,k-1,0) + Bxf(j,k,0));
-    amrex::ignore_unused(l);
-#else
-    Bxa(j,k,l) = 0.25_rt*(Bxf(j,k-1,l-1) + Bxf(j,k,l-1) + Bxf(j,k-1,l) + Bxf(j,k,l));
-#endif
-}
-
-// With the PSATD Maxwell solver, this is the arbitrary-order interpolation function used to
-// interpolate the Bx field from a staggered grid to a nodal grid, before gathering
-// the field from the nodes to the particle positions (momentum-conserving gathering)
-AMREX_GPU_DEVICE AMREX_FORCE_INLINE
-void warpx_interp_nd_bfield_x (int j, int k, int l,
-                               amrex::Array4<amrex::Real> const& Bxa,
-                               amrex::Array4<amrex::Real const> const& Bxf,
-                               const int noy,
-                               const int noz,
-                               amrex::Real const* stencil_coef_y,
-                               amrex::Real const* stencil_coef_z)
-{
-    using namespace amrex;
-#if (AMREX_SPACEDIM == 2)
-    amrex::ignore_unused(noy);
-    amrex::ignore_unused(stencil_coef_y);
-    amrex::Real res = 0.;
-    for (int nz = 1; nz < noz/2+1; nz++) {
-        res += 0.5_rt * stencil_coef_z[nz-1] * (Bxf(j, k + nz - 1, l) + Bxf(j, k - nz, l));
-    }
-    Bxa(j,k,l) = res;
-#else
-    amrex::Real res = 0.;
-    for     (int nz = 1; nz < noz/2+1; nz++) {
-        for (int ny = 1; ny < noy/2+1; ny++) {
-            res += 0.25_rt * stencil_coef_y[ny-1] * stencil_coef_z[nz-1] *
-                   (Bxf(j, k + ny - 1, l + nz - 1) + Bxf(j, k - ny, l + nz - 1)
-                  + Bxf(j, k + ny - 1, l - nz    ) + Bxf(j, k - ny, l - nz    ));
-        }
-    }
-    Bxa(j,k,l) = res;
-#endif
-}
-
-// With the FDTD Maxwell solver, this is the linear interpolation function used to
-// interpolate the By field from a staggered grid to a nodal grid, before gathering
-// the field from the nodes to the particle positions (momentum-conserving gathering)
-AMREX_GPU_DEVICE AMREX_FORCE_INLINE
-void warpx_interp_nd_bfield_y (int j, int k, int l,
-                               amrex::Array4<amrex::Real> const& Bya,
-                               amrex::Array4<amrex::Real const> const& Byf)
-{
-    using namespace amrex;
-#if (AMREX_SPACEDIM == 2)
-    Bya(j,k,0) = 0.25_rt*(Byf(j,k,0) + Byf(j-1,k,0) + Byf(j,k-1,0) + Byf(j-1,k-1,0));
-    amrex::ignore_unused(l);
-#else
-    Bya(j,k,l) = 0.25_rt*(Byf(j-1,k,l-1) + Byf(j,k,l-1) + Byf(j-1,k,l) + Byf(j,k,l));
-#endif
-}
-
-// With the PSATD Maxwell solver, this is the arbitrary-order interpolation function used to
-// interpolate the By field from a staggered grid to a nodal grid, before gathering
-// the field from the nodes to the particle positions (momentum-conserving gathering)
-AMREX_GPU_DEVICE AMREX_FORCE_INLINE
-void warpx_interp_nd_bfield_y (int j, int k, int l,
-                               amrex::Array4<amrex::Real> const& Bya,
-                               amrex::Array4<amrex::Real const> const& Byf,
-                               const int nox,
-                               const int noz,
-                               amrex::Real const* stencil_coef_x,
-                               amrex::Real const* stencil_coef_z)
-{
-    using namespace amrex;
-#if (AMREX_SPACEDIM == 2)
-    amrex::Real res = 0.;
-    for     (int nz = 1; nz < noz/2+1; nz++) {
-        for (int nx = 1; nx < nox/2+1; nx++) {
-            res += 0.25_rt * stencil_coef_x[nx-1] * stencil_coef_z[nz-1] *
-                   (Byf(j + nx - 1, k + nz - 1, l) + Byf(j - nx, k + nz - 1, l)
-                  + Byf(j + nx - 1, k - nz    , l) + Byf(j - nx, k - nz    , l));
-        }
-    }
-    Bya(j,k,l) = res;
-#else
-    amrex::Real res = 0.;
-    for     (int nz = 1; nz < noz/2+1; nz++) {
-        for (int nx = 1; nx < nox/2+1; nx++) {
-            res += 0.25_rt * stencil_coef_x[nx-1] * stencil_coef_z[nz-1] *
-                   (Byf(j + nx - 1, k, l + nz - 1) + Byf(j - nx, k, l + nz - 1)
-                  + Byf(j + nx - 1, k, l - nz    ) + Byf(j - nx, k, l - nz    ));
-        }
-    }
-    Bya(j,k,l) = res;
-#endif
-}
-
-// With the FDTD Maxwell solver, this is the linear interpolation function used to
-// interpolate the Bz field from a staggered grid to a nodal grid, before gathering
-// the field from the nodes to the particle positions (momentum-conserving gathering)
-AMREX_GPU_DEVICE AMREX_FORCE_INLINE
-void warpx_interp_nd_bfield_z (int j, int k, int l,
-                               amrex::Array4<amrex::Real> const& Bza,
-                               amrex::Array4<amrex::Real const> const& Bzf)
-{
-    using namespace amrex;
-#if (AMREX_SPACEDIM == 2)
-    Bza(j,k,0) = 0.5_rt*(Bzf(j-1,k,0) + Bzf(j,k,0));
-    amrex::ignore_unused(l);
-#else
-    Bza(j,k,l) = 0.25_rt*(Bzf(j-1,k-1,l) + Bzf(j,k-1,l) + Bzf(j-1,k,l) + Bzf(j,k,l));
-#endif
-}
-
-// With the PSATD Maxwell solver, this is the arbitrary-order interpolation function used to
-// interpolate the Bz field from a staggered grid to a nodal grid, before gathering
-// the field from the nodes to the particle positions (momentum-conserving gathering)
-AMREX_GPU_DEVICE AMREX_FORCE_INLINE
-void warpx_interp_nd_bfield_z (int j, int k, int l,
-                               amrex::Array4<amrex::Real> const& Bza,
-                               amrex::Array4<amrex::Real const> const& Bzf,
-                               const int nox,
-                               const int noy,
-                               amrex::Real const* stencil_coef_x,
-                               amrex::Real const* stencil_coef_y)
-{
-    using namespace amrex;
-#if (AMREX_SPACEDIM == 2)
-    amrex::ignore_unused(noy);
-    amrex::ignore_unused(stencil_coef_y);
-    amrex::Real res = 0.;
-    for (int nx = 1; nx < nox/2+1; nx++) {
-        res += 0.5_rt * stencil_coef_x[nx-1] * (Bzf(j + nx - 1, k, l) + Bzf(j - nx, k, l));
-    }
-    Bza(j,k,l) = res;
-#else
-    amrex::Real res = 0.;
-    for     (int ny = 1; ny < noy/2+1; ny++) {
-        for (int nx = 1; nx < nox/2+1; nx++) {
-            res += 0.25_rt * stencil_coef_x[nx-1] * stencil_coef_y[ny-1] *
-                   (Bzf(j + nx - 1, k + ny - 1, l) + Bzf(j - nx, k + ny - 1, l)
-                  + Bzf(j + nx - 1, k - ny    , l) + Bzf(j - nx, k - ny    , l));
-        }
-    }
-    Bza(j,k,l) = res;
-#endif
-}
-
 AMREX_GPU_DEVICE AMREX_FORCE_INLINE
 void warpx_interp_nd_efield_x (int j, int k, int l,
                                amrex::Array4<amrex::Real> const& Exa,
@@ -641,118 +486,181 @@ void warpx_interp_nd_efield_z (int j, int k, int l,
     Eza(j,k,l) = eg + (ef-ec);
 }
 
-// With the FDTD Maxwell solver, this is the linear interpolation function used to
-// interpolate the Ex field from a staggered grid to a nodal grid, before gathering
-// the field from the nodes to the particle positions (momentum-conserving gathering)
-AMREX_GPU_DEVICE AMREX_FORCE_INLINE
-void warpx_interp_nd_efield_x (int j, int k, int l,
-                               amrex::Array4<amrex::Real> const& Exa,
-                               amrex::Array4<amrex::Real const> const& Exf)
-{
-    using namespace amrex;
-    Exa(j,k,l) = 0.5_rt*(Exf(j-1,k,l) + Exf(j,k,l));
-}
-
-// With the PSATD Maxwell solver, this is the arbitrary-order interpolation function used to
-// interpolate the Ex field from a staggered grid to a nodal grid, before gathering
-// the field from the nodes to the particle positions (momentum-conserving gathering)
+/**
+ * \brief Arbitrary-order interpolation function used to interpolate a given field from the Yee grid
+ * to the nodal grid, before gathering the field from the nodes to the particle positions
+ * (momentum-conserving field gathering). With the FDTD solver, this performs simple linear interpolation.
+ * With the PSATD solver, this performs arbitrary-order interpolation based on the Fornberg coefficients.
+ * The result is stored in the output array \c arr_aux, allocated on the \c aux patch.
+ *
+ * \param[in] j index along x of the output array
+ * \param[in] k index along y (in 3D) or z (in 2D) of the output array
+ * \param[in] l index along z (in 3D, \c l = 0 in 2D) of the output array
+ * \param[in,out] arr_aux output array allocated on the \c aux patch where interpolated values are stored
+ * \param[in] arr_fine input array allocated on the fine patch
+ * \param[in] arr_stag \c IndexType of the input array (Yee staggering)
+ * \param[in] nox order of finite-order interpolation along x
+ * \param[in] noy order of finite-order interpolation along y
+ * \param[in] noz order of finite-order interpolation along z
+ * \param[in] stencil_coeffs_x array of Fornberg coefficients for finite-order interpolation stencil along x
+ * \param[in] stencil_coeffs_y array of Fornberg coefficients for finite-order interpolation stencil along y
+ * \param[in] stencil_coeffs_z array of Fornberg coefficients for finite-order interpolation stencil along z
+ */
 AMREX_GPU_DEVICE AMREX_FORCE_INLINE
-void warpx_interp_nd_efield_x (int j, int k, int l,
-                               amrex::Array4<amrex::Real> const& Exa,
-                               amrex::Array4<amrex::Real const> const& Exf,
-                               const int nox,
-                               amrex::Real const* stencil_coef_x)
+void warpx_interp (const int j,
+                   const int k,
+                   const int l,
+                   amrex::Array4<amrex::Real      > const& arr_aux,
+                   amrex::Array4<amrex::Real const> const& arr_fine,
+                   const amrex::IntVect& arr_stag,
+                   const int nox = 2,
+                   const int noy = 2,
+                   const int noz = 2,
+                   amrex::Real const* stencil_coeffs_x = nullptr,
+                   amrex::Real const* stencil_coeffs_y = nullptr,
+                   amrex::Real const* stencil_coeffs_z = nullptr)
 {
     using namespace amrex;
-    amrex::Real res = 0.;
-    for (int nx = 1; nx < nox/2+1; nx++) {
-        res += 0.5_rt * stencil_coef_x[nx-1] * (Exf(j + nx - 1, k, l) + Exf(j - nx, k, l));
-    }
-    Exa(j,k,l) = res;
-}
 
-// With the FDTD Maxwell solver, this is the linear interpolation function used to
-// interpolate the Ey field from a staggered grid to a nodal grid, before gathering
-// the field from the nodes to the particle positions (momentum-conserving gathering)
-AMREX_GPU_DEVICE AMREX_FORCE_INLINE
-void warpx_interp_nd_efield_y (int j, int k, int l,
-                               amrex::Array4<amrex::Real> const& Eya,
-                               amrex::Array4<amrex::Real const> const& Eyf)
-{
-    using namespace amrex;
+    // Avoid compiler warnings
 #if (AMREX_SPACEDIM == 2)
-    Eya(j,k,0) = Eyf(j,k,0);
-    amrex::ignore_unused(l);
-#else
-    Eya(j,k,l) = 0.5_rt*(Eyf(j,k-1,l) + Eyf(j,k,l));
+    amrex::ignore_unused(noy, stencil_coeffs_y);
 #endif
-}
 
-// With the PSATD Maxwell solver, this is the arbitrary-order interpolation function used to
-// interpolate the Ey field from a staggered grid to a nodal grid, before gathering
-// the field from the nodes to the particle positions (momentum-conserving gathering)
-AMREX_GPU_DEVICE AMREX_FORCE_INLINE
-void warpx_interp_nd_efield_y (int j, int k, int l,
-                               amrex::Array4<amrex::Real> const& Eya,
-                               amrex::Array4<amrex::Real const> const& Eyf,
-                               const int noy,
-                               amrex::Real const* stencil_coef_y)
-{
-    using namespace amrex;
-#if (AMREX_SPACEDIM == 2)
-    amrex::ignore_unused(noy);
-    amrex::ignore_unused(stencil_coef_y);
-    Eya(j,k,l) = Eyf(j,k,l);
-#else
-    amrex::Real res = 0._rt;
-    for (int ny = 1; ny < noy/2+1; ny++) {
-        res += 0.5_rt * stencil_coef_y[ny-1] * (Eyf(j, k + ny - 1, l) + Eyf(j, k - ny, l));
-    }
-    Eya(j,k,l) = res;
+    // Avoid compiler warnings
+#ifndef WARPX_USE_PSATD
+    amrex::ignore_unused(stencil_coeffs_x, stencil_coeffs_y, stencil_coeffs_z);
 #endif
-}
 
-// With the FDTD Maxwell solver, this is the linear interpolation function used to
-// interpolate the Ez field from a staggered grid to a nodal grid, before gathering
-// the field from the nodes to the particle positions (momentum-conserving gathering)
-AMREX_GPU_DEVICE AMREX_FORCE_INLINE
-void warpx_interp_nd_efield_z (int j, int k, int l,
-                               amrex::Array4<amrex::Real> const& Eza,
-                               amrex::Array4<amrex::Real const> const& Ezf)
-{
-    using namespace amrex;
-#if (AMREX_SPACEDIM == 2)
-    Eza(j,k,0) = 0.5_rt*(Ezf(j,k-1,0) + Ezf(j,k,0));
-    amrex::ignore_unused(l);
-#else
-    Eza(j,k,l) = 0.5_rt*(Ezf(j,k,l-1) + Ezf(j,k,l));
+    // Staggering (s = 0 if cell-centered, s = 1 if nodal)
+    const int sj = arr_stag[0];
+    const int sk = arr_stag[1];
+#if (AMREX_SPACEDIM == 3)
+    const int sl = arr_stag[2];
 #endif
-}
 
-// With the PSATD Maxwell solver, this is the arbitrary-order interpolation function used to
-// interpolate the Ez field from a staggered grid to a nodal grid, before gathering
-// the field from the nodes to the particle positions (momentum-conserving gathering)
-AMREX_GPU_DEVICE AMREX_FORCE_INLINE
-void warpx_interp_nd_efield_z (int j, int k, int l,
-                               amrex::Array4<amrex::Real> const& Eza,
-                               amrex::Array4<amrex::Real const> const& Ezf,
-                               const int noz,
-                               amrex::Real const* stencil_coef_z)
-{
-    using namespace amrex;
-#if (AMREX_SPACEDIM == 2)
-    amrex::Real res = 0._rt;
-    for (int nz = 1; nz < noz/2+1; nz++) {
-        res += 0.5_rt * stencil_coef_z[nz-1] * (Ezf(j, k + nz - 1, l) + Ezf(j, k - nz, l));
+    // Number of points (+1) used for interpolation from the staggered grid to the nodal grid
+    // (if the input field is nodal along a given direction, then s = 1, the variable n = 0
+    // is actually never used, and there is no interpolation, only a copy of one single value)
+    const int nj = (sj == 0) ? nox/2 + 1 : 0;
+#if   (AMREX_SPACEDIM == 2)
+    const int nk = (sk == 0) ? noz/2 + 1 : 0;
+#elif (AMREX_SPACEDIM == 3)
+    const int nk = (sk == 0) ? noy/2 + 1 : 0;
+    const int nl = (sl == 0) ? noz/2 + 1 : 0;
+#endif
+
+    // Additional normalization factor
+    const amrex::Real wj = (sj == 0) ? 0.5_rt : 1.0_rt;
+    const amrex::Real wk = (sk == 0) ? 0.5_rt : 1.0_rt;
+#if   (AMREX_SPACEDIM == 2)
+    constexpr amrex::Real wl = 1.0_rt;
+#elif (AMREX_SPACEDIM == 3)
+    const amrex::Real wl = (sl == 0) ? 0.5_rt : 1.0_rt;
+#endif
+
+    // Auxiliary variables for stencil coefficients
+#ifdef WARPX_USE_PSATD
+    amrex::Real cj = 1.0_rt;
+    amrex::Real ck = 1.0_rt;
+    amrex::Real cl = 1.0_rt;
+    int dj, dk;
+#if (AMREX_SPACEDIM == 3)
+    int dl;
+#endif
+#endif
+
+    // Min and max for interpolation loop along j
+    const int jmin = (sj == 0) ? j - nj + 1 : j;
+    const int jmax = (sj == 0) ? j + nj - 2 : j;
+
+    // Min and max for interpolation loop along k
+    const int kmin = (sk == 0) ? k - nk + 1 : k;
+    const int kmax = (sk == 0) ? k + nk - 2 : k;
+
+    // Min and max for interpolation loop along l
+#if   (AMREX_SPACEDIM == 2)
+    // l = 0 always
+    const int lmin = l;
+    const int lmax = l;
+#elif (AMREX_SPACEDIM == 3)
+    const int lmin = (sl == 0) ? l - nl + 1 : l;
+    const int lmax = (sl == 0) ? l + nl - 2 : l;
+#endif
+
+    amrex::Real res = 0.0_rt;
+
+#ifndef WARPX_USE_PSATD // FDTD (linear interpolation)
+
+    // Example of 1D interpolation from nodal grid to nodal grid:
+    //
+    //         j
+    // --o-----o-----o--  result(j) = f(j)
+    // --o-----o-----o--
+    //  j-1    j    j+1
+    //
+    // Example of 1D interpolation from staggered grid to nodal grid:
+    //
+    //         j
+    // --o-----o-----o--  result(j) = (f(j-1) + f(j)) / 2
+    // -----x-----x-----
+    //     j-1    j
+
+    for (int ll = lmin; ll <= lmax; ll++)
+    {
+        for (int kk = kmin; kk <= kmax; kk++)
+        {
+            for (int jj = jmin; jj <= jmax; jj++)
+            {
+                res += arr_fine(jj,kk,ll);
+            }
+        }
     }
-    Eza(j,k,l) = res;
-#else
-    amrex::Real res = 0.;
-    for (int nz = 1; nz < noz/2+1; nz++) {
-        res += 0.5_rt * stencil_coef_z[nz-1] * (Ezf(j, k, l + nz - 1) + Ezf(j, k, l - nz));
+
+#else // PSATD (finite-order interpolation)
+
+    // Example of 1D interpolation from nodal grid to nodal grid:
+    //
+    //         j
+    // --o-----o-----o--  result(j) = f(j)
+    // --o-----o-----o--
+    //  j-1    j    j+1
+    //
+    // Example of 1D interpolation from staggered grid to nodal grid:
+    //
+    //                     j
+    // --o-----o-----o-----o-----o-----o-----o--  result(j) = c_0 * (f(j-1) + f(j)  ) / 2
+    // -----x-----x-----x-----x-----x-----x-----            + c_1 * (f(j-2) + f(j+1)) / 2
+    //     j-3   j-2   j-1    j    j+1   j+2                + c_2 * (f(j-3) + f(j+2)) / 2
+    //     c_2   c_1   c_0   c_0   c_1   c_2                + ...
+
+    for (int ll = lmin; ll <= lmax; ll++)
+    {
+#if (AMREX_SPACEDIM == 3)
+        dl = (l - ll > 0) ? l - ll - 1 : ll - l;
+        if (sl == 0) cl = stencil_coeffs_z[dl];
+#endif
+        for (int kk = kmin; kk <= kmax; kk++)
+        {
+            dk = (k - kk > 0) ? k - kk - 1 : kk - k;
+#if   (AMREX_SPACEDIM == 2)
+            if (sk == 0) ck = stencil_coeffs_z[dk];
+#elif (AMREX_SPACEDIM == 3)
+            if (sk == 0) ck = stencil_coeffs_y[dk];
+#endif
+            for (int jj = jmin; jj <= jmax; jj++)
+            {
+                dj = (j - jj > 0) ? j - jj - 1 : jj - j;
+                if (sj == 0) cj = stencil_coeffs_x[dj];
+
+                res += cj * ck * cl * arr_fine(jj,kk,ll);
+            }
+        }
     }
-    Eza(j,k,l) = res;
+
 #endif
+
+    arr_aux(j,k,l) = wj * wk * wl * res;
 }
 
 #endif
diff --git a/Source/WarpX.H b/Source/WarpX.H
index 26c2b78d4..4ee7fe47e 100644
--- a/Source/WarpX.H
+++ b/Source/WarpX.H
@@ -573,6 +573,16 @@ public:
 
     void ApplyFilterandSumBoundaryRho (int lev, int glev, amrex::MultiFab& rho, int icomp, int ncomp);
 
+#ifdef WARPX_USE_PSATD
+    // Host and device vectors for Fornberg stencil coefficients used for finite-order centering
+    amrex::Vector<amrex::Real> host_centering_stencil_coeffs_x;
+    amrex::Vector<amrex::Real> host_centering_stencil_coeffs_y;
+    amrex::Vector<amrex::Real> host_centering_stencil_coeffs_z;
+    amrex::Gpu::DeviceVector<amrex::Real> device_centering_stencil_coeffs_x;
+    amrex::Gpu::DeviceVector<amrex::Real> device_centering_stencil_coeffs_y;
+    amrex::Gpu::DeviceVector<amrex::Real> device_centering_stencil_coeffs_z;
+#endif
+
 protected:
 
     /**
diff --git a/Source/WarpX.cpp b/Source/WarpX.cpp
index 7fdd6a3f1..ff42c4758 100644
--- a/Source/WarpX.cpp
+++ b/Source/WarpX.cpp
@@ -11,6 +11,9 @@
  */
 #include "WarpX.H"
 #include "FieldSolver/WarpX_FDTD.H"
+#ifdef WARPX_USE_PSATD
+#include "FieldSolver/SpectralSolver/SpectralKSpace.H"
+#endif
 #include "Python/WarpXWrappers.h"
 #include "Utils/WarpXConst.H"
 #include "Utils/WarpXUtil.H"
@@ -693,6 +696,32 @@ WarpX::ReadParameters ()
                     field_gathering_nox == 2 && field_gathering_noy == 2 && field_gathering_noz == 2,
                     "High-order interpolation (order > 2) is not implemented with mesh refinement");
             }
+
+            // Host vectors for Fornberg stencil coefficients used for finite-order centering
+            host_centering_stencil_coeffs_x = getFornbergStencilCoefficients(field_gathering_nox, false);
+            host_centering_stencil_coeffs_y = getFornbergStencilCoefficients(field_gathering_noy, false);
+            host_centering_stencil_coeffs_z = getFornbergStencilCoefficients(field_gathering_noz, false);
+
+            // Device vectors for Fornberg stencil coefficients used for finite-order centering
+            device_centering_stencil_coeffs_x.resize(host_centering_stencil_coeffs_x.size());
+            amrex::Gpu::copyAsync(amrex::Gpu::hostToDevice,
+                                  host_centering_stencil_coeffs_x.begin(),
+                                  host_centering_stencil_coeffs_x.end(),
+                                  device_centering_stencil_coeffs_x.begin());
+
+            device_centering_stencil_coeffs_y.resize(host_centering_stencil_coeffs_y.size());
+            amrex::Gpu::copyAsync(amrex::Gpu::hostToDevice,
+                                  host_centering_stencil_coeffs_y.begin(),
+                                  host_centering_stencil_coeffs_y.end(),
+                                  device_centering_stencil_coeffs_y.begin());
+
+            device_centering_stencil_coeffs_z.resize(host_centering_stencil_coeffs_z.size());
+            amrex::Gpu::copyAsync(amrex::Gpu::hostToDevice,
+                                  host_centering_stencil_coeffs_z.begin(),
+                                  host_centering_stencil_coeffs_z.end(),
+                                  device_centering_stencil_coeffs_z.begin());
+
+            amrex::Gpu::synchronize();
         }
 #endif