3 files changed, 222 insertions, 100 deletions
diff --git a/Source/FieldSolver/SpectralSolver/SpectralFieldData.cpp b/Source/FieldSolver/SpectralSolver/SpectralFieldData.cpp
index 9c4e786de..138476f88 100644
--- a/Source/FieldSolver/SpectralSolver/SpectralFieldData.cpp
+++ b/Source/FieldSolver/SpectralSolver/SpectralFieldData.cpp
@@ -219,6 +219,15 @@ SpectralFieldData::BackwardTransform( const int lev,
     const bool is_nodal_z = mf.is_nodal(1);
 #endif
 
+    const int si = (is_nodal_x) ? 1 : 0;
+#if   (AMREX_SPACEDIM == 2)
+    const int sj = (is_nodal_z) ? 1 : 0;
+    const int sk = 0;
+#elif (AMREX_SPACEDIM == 3)
+    const int sj = (is_nodal_y) ? 1 : 0;
+    const int sk = (is_nodal_z) ? 1 : 0;
+#endif
+
     // Loop over boxes
     for ( MFIter mfi(mf); mfi.isValid(); ++mfi ){
         if (cost && WarpX::load_balance_costs_update_algo == LoadBalanceCostsUpdateAlgo::Timers)
@@ -256,51 +265,49 @@ SpectralFieldData::BackwardTransform( const int lev,
                 // Copy field into temporary array
                 tmp_arr(i,j,k) = spectral_field_value;
             });
-
         }
 
         // Perform Fourier transform from `tmpSpectralField` to `tmpRealField`
         AnyFFT::Execute(backward_plan[mfi]);
 
-        // Copy the temporary field `tmpRealField` to the real-space field `mf`
-        // (only in the valid cells ; not in the guard cells)
-        // Normalize (divide by 1/N) since the FFT+IFFT results in a factor N
+        // Copy the temporary field tmpRealField to the real-space field mf and
+        // normalize, dividing by N, since (FFT + inverse FFT) results in a factor N
         {
-            Array4<Real> mf_arr = mf[mfi].array();
-            Array4<const Real> tmp_arr = tmpRealField[mfi].array();
-            // Normalization: divide by the number of points in realspace
-            // (includes the guard cells)
-            const Box realspace_bx = tmpRealField[mfi].box();
-            const Real inv_N = 1./realspace_bx.numPts();
-
-            if (m_periodic_single_box) {
-                // Enforce periodicity on the nodes, by using modulo in indices
-                // This is because `tmp_arr` is cell-centered while `mf_arr` can be nodal
-                int const nx = realspace_bx.length(0);
-                int const ny = realspace_bx.length(1);
-#if (AMREX_SPACEDIM == 3)
-                int const nz = realspace_bx.length(2);
-#else
-                int constexpr nz = 1;
+            amrex::Box const& mf_box = (m_periodic_single_box) ? mfi.validbox() : mfi.fabbox();
+            amrex::Array4<amrex::Real> mf_arr = mf[mfi].array();
+            amrex::Array4<const amrex::Real> tmp_arr = tmpRealField[mfi].array();
+
+            const amrex::Real inv_N = 1._rt / tmpRealField[mfi].box().numPts();
+
+            // Total number of cells, including ghost cells (nj represents ny in 3D and nz in 2D)
+            const int ni = mf_box.length(0);
+            const int nj = mf_box.length(1);
+#if   (AMREX_SPACEDIM == 2)
+            constexpr int nk = 1;
+#elif (AMREX_SPACEDIM == 3)
+            const int nk = mf_box.length(2);
 #endif
-                ParallelFor(
-                    mfi.validbox(),
-                    /* GCC 8.1-8.2 work-around (ICE):
-                     *   named capture in nonexcept lambda needed for modulo operands
-                     *   https://godbolt.org/z/ppbAzd
-                     */
-                    [mf_arr, i_comp, inv_N, tmp_arr, nx, ny, nz]
-                    AMREX_GPU_DEVICE (int i, int j, int k) noexcept {
-                        mf_arr(i,j,k,i_comp) = inv_N*tmp_arr(i%nx, j%ny, k%nz);
-                    });
-            } else {
-                ParallelFor( mfi.validbox(),
-                [=] AMREX_GPU_DEVICE(int i, int j, int k) noexcept {
-                    // Copy and normalize field
-                    mf_arr(i,j,k,i_comp) = inv_N*tmp_arr(i,j,k);
-                });
-            }
-
+            // Lower bound of the box (lo_j represents lo_y in 3D and lo_z in 2D)
+            const int lo_i = amrex::lbound(mf_box).x;
+            const int lo_j = amrex::lbound(mf_box).y;
+#if   (AMREX_SPACEDIM == 2)
+            constexpr int lo_k = 0;
+#elif (AMREX_SPACEDIM == 3)
+            const int lo_k = amrex::lbound(mf_box).z;
+#endif
+            // Loop over cells within full box, including ghost cells
+            ParallelFor(mf_box, [=] AMREX_GPU_DEVICE(int i, int j, int k) noexcept
+            {
+                // Assume periodicity and set the last outer guard cell equal to the first one:
+                // this is necessary in order to get the correct value along a nodal direction,
+                // because the last point along a nodal direction is always discarded when FFTs
+                // are computed, as the real-space box is always cell-centered.
+                const int ii = (i == lo_i + ni - si) ? lo_i : i;
+                const int jj = (j == lo_j + nj - sj) ? lo_j : j;
+                const int kk = (k == lo_k + nk - sk) ? lo_k : k;
+                // Copy and normalize field
+                mf_arr(i,j,k,i_comp) = inv_N * tmp_arr(ii,jj,kk);
+            });
         }
 
         if (cost && WarpX::load_balance_costs_update_algo == LoadBalanceCostsUpdateAlgo::Timers)
diff --git a/Source/FieldSolver/WarpXPushFieldsEM.cpp b/Source/FieldSolver/WarpXPushFieldsEM.cpp
index 222a6ae6f..672eaec0c 100644
--- a/Source/FieldSolver/WarpXPushFieldsEM.cpp
+++ b/Source/FieldSolver/WarpXPushFieldsEM.cpp
@@ -11,6 +11,7 @@
 #include "BoundaryConditions/WarpX_PML_kernels.H"
 #include "BoundaryConditions/PML_current.H"
 #include "WarpX_FDTD.H"
+#include "WarpXPushFieldsEM_K.H"
 
 #ifdef BL_USE_SENSEI_INSITU
 #   include <AMReX_AmrMeshInSituBridge.H>
@@ -161,8 +162,16 @@ WarpX::PushPSATD (int lev, amrex::Real /* dt */) {
         PushPSATDSinglePatch( lev, *spectral_solver_cp[lev],
              Efield_cp[lev], Bfield_cp[lev], Efield_avg_cp[lev], Bfield_avg_cp[lev], current_cp[lev], rho_cp[lev] );
     }
-    if (use_damp_fields_in_z_guard) {
-        DampFieldsInGuards( Efield_fp[lev], Bfield_fp[lev] );
+
+    // Damp the fields in the guard cells along z
+    if (use_damp_fields_in_z_guard)
+    {
+        DampFieldsInGuards(Efield_fp[lev], Bfield_fp[lev]);
+
+        if (WarpX::fft_do_time_averaging)
+        {
+            DampFieldsInGuards(Efield_avg_fp[lev], Bfield_avg_fp[lev]);
+        }
     }
 #endif
 }
@@ -419,7 +428,6 @@ WarpX::DampFieldsInGuards(std::array<std::unique_ptr<amrex::MultiFab>,3>& Efield
 
     for ( amrex::MFIter mfi(*Efield[0], amrex::TilingIfNotGPU()); mfi.isValid(); ++mfi )
     {
-
         amrex::Array4<amrex::Real> const& Ex_arr = Efield[0]->array(mfi);
         amrex::Array4<amrex::Real> const& Ey_arr = Efield[1]->array(mfi);
         amrex::Array4<amrex::Real> const& Ez_arr = Efield[2]->array(mfi);
@@ -427,74 +435,72 @@ WarpX::DampFieldsInGuards(std::array<std::unique_ptr<amrex::MultiFab>,3>& Efield
         amrex::Array4<amrex::Real> const& By_arr = Bfield[1]->array(mfi);
         amrex::Array4<amrex::Real> const& Bz_arr = Bfield[2]->array(mfi);
 
-        // Get the tilebox from Efield so that it includes the guard cells.
-        amrex::Box tilebox = (*Efield[0])[mfi].box();
-        int const nz_tile = tilebox.bigEnd(zdir);
+        // Get the tileboxes from Efield and Bfield so that they include the guard cells
+        // and take the staggering of each MultiFab into account
+        amrex::Box tex = amrex::convert((*Efield[0])[mfi].box(), Efield[0]->ixType().toIntVect());
+        amrex::Box tey = amrex::convert((*Efield[1])[mfi].box(), Efield[1]->ixType().toIntVect());
+        amrex::Box tez = amrex::convert((*Efield[2])[mfi].box(), Efield[2]->ixType().toIntVect());
+        amrex::Box tbx = amrex::convert((*Bfield[0])[mfi].box(), Bfield[0]->ixType().toIntVect());
+        amrex::Box tby = amrex::convert((*Bfield[1])[mfi].box(), Bfield[1]->ixType().toIntVect());
+        amrex::Box tbz = amrex::convert((*Bfield[2])[mfi].box(), Bfield[2]->ixType().toIntVect());
+
+        // Get smallEnd of tileboxes
+        const int tex_smallEnd_z = tex.smallEnd(zdir);
+        const int tey_smallEnd_z = tey.smallEnd(zdir);
+        const int tez_smallEnd_z = tez.smallEnd(zdir);
+        const int tbx_smallEnd_z = tbx.smallEnd(zdir);
+        const int tby_smallEnd_z = tby.smallEnd(zdir);
+        const int tbz_smallEnd_z = tbz.smallEnd(zdir);
+
+        // Get bigEnd of tileboxes
+        const int tex_bigEnd_z = tex.bigEnd(zdir);
+        const int tey_bigEnd_z = tey.bigEnd(zdir);
+        const int tez_bigEnd_z = tez.bigEnd(zdir);
+        const int tbx_bigEnd_z = tbx.bigEnd(zdir);
+        const int tby_bigEnd_z = tby.bigEnd(zdir);
+        const int tbz_bigEnd_z = tbz.bigEnd(zdir);
 
         // Box for the whole simulation domain
         amrex::Box const& domain = Geom(0).Domain();
         int const nz_domain = domain.bigEnd(zdir);
 
-        if (tilebox.smallEnd(zdir) < 0) {
-
-            // Apply damping factor in guards cells below the lower end of the domain
-            int const nz_guard = -tilebox.smallEnd(zdir);
+        // Set the tileboxes so that they only cover the lower/upper half of the guard cells
+        constrain_tilebox_to_guards(tex, zdir, nz_domain, tex_smallEnd_z, tex_bigEnd_z);
+        constrain_tilebox_to_guards(tey, zdir, nz_domain, tey_smallEnd_z, tey_bigEnd_z);
+        constrain_tilebox_to_guards(tez, zdir, nz_domain, tez_smallEnd_z, tez_bigEnd_z);
+        constrain_tilebox_to_guards(tbx, zdir, nz_domain, tbx_smallEnd_z, tbx_bigEnd_z);
+        constrain_tilebox_to_guards(tby, zdir, nz_domain, tby_smallEnd_z, tby_bigEnd_z);
+        constrain_tilebox_to_guards(tbz, zdir, nz_domain, tbz_smallEnd_z, tbz_bigEnd_z);
 
-            // Set so the box only covers the lower half of the guard cells
-            tilebox.setBig(zdir, -nz_guard/2-1);
-
-            amrex::ParallelFor(tilebox, Efield[0]->nComp(),
-            [=] AMREX_GPU_DEVICE (int i, int j, int k, int icomp)
+        amrex::ParallelFor(
+            tex, Efield[0]->nComp(), [=] AMREX_GPU_DEVICE (int i, int j, int k, int icomp)
             {
-#if (AMREX_SPACEDIM == 3)
-                amrex::Real zcell = static_cast<amrex::Real>(k + nz_guard);
-#else
-                amrex::Real zcell = static_cast<amrex::Real>(j + nz_guard);
-#endif
-                const amrex::Real phase = MathConst::pi*zcell/nz_guard;
-                const amrex::Real sin_phase = std::sin(phase);
-                const amrex::Real damp_factor = sin_phase*sin_phase;
-
-                Ex_arr(i,j,k,icomp) *= damp_factor;
-                Ey_arr(i,j,k,icomp) *= damp_factor;
-                Ez_arr(i,j,k,icomp) *= damp_factor;
-                Bx_arr(i,j,k,icomp) *= damp_factor;
-                By_arr(i,j,k,icomp) *= damp_factor;
-                Bz_arr(i,j,k,icomp) *= damp_factor;
-
-            });
-
-        }
-        else if (nz_tile > nz_domain) {
-
-            // Apply damping factor in guards cells above the upper end of the domain
-            int nz_guard = nz_tile - nz_domain;
-
-            // Set so the box only covers the upper half of the guard cells
-            tilebox.setSmall(zdir, nz_domain + nz_guard/2 + 1);
-
-            amrex::ParallelFor(tilebox, Efield[0]->nComp(),
-            [=] AMREX_GPU_DEVICE (int i, int j, int k, int icomp)
+                damp_field_in_guards(Ex_arr, i, j, k, icomp, zdir, nz_domain, tex_smallEnd_z, tex_bigEnd_z);
+            },
+            tey, Efield[1]->nComp(), [=] AMREX_GPU_DEVICE (int i, int j, int k, int icomp)
             {
-#if (AMREX_SPACEDIM == 3)
-                amrex::Real zcell = static_cast<amrex::Real>(nz_tile - k);
-#else
-                amrex::Real zcell = static_cast<amrex::Real>(nz_tile - j);
-#endif
-                const amrex::Real phase = MathConst::pi*zcell/nz_guard;
-                const amrex::Real sin_phase = std::sin(phase);
-                const amrex::Real damp_factor = sin_phase*sin_phase;
-
-                Ex_arr(i,j,k,icomp) *= damp_factor;
-                Ey_arr(i,j,k,icomp) *= damp_factor;
-                Ez_arr(i,j,k,icomp) *= damp_factor;
-                Bx_arr(i,j,k,icomp) *= damp_factor;
-                By_arr(i,j,k,icomp) *= damp_factor;
-                Bz_arr(i,j,k,icomp) *= damp_factor;
-
-            });
+                damp_field_in_guards(Ey_arr, i, j, k, icomp, zdir, nz_domain, tey_smallEnd_z, tey_bigEnd_z);
+            },
+            tez, Efield[2]->nComp(), [=] AMREX_GPU_DEVICE (int i, int j, int k, int icomp)
+            {
+                damp_field_in_guards(Ez_arr, i, j, k, icomp, zdir, nz_domain, tez_smallEnd_z, tez_bigEnd_z);
+            }
+        );
 
-        }
+        amrex::ParallelFor(
+            tbx, Bfield[0]->nComp(), [=] AMREX_GPU_DEVICE (int i, int j, int k, int icomp)
+            {
+                damp_field_in_guards(Bx_arr, i, j, k, icomp, zdir, nz_domain, tbx_smallEnd_z, tbx_bigEnd_z);
+            },
+            tby, Bfield[1]->nComp(), [=] AMREX_GPU_DEVICE (int i, int j, int k, int icomp)
+            {
+                damp_field_in_guards(By_arr, i, j, k, icomp, zdir, nz_domain, tby_smallEnd_z, tby_bigEnd_z);
+            },
+            tbz, Bfield[2]->nComp(), [=] AMREX_GPU_DEVICE (int i, int j, int k, int icomp)
+            {
+                damp_field_in_guards(Bz_arr, i, j, k, icomp, zdir, nz_domain, tbz_smallEnd_z, tbz_bigEnd_z);
+            }
+        );
     }
 }
 
diff --git a/Source/FieldSolver/WarpXPushFieldsEM_K.H b/Source/FieldSolver/WarpXPushFieldsEM_K.H
new file mode 100644
index 000000000..7f6ca5f69
--- /dev/null
+++ b/Source/FieldSolver/WarpXPushFieldsEM_K.H
@@ -0,0 +1,109 @@
+/* Copyright 2019-2020
+ *
+ * This file is part of WarpX.
+ *
+ * License: BSD-3-Clause-LBNL
+ */
+
+#ifndef WarpXPushFieldsEM_K_h
+#define WarpXPushFieldsEM_K_h
+
+#include "Utils/WarpXConst.H"
+#include <AMReX.H>
+
+/*
+ * \brief Set a tilebox so that it only covers the lower/upper half of the guard cells.
+ *
+ * \param[in,out] tb tilebox to be modified
+ * \param[in] dir direction where the tilebox smallEnd/bigEnd is modified
+ * \param[in] n_domain number of cells in the whole simulation domain
+ * \param[in] tb_smallEnd smallEnd of the tilebox
+ * \param[in] tb_bigEnd bigEnd of the tilebox
+ */
+AMREX_GPU_HOST_DEVICE AMREX_INLINE
+void constrain_tilebox_to_guards(
+    amrex::Box& tb,
+    const int dir,
+    const int n_domain,
+    const int tb_smallEnd,
+    const int tb_bigEnd)
+{
+    using namespace amrex;
+
+    const int n_tile = tb_bigEnd;
+
+    if (tb_smallEnd < 0)
+    {
+        const int n_guard = -tb_smallEnd;
+        tb.setBig(dir, 0 - n_guard/2 - 1);
+    }
+    else if (n_tile > n_domain)
+    {
+        const int n_guard = n_tile - n_domain;
+        tb.setSmall(dir, n_domain + n_guard/2 + 1);
+    }
+}
+
+/*
+ * \brief Damp a given field in the guard cells along a given direction
+ *
+ * \param[in,out] mf_arr array that contains the field values to be damped
+ * \oaram[in] i index along x
+ * \oaram[in] j index along y (in 3D) or z (in 2D/RZ)
+ * \oaram[in] k index along z (in 3D, \c k = 0 in 2D/RZ)
+ * \param[in] icomp index along the fourth component of the array
+ * \param]in] dir direction where the field will be damped
+ * \param[in] n_domain number of cells in the whole simulation domain
+ * \param[in] tb_smallEnd smallEnd of the current tilebox
+ * \param[in] tb_bigEnd bigEnd of the current tilebox
+ */
+AMREX_GPU_HOST_DEVICE AMREX_INLINE
+void damp_field_in_guards(
+    amrex::Array4<amrex::Real> const& mf_arr,
+    const int i,
+    const int j,
+    const int k,
+    const int icomp,
+    const int dir,
+    const int n_domain,
+    const int tb_smallEnd,
+    const int tb_bigEnd)
+{
+    using namespace amrex;
+
+    // dir = 0: idx = i (x)
+    // dir = 1: idx = j (y in 3D, z in 2D/RZ)
+    // dir = 2: idx = k (z in 3D)
+    const int idx = ((dir == 0) ? i : ((dir == 1) ? j : k));
+
+    const int n_tile = tb_bigEnd;
+
+    if (tb_smallEnd < 0)
+    {
+        // Apply damping factor in guards cells below the lower end of the domain
+        const int n_guard = -tb_smallEnd;
+
+        const amrex::Real cell = static_cast<amrex::Real>(idx + n_guard);
+
+        const amrex::Real phase = MathConst::pi * cell / n_guard;
+        const amrex::Real sin_phase = std::sin(phase);
+        const amrex::Real damp_factor = sin_phase * sin_phase;
+
+        mf_arr(i,j,k,icomp) *= damp_factor;
+    }
+    else if (n_tile > n_domain)
+    {
+        // Apply damping factor in guards cells above the upper end of the domain
+        const int n_guard = n_tile - n_domain;
+
+        const amrex::Real cell = static_cast<amrex::Real>(n_tile - idx);
+
+        const amrex::Real phase = MathConst::pi * cell / n_guard;
+        const amrex::Real sin_phase = std::sin(phase);
+        const amrex::Real damp_factor = sin_phase * sin_phase;
+
+        mf_arr(i,j,k,icomp) *= damp_factor;
+    }
+}
+
+#endif