2 files changed, 199 insertions, 27 deletions
diff --git a/Source/Parallelization/InterpolateCurrentFineToCoarse.H b/Source/Parallelization/InterpolateCurrentFineToCoarse.H
new file mode 100644
index 000000000..148b725d0
--- /dev/null
+++ b/Source/Parallelization/InterpolateCurrentFineToCoarse.H
@@ -0,0 +1,175 @@
+/* Copyright 2019 Axel Huebl, Weiqun Zhang
+ *
+ * This file is part of WarpX.
+ *
+ * License: BSD-3-Clause-LBNL
+ */
+
+#ifndef INTERPOLATECURRENTFINETOCOARSE_H
+#define INTERPOLATECURRENTFINETOCOARSE_H
+
+#include <AMReX_Array4.H>
+#include <AMReX_REAL.H>
+#include <AMReX_BLassert.H>
+#include <AMReX_Extension.H>
+#include <AMReX_GpuQualifiers.H>
+
+#include <utility>  // std::move
+
+
+/** Fill a current coarse patch with averaged values from a fine patch
+ *
+ * Fills the values of the current for a selected component on the coarse patch
+ * by averaging the values of the current of the fine patch.
+ *
+ * @tparam IDim j-field component on which the averaging is performed
+ */
+template< int IDim >
+class InterpolateCurrentFineToCoarse
+{
+public:
+    /** Construct with fine and coarse patch and their refinement ratio
+     *
+     * @param[in]     fine read-only fine patch
+     * @param[in,out] coarse overwritten coarse patch
+     * @param[in]     refinement_ratio ratio between coarse and fine patch granularity
+     *                (currently, only a value of is implemented)
+     */
+    AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
+    InterpolateCurrentFineToCoarse(
+        amrex::Array4< amrex::Real const > const fine,
+        amrex::Array4< amrex::Real       > const coarse,
+        int const refinement_ratio
+    ) : m_fine(std::move(fine)),
+        m_coarse(std::move(coarse)),
+        m_refinement_ratio(std::move(refinement_ratio))
+    {
+        //! @note constants and stencils in operator() implementation assume 2x refinement
+        BL_ASSERT(refinement_ratio == 2);
+    }
+
+    AMREX_GPU_DEVICE AMREX_FORCE_INLINE
+    void
+    operator()(
+        int const i,
+        int const j,
+        int const k
+    ) const noexcept
+    {
+        auto const & fine_unsafe = m_fine;   // out-of-bounds access not secured with zero-values yet
+        auto const & coarse      = m_coarse; // out-of-bounds access not secured but will also not occur
+
+        // return zero for out-of-bounds accesses during interpolation
+        // this is efficiently used as a method to add neutral elements beyond guards in the average below
+        auto const fine = [fine_unsafe] AMREX_GPU_DEVICE (int const j, int const k, int const l) noexcept
+        {
+            return fine_unsafe.contains(j, k, l) ? fine_unsafe(j, k, l) :  amrex::Real{0.};
+        };
+
+        int const ii = i * m_refinement_ratio;
+        int const jj = j * m_refinement_ratio;
+        int const kk = k * m_refinement_ratio;
+#if AMREX_SPACEDIM == 2
+        if (IDim == 0) {
+            coarse(i, j, k) = 0.25 * (
+                fine(ii, jj, kk) + fine(ii + 1, jj, kk) +
+                0.5  * (
+                    fine(ii, jj - 1, kk) + fine(ii + 1, jj - 1, kk) +
+                    fine(ii, jj + 1, kk) + fine(ii + 1, jj + 1, kk)
+                )
+            );
+        } else if (IDim == 2) {
+            coarse(i, j, k) = 0.25 * (
+                fine(ii, jj, kk) + fine(ii, jj + 1, kk) +
+                0.5  * (
+                    fine(ii - 1, jj, kk) + fine(ii - 1, jj + 1, kk) +
+                    fine(ii + 1, jj, kk) + fine(ii + 1, jj + 1, kk)
+                )
+            );
+        } else {
+            coarse(i, j, k) = 0.25 * (
+                fine(ii, jj, kk) +
+                0.5  * (
+                    fine(ii - 1, jj    , kk) + fine(ii + 1, jj    , kk) +
+                    fine(ii    , jj - 1, kk) + fine(ii    , jj + 1, kk)
+                ) +
+                0.25 * (
+                    fine(ii - 1, jj - 1, kk) + fine(ii + 1, jj - 1, kk) +
+                    fine(ii - 1, jj + 1, kk) + fine(ii + 1, jj + 1, kk)
+                )
+            );
+        }
+#elif AMREX_SPACEDIM == 3
+        if (IDim == 0) {
+            coarse(i,j,k) = 0.125 * (
+                fine(ii  , jj, kk) +
+                0.5  * (
+                    fine(ii  , jj-1, kk  ) + fine(ii  , jj+1, kk  ) +
+                    fine(ii  , jj  , kk-1) + fine(ii  , jj  , kk+1)
+                ) +
+                0.25 * (
+                    fine(ii  , jj-1, kk-1) + fine(ii  , jj+1, kk-1) +
+                    fine(ii  , jj-1, kk+1) + fine(ii  , jj+1, kk+1)
+                ) +
+                fine(ii+1, jj, kk) +
+                0.5  * (
+                    fine(ii+1, jj-1, kk  ) + fine(ii+1, jj+1, kk  ) +
+                    fine(ii+1, jj  , kk-1) + fine(ii+1, jj  , kk+1)
+                ) +
+                0.25 * (
+                    fine(ii+1, jj-1, kk-1) + fine(ii+1, jj+1, kk-1) +
+                    fine(ii+1, jj-1, kk+1) + fine(ii+1, jj+1, kk+1)
+                )
+            );
+        } else if (IDim == 1) {
+            coarse(i, j, k) = 0.125 * (
+                fine(ii, jj , kk) +
+                0.5  * (
+                    fine(ii-1, jj  , kk  ) + fine(ii+1, jj  , kk  ) +
+                    fine(ii  , jj  , kk-1) + fine(ii  , jj  , kk+1)
+                 ) +
+                 0.25 * (
+                    fine(ii-1, jj  , kk-1) + fine(ii+1, jj  , kk-1) +
+                    fine(ii-1, jj  , kk+1) + fine(ii+1, jj  , kk+1)
+                 ) +
+                 fine(ii, jj+1, kk) +
+                 0.5  * (
+                    fine(ii-1, jj+1, kk  ) + fine(ii+1, jj+1, kk  ) +
+                    fine(ii  , jj+1, kk-1) + fine(ii  , jj+1, kk+1)
+                 ) +
+                 0.25 * (
+                    fine(ii-1, jj+1, kk-1) + fine(ii+1, jj+1, kk-1) +
+                    fine(ii-1, jj+1, kk+1) + fine(ii+1, jj+1, kk+1)
+                 )
+             );
+        } else {
+            coarse(i, j, k) = 0.125 * (
+                fine(ii, jj, kk  ) +
+                0.5  * (
+                    fine(ii-1, jj  , kk  ) + fine(ii+1, jj  , kk  ) +
+                    fine(ii  , jj-1, kk  ) + fine(ii  , jj+1, kk  )
+                ) +
+                0.25 * (
+                    fine(ii-1, jj-1, kk  ) + fine(ii+1, jj-1, kk  ) +
+                    fine(ii-1, jj+1, kk  ) + fine(ii+1, jj+1, kk  )
+                ) +
+                fine(ii, jj, kk+1) +
+                0.5  * (
+                    fine(ii-1, jj  , kk+1) + fine(ii+1, jj  , kk+1) +
+                    fine(ii  , jj-1, kk+1) + fine(ii  , jj+1, kk+1)
+                ) +
+                0.25 * (
+                    fine(ii-1, jj-1, kk+1) + fine(ii+1, jj-1, kk+1) +
+                    fine(ii-1, jj+1, kk+1) + fine(ii+1, jj+1, kk+1)
+                )
+             );
+         }
+#endif
+    }
+private:
+    amrex::Array4< amrex::Real const > m_fine;
+    amrex::Array4< amrex::Real       > m_coarse;
+    int m_refinement_ratio;
+};
+
+#endif // INTERPOLATECURRENTFINETOCOARSE_H
diff --git a/Source/Parallelization/WarpXComm.cpp b/Source/Parallelization/WarpXComm.cpp
index e67acec22..6128b27a4 100644
--- a/Source/Parallelization/WarpXComm.cpp
+++ b/Source/Parallelization/WarpXComm.cpp
@@ -2,6 +2,7 @@
 #include <WarpX.H>
 #include <WarpX_f.H>
 #include <WarpXSumGuardCells.H>
+#include <Parallelization/InterpolateCurrentFineToCoarse.H>
 
 #include <algorithm>
 #include <cstdlib>
@@ -487,7 +488,7 @@ WarpX::SyncCurrent ()
         std::array<      MultiFab*,3> crse { current_cp[lev][0].get(),
                                              current_cp[lev][1].get(),
                                              current_cp[lev][2].get() };
-        SyncCurrent(fine, crse, refinement_ratio[0]);
+        interpolateCurrentFineToCoarse(fine, crse, refinement_ratio[0]);
     }
 
     // For each level
@@ -499,36 +500,35 @@ WarpX::SyncCurrent ()
     }
 }
 
-/** \brief Fills the values of the current on the coarse patch by
- *  averaging the values of the current of the fine patch (on the same level).
- */
 void
-WarpX::SyncCurrent (const std::array<const amrex::MultiFab*,3>& fine,
-                    const std::array<      amrex::MultiFab*,3>& crse,
-                    int refinement_ratio)
+WarpX::interpolateCurrentFineToCoarse ( std::array< amrex::MultiFab const *, 3 > const & fine,
+                                        std::array< amrex::MultiFab       *, 3 > const & coarse,
+                                        int const refinement_ratio)
 {
+    BL_PROFILE("InterpolateCurrentFineToCoarse()");
     BL_ASSERT(refinement_ratio == 2);
-    const IntVect& ng = (fine[0]->nGrowVect() + 1) /refinement_ratio;
+    const IntVect& ng = (fine[0]->nGrowVect() + 1) / refinement_ratio; // add equivalent no. of guards to coarse patch
 
 #ifdef _OPEMP
-#pragma omp parallel
+#pragma omp parallel if (Gpu::notInLaunchRegion())
 #endif
     {
-        FArrayBox ffab;
-        for (int idim = 0; idim < 3; ++idim)
+        for (int idim = 0; idim < fine.size(); ++idim)  // j-field components
         {
-            for (MFIter mfi(*crse[idim],true); mfi.isValid(); ++mfi)
+            // OMP in-box decomposition of coarse into tilebox
+            for (MFIter mfi(*coarse[idim], TilingIfNotGPU()); mfi.isValid(); ++mfi)
             {
-                const Box& bx = mfi.growntilebox(ng);
-                Box fbx = amrex::grow(amrex::refine(bx,refinement_ratio),1);
-                ffab.resize(fbx);
-                fbx &= (*fine[idim])[mfi].box();
-                ffab.setVal(0.0);
-                ffab.copy((*fine[idim])[mfi], fbx, 0, fbx, 0, fine[idim]->nComp());
-                WRPX_SYNC_CURRENT(bx.loVect(), bx.hiVect(),
-                                   BL_TO_FORTRAN_ANYD((*crse[idim])[mfi]),
-                                   BL_TO_FORTRAN_ANYD(ffab),
-                                   &idim);
+                const Box& bx = mfi.growntilebox(ng); // only grow to outer directions of tileboxes for filling guards
+
+                auto const & arrFine = fine[idim]->const_array(mfi);
+                auto const & arrCoarse = coarse[idim]->array(mfi);
+
+                if( idim == 0 )
+                    amrex::ParallelFor( bx, InterpolateCurrentFineToCoarse<0>(arrFine, arrCoarse, refinement_ratio) );
+                else if( idim == 1 )
+                    amrex::ParallelFor( bx, InterpolateCurrentFineToCoarse<1>(arrFine, arrCoarse, refinement_ratio) );
+                else if( idim == 2 )
+                    amrex::ParallelFor( bx, InterpolateCurrentFineToCoarse<2>(arrFine, arrCoarse, refinement_ratio) );
             }
         }
     }
@@ -558,9 +558,6 @@ WarpX::SyncRho ()
     }
 }
 
-/** \brief Fills the values of the charge density on the coarse patch by
- *  averaging the values of the charge density of the fine patch (on the same level).
- */
 void
 WarpX::SyncRho (const MultiFab& fine, MultiFab& crse, int refinement_ratio)
 {
@@ -569,7 +566,7 @@ WarpX::SyncRho (const MultiFab& fine, MultiFab& crse, int refinement_ratio)
     const int nc = fine.nComp();
 
 #ifdef _OPEMP
-#pragma omp parallel
+#pragma omp parallel if (Gpu::notInLaunchRegion())
 #endif
     {
         FArrayBox ffab;
@@ -607,7 +604,7 @@ WarpX::RestrictCurrentFromFineToCoarsePatch (int lev)
     std::array<      MultiFab*,3> crse { current_cp[lev][0].get(),
                                          current_cp[lev][1].get(),
                                          current_cp[lev][2].get() };
-    SyncCurrent(fine, crse, refinement_ratio[0]);
+    interpolateCurrentFineToCoarse(fine, crse, refinement_ratio[0]);
 }
 
 void