aboutsummaryrefslogtreecommitdiff
path: root/Source/Parallelization
diff options
context:
space:
mode:
Diffstat (limited to 'Source/Parallelization')
-rw-r--r--Source/Parallelization/InterpolateCurrentFineToCoarse.H175
-rw-r--r--Source/Parallelization/Make.package1
-rw-r--r--Source/Parallelization/WarpXComm.cpp200
-rw-r--r--Source/Parallelization/WarpXComm_K.H161
-rw-r--r--Source/Parallelization/WarpXSumGuardCells.H6
5 files changed, 416 insertions, 127 deletions
diff --git a/Source/Parallelization/InterpolateCurrentFineToCoarse.H b/Source/Parallelization/InterpolateCurrentFineToCoarse.H
new file mode 100644
index 000000000..148b725d0
--- /dev/null
+++ b/Source/Parallelization/InterpolateCurrentFineToCoarse.H
@@ -0,0 +1,175 @@
+/* Copyright 2019 Axel Huebl, Weiqun Zhang
+ *
+ * This file is part of WarpX.
+ *
+ * License: BSD-3-Clause-LBNL
+ */
+
+#ifndef INTERPOLATECURRENTFINETOCOARSE_H
+#define INTERPOLATECURRENTFINETOCOARSE_H
+
+#include <AMReX_Array4.H>
+#include <AMReX_REAL.H>
+#include <AMReX_BLassert.H>
+#include <AMReX_Extension.H>
+#include <AMReX_GpuQualifiers.H>
+
+#include <utility> // std::move
+
+
+/** Fill a current coarse patch with averaged values from a fine patch
+ *
+ * Fills the values of the current for a selected component on the coarse patch
+ * by averaging the values of the current of the fine patch.
+ *
+ * @tparam IDim j-field component on which the averaging is performed
+ */
+template< int IDim >
+class InterpolateCurrentFineToCoarse
+{
+public:
+ /** Construct with fine and coarse patch and their refinement ratio
+ *
+ * @param[in] fine read-only fine patch
+ * @param[in,out] coarse overwritten coarse patch
+ * @param[in] refinement_ratio ratio between coarse and fine patch granularity
+ * (currently, only a value of is implemented)
+ */
+ AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
+ InterpolateCurrentFineToCoarse(
+ amrex::Array4< amrex::Real const > const fine,
+ amrex::Array4< amrex::Real > const coarse,
+ int const refinement_ratio
+ ) : m_fine(std::move(fine)),
+ m_coarse(std::move(coarse)),
+ m_refinement_ratio(std::move(refinement_ratio))
+ {
+ //! @note constants and stencils in operator() implementation assume 2x refinement
+ BL_ASSERT(refinement_ratio == 2);
+ }
+
+ AMREX_GPU_DEVICE AMREX_FORCE_INLINE
+ void
+ operator()(
+ int const i,
+ int const j,
+ int const k
+ ) const noexcept
+ {
+ auto const & fine_unsafe = m_fine; // out-of-bounds access not secured with zero-values yet
+ auto const & coarse = m_coarse; // out-of-bounds access not secured but will also not occur
+
+ // return zero for out-of-bounds accesses during interpolation
+ // this is efficiently used as a method to add neutral elements beyond guards in the average below
+ auto const fine = [fine_unsafe] AMREX_GPU_DEVICE (int const j, int const k, int const l) noexcept
+ {
+ return fine_unsafe.contains(j, k, l) ? fine_unsafe(j, k, l) : amrex::Real{0.};
+ };
+
+ int const ii = i * m_refinement_ratio;
+ int const jj = j * m_refinement_ratio;
+ int const kk = k * m_refinement_ratio;
+#if AMREX_SPACEDIM == 2
+ if (IDim == 0) {
+ coarse(i, j, k) = 0.25 * (
+ fine(ii, jj, kk) + fine(ii + 1, jj, kk) +
+ 0.5 * (
+ fine(ii, jj - 1, kk) + fine(ii + 1, jj - 1, kk) +
+ fine(ii, jj + 1, kk) + fine(ii + 1, jj + 1, kk)
+ )
+ );
+ } else if (IDim == 2) {
+ coarse(i, j, k) = 0.25 * (
+ fine(ii, jj, kk) + fine(ii, jj + 1, kk) +
+ 0.5 * (
+ fine(ii - 1, jj, kk) + fine(ii - 1, jj + 1, kk) +
+ fine(ii + 1, jj, kk) + fine(ii + 1, jj + 1, kk)
+ )
+ );
+ } else {
+ coarse(i, j, k) = 0.25 * (
+ fine(ii, jj, kk) +
+ 0.5 * (
+ fine(ii - 1, jj , kk) + fine(ii + 1, jj , kk) +
+ fine(ii , jj - 1, kk) + fine(ii , jj + 1, kk)
+ ) +
+ 0.25 * (
+ fine(ii - 1, jj - 1, kk) + fine(ii + 1, jj - 1, kk) +
+ fine(ii - 1, jj + 1, kk) + fine(ii + 1, jj + 1, kk)
+ )
+ );
+ }
+#elif AMREX_SPACEDIM == 3
+ if (IDim == 0) {
+ coarse(i,j,k) = 0.125 * (
+ fine(ii , jj, kk) +
+ 0.5 * (
+ fine(ii , jj-1, kk ) + fine(ii , jj+1, kk ) +
+ fine(ii , jj , kk-1) + fine(ii , jj , kk+1)
+ ) +
+ 0.25 * (
+ fine(ii , jj-1, kk-1) + fine(ii , jj+1, kk-1) +
+ fine(ii , jj-1, kk+1) + fine(ii , jj+1, kk+1)
+ ) +
+ fine(ii+1, jj, kk) +
+ 0.5 * (
+ fine(ii+1, jj-1, kk ) + fine(ii+1, jj+1, kk ) +
+ fine(ii+1, jj , kk-1) + fine(ii+1, jj , kk+1)
+ ) +
+ 0.25 * (
+ fine(ii+1, jj-1, kk-1) + fine(ii+1, jj+1, kk-1) +
+ fine(ii+1, jj-1, kk+1) + fine(ii+1, jj+1, kk+1)
+ )
+ );
+ } else if (IDim == 1) {
+ coarse(i, j, k) = 0.125 * (
+ fine(ii, jj , kk) +
+ 0.5 * (
+ fine(ii-1, jj , kk ) + fine(ii+1, jj , kk ) +
+ fine(ii , jj , kk-1) + fine(ii , jj , kk+1)
+ ) +
+ 0.25 * (
+ fine(ii-1, jj , kk-1) + fine(ii+1, jj , kk-1) +
+ fine(ii-1, jj , kk+1) + fine(ii+1, jj , kk+1)
+ ) +
+ fine(ii, jj+1, kk) +
+ 0.5 * (
+ fine(ii-1, jj+1, kk ) + fine(ii+1, jj+1, kk ) +
+ fine(ii , jj+1, kk-1) + fine(ii , jj+1, kk+1)
+ ) +
+ 0.25 * (
+ fine(ii-1, jj+1, kk-1) + fine(ii+1, jj+1, kk-1) +
+ fine(ii-1, jj+1, kk+1) + fine(ii+1, jj+1, kk+1)
+ )
+ );
+ } else {
+ coarse(i, j, k) = 0.125 * (
+ fine(ii, jj, kk ) +
+ 0.5 * (
+ fine(ii-1, jj , kk ) + fine(ii+1, jj , kk ) +
+ fine(ii , jj-1, kk ) + fine(ii , jj+1, kk )
+ ) +
+ 0.25 * (
+ fine(ii-1, jj-1, kk ) + fine(ii+1, jj-1, kk ) +
+ fine(ii-1, jj+1, kk ) + fine(ii+1, jj+1, kk )
+ ) +
+ fine(ii, jj, kk+1) +
+ 0.5 * (
+ fine(ii-1, jj , kk+1) + fine(ii+1, jj , kk+1) +
+ fine(ii , jj-1, kk+1) + fine(ii , jj+1, kk+1)
+ ) +
+ 0.25 * (
+ fine(ii-1, jj-1, kk+1) + fine(ii+1, jj-1, kk+1) +
+ fine(ii-1, jj+1, kk+1) + fine(ii+1, jj+1, kk+1)
+ )
+ );
+ }
+#endif
+ }
+private:
+ amrex::Array4< amrex::Real const > m_fine;
+ amrex::Array4< amrex::Real > m_coarse;
+ int m_refinement_ratio;
+};
+
+#endif // INTERPOLATECURRENTFINETOCOARSE_H
diff --git a/Source/Parallelization/Make.package b/Source/Parallelization/Make.package
index 3d1fcf1da..c74583522 100644
--- a/Source/Parallelization/Make.package
+++ b/Source/Parallelization/Make.package
@@ -1,6 +1,7 @@
CEXE_sources += WarpXComm.cpp
CEXE_sources += WarpXRegrid.cpp
CEXE_headers += WarpXSumGuardCells.H
+CEXE_headers += WarpXComm_K.H
INCLUDE_LOCATIONS += $(WARPX_HOME)/Source/Parallelization
VPATH_LOCATIONS += $(WARPX_HOME)/Source/Parallelization
diff --git a/Source/Parallelization/WarpXComm.cpp b/Source/Parallelization/WarpXComm.cpp
index 1c8c37cad..4f870e79c 100644
--- a/Source/Parallelization/WarpXComm.cpp
+++ b/Source/Parallelization/WarpXComm.cpp
@@ -1,8 +1,8 @@
+#include <WarpXComm_K.H>
#include <WarpX.H>
#include <WarpX_f.H>
#include <WarpXSumGuardCells.H>
-
-#include <AMReX_FillPatchUtil_F.H>
+#include <Parallelization/InterpolateCurrentFineToCoarse.H>
#include <algorithm>
#include <cstdlib>
@@ -52,8 +52,6 @@ WarpX::UpdateAuxilaryData ()
{
BL_PROFILE("UpdateAuxilaryData()");
- const int use_limiter = 0;
-
for (int lev = 1; lev <= finest_level; ++lev)
{
const auto& crse_period = Geom(lev-1).periodicity();
@@ -81,57 +79,37 @@ WarpX::UpdateAuxilaryData ()
MultiFab::Subtract(dBy, *Bfield_cp[lev][1], 0, 0, Bfield_cp[lev][1]->nComp(), ng);
MultiFab::Subtract(dBz, *Bfield_cp[lev][2], 0, 0, Bfield_cp[lev][2]->nComp(), ng);
- const Real* dx = Geom(lev-1).CellSize();
const int refinement_ratio = refRatio(lev-1)[0];
+ AMREX_ALWAYS_ASSERT(refinement_ratio == 2);
+
#ifdef _OPENMP
-#pragma omp parallel
+#pragma omp parallel if (Gpu::notInLaunchRegion())
#endif
+ for (MFIter mfi(*Bfield_aux[lev][0]); mfi.isValid(); ++mfi)
{
- std::array<FArrayBox,3> bfab;
- for (MFIter mfi(*Bfield_aux[lev][0]); mfi.isValid(); ++mfi)
+ Array4<Real> const& bx_aux = Bfield_aux[lev][0]->array(mfi);
+ Array4<Real> const& by_aux = Bfield_aux[lev][1]->array(mfi);
+ Array4<Real> const& bz_aux = Bfield_aux[lev][2]->array(mfi);
+ Array4<Real const> const& bx_fp = Bfield_fp[lev][0]->const_array(mfi);
+ Array4<Real const> const& by_fp = Bfield_fp[lev][1]->const_array(mfi);
+ Array4<Real const> const& bz_fp = Bfield_fp[lev][2]->const_array(mfi);
+ Array4<Real const> const& bx_c = dBx.const_array(mfi);
+ Array4<Real const> const& by_c = dBy.const_array(mfi);
+ Array4<Real const> const& bz_c = dBz.const_array(mfi);
+
+ amrex::ParallelFor(Box(bx_aux), Box(by_aux), Box(bz_aux),
+ [=] AMREX_GPU_DEVICE (int j, int k, int l) noexcept
{
- Box ccbx = mfi.fabbox();
- ccbx.enclosedCells();
- ccbx.coarsen(refinement_ratio).refine(refinement_ratio); // so that ccbx is coarsenable
-
- const FArrayBox& cxfab = dBx[mfi];
- const FArrayBox& cyfab = dBy[mfi];
- const FArrayBox& czfab = dBz[mfi];
- bfab[0].resize(amrex::convert(ccbx,Bx_nodal_flag));
- bfab[1].resize(amrex::convert(ccbx,By_nodal_flag));
- bfab[2].resize(amrex::convert(ccbx,Bz_nodal_flag));
-
-#if (AMREX_SPACEDIM == 3)
- amrex_interp_div_free_bfield(ccbx.loVect(), ccbx.hiVect(),
- BL_TO_FORTRAN_ANYD(bfab[0]),
- BL_TO_FORTRAN_ANYD(bfab[1]),
- BL_TO_FORTRAN_ANYD(bfab[2]),
- BL_TO_FORTRAN_ANYD(cxfab),
- BL_TO_FORTRAN_ANYD(cyfab),
- BL_TO_FORTRAN_ANYD(czfab),
- dx, &refinement_ratio,&use_limiter);
-#else
- amrex_interp_div_free_bfield(ccbx.loVect(), ccbx.hiVect(),
- BL_TO_FORTRAN_ANYD(bfab[0]),
- BL_TO_FORTRAN_ANYD(bfab[2]),
- BL_TO_FORTRAN_ANYD(cxfab),
- BL_TO_FORTRAN_ANYD(czfab),
- dx, &refinement_ratio,&use_limiter);
- amrex_interp_cc_bfield(ccbx.loVect(), ccbx.hiVect(),
- BL_TO_FORTRAN_ANYD(bfab[1]),
- BL_TO_FORTRAN_ANYD(cyfab),
- &refinement_ratio,&use_limiter);
-#endif
-
- for (int idim = 0; idim < 3; ++idim)
- {
- FArrayBox& aux = (*Bfield_aux[lev][idim])[mfi];
- FArrayBox& fp = (*Bfield_fp[lev][idim])[mfi];
- const Box& bx = aux.box();
- aux.copy(fp, bx, 0, bx, 0, 1);
- aux.plus(bfab[idim], bx, bx, 0, 0, 1);
- }
- }
+ warpx_interp_bfield_x(j,k,l, bx_aux, bx_fp, bx_c);
+ },
+ [=] AMREX_GPU_DEVICE (int j, int k, int l) noexcept
+ {
+ warpx_interp_bfield_y(j,k,l, by_aux, by_fp, by_c);
+ },
+ [=] AMREX_GPU_DEVICE (int j, int k, int l) noexcept
+ {
+ warpx_interp_bfield_z(j,k,l, bz_aux, bz_fp, bz_c);
+ });
}
}
@@ -156,56 +134,34 @@ WarpX::UpdateAuxilaryData ()
MultiFab::Subtract(dEy, *Efield_cp[lev][1], 0, 0, Efield_cp[lev][1]->nComp(), ng);
MultiFab::Subtract(dEz, *Efield_cp[lev][2], 0, 0, Efield_cp[lev][2]->nComp(), ng);
- const int refinement_ratio = refRatio(lev-1)[0];
#ifdef _OPEMP
-#pragma omp parallel
+#pragma omp parallel if (Gpu::notInLaunchRegion())
#endif
+ for (MFIter mfi(*Efield_aux[lev][0]); mfi.isValid(); ++mfi)
{
- std::array<FArrayBox,3> efab;
- for (MFIter mfi(*Efield_aux[lev][0]); mfi.isValid(); ++mfi)
+ Array4<Real> const& ex_aux = Efield_aux[lev][0]->array(mfi);
+ Array4<Real> const& ey_aux = Efield_aux[lev][1]->array(mfi);
+ Array4<Real> const& ez_aux = Efield_aux[lev][2]->array(mfi);
+ Array4<Real const> const& ex_fp = Efield_fp[lev][0]->const_array(mfi);
+ Array4<Real const> const& ey_fp = Efield_fp[lev][1]->const_array(mfi);
+ Array4<Real const> const& ez_fp = Efield_fp[lev][2]->const_array(mfi);
+ Array4<Real const> const& ex_c = dEx.const_array(mfi);
+ Array4<Real const> const& ey_c = dEy.const_array(mfi);
+ Array4<Real const> const& ez_c = dEz.const_array(mfi);
+
+ amrex::ParallelFor(Box(ex_aux), Box(ey_aux), Box(ez_aux),
+ [=] AMREX_GPU_DEVICE (int j, int k, int l) noexcept
{
- Box ccbx = mfi.fabbox();
- ccbx.enclosedCells();
- ccbx.coarsen(refinement_ratio).refine(refinement_ratio); // so that ccbx is coarsenable
-
- const FArrayBox& cxfab = dEx[mfi];
- const FArrayBox& cyfab = dEy[mfi];
- const FArrayBox& czfab = dEz[mfi];
- efab[0].resize(amrex::convert(ccbx,Ex_nodal_flag));
- efab[1].resize(amrex::convert(ccbx,Ey_nodal_flag));
- efab[2].resize(amrex::convert(ccbx,Ez_nodal_flag));
-
-#if (AMREX_SPACEDIM == 3)
- amrex_interp_efield(ccbx.loVect(), ccbx.hiVect(),
- BL_TO_FORTRAN_ANYD(efab[0]),
- BL_TO_FORTRAN_ANYD(efab[1]),
- BL_TO_FORTRAN_ANYD(efab[2]),
- BL_TO_FORTRAN_ANYD(cxfab),
- BL_TO_FORTRAN_ANYD(cyfab),
- BL_TO_FORTRAN_ANYD(czfab),
- &refinement_ratio,&use_limiter);
-#else
- amrex_interp_efield(ccbx.loVect(), ccbx.hiVect(),
- BL_TO_FORTRAN_ANYD(efab[0]),
- BL_TO_FORTRAN_ANYD(efab[2]),
- BL_TO_FORTRAN_ANYD(cxfab),
- BL_TO_FORTRAN_ANYD(czfab),
- &refinement_ratio,&use_limiter);
- amrex_interp_nd_efield(ccbx.loVect(), ccbx.hiVect(),
- BL_TO_FORTRAN_ANYD(efab[1]),
- BL_TO_FORTRAN_ANYD(cyfab),
- &refinement_ratio);
-#endif
-
- for (int idim = 0; idim < 3; ++idim)
- {
- FArrayBox& aux = (*Efield_aux[lev][idim])[mfi];
- FArrayBox& fp = (*Efield_fp[lev][idim])[mfi];
- const Box& bx = aux.box();
- aux.copy(fp, bx, 0, bx, 0, Efield_fp[lev][idim]->nComp());
- aux.plus(efab[idim], bx, bx, 0, 0, Efield_fp[lev][idim]->nComp());
- }
- }
+ warpx_interp_efield_x(j,k,l, ex_aux, ex_fp, ex_c);
+ },
+ [=] AMREX_GPU_DEVICE (int j, int k, int l) noexcept
+ {
+ warpx_interp_efield_y(j,k,l, ey_aux, ey_fp, ey_c);
+ },
+ [=] AMREX_GPU_DEVICE (int j, int k, int l) noexcept
+ {
+ warpx_interp_efield_z(j,k,l, ez_aux, ez_fp, ez_c);
+ });
}
}
}
@@ -381,7 +337,7 @@ WarpX::SyncCurrent ()
std::array< MultiFab*,3> crse { current_cp[lev][0].get(),
current_cp[lev][1].get(),
current_cp[lev][2].get() };
- SyncCurrent(fine, crse, refinement_ratio[0]);
+ interpolateCurrentFineToCoarse(fine, crse, refinement_ratio[0]);
}
// For each level
@@ -393,36 +349,35 @@ WarpX::SyncCurrent ()
}
}
-/** \brief Fills the values of the current on the coarse patch by
- * averaging the values of the current of the fine patch (on the same level).
- */
void
-WarpX::SyncCurrent (const std::array<const amrex::MultiFab*,3>& fine,
- const std::array< amrex::MultiFab*,3>& crse,
- int refinement_ratio)
+WarpX::interpolateCurrentFineToCoarse ( std::array< amrex::MultiFab const *, 3 > const & fine,
+ std::array< amrex::MultiFab *, 3 > const & coarse,
+ int const refinement_ratio)
{
+ BL_PROFILE("InterpolateCurrentFineToCoarse()");
BL_ASSERT(refinement_ratio == 2);
- const IntVect& ng = (fine[0]->nGrowVect() + 1) /refinement_ratio;
+ const IntVect& ng = (fine[0]->nGrowVect() + 1) / refinement_ratio; // add equivalent no. of guards to coarse patch
#ifdef _OPEMP
-#pragma omp parallel
+#pragma omp parallel if (Gpu::notInLaunchRegion())
#endif
{
- FArrayBox ffab;
- for (int idim = 0; idim < 3; ++idim)
+ for (int idim = 0; idim < fine.size(); ++idim) // j-field components
{
- for (MFIter mfi(*crse[idim],true); mfi.isValid(); ++mfi)
+ // OMP in-box decomposition of coarse into tilebox
+ for (MFIter mfi(*coarse[idim], TilingIfNotGPU()); mfi.isValid(); ++mfi)
{
- const Box& bx = mfi.growntilebox(ng);
- Box fbx = amrex::grow(amrex::refine(bx,refinement_ratio),1);
- ffab.resize(fbx);
- fbx &= (*fine[idim])[mfi].box();
- ffab.setVal(0.0);
- ffab.copy((*fine[idim])[mfi], fbx, 0, fbx, 0, fine[idim]->nComp());
- WRPX_SYNC_CURRENT(bx.loVect(), bx.hiVect(),
- BL_TO_FORTRAN_ANYD((*crse[idim])[mfi]),
- BL_TO_FORTRAN_ANYD(ffab),
- &idim);
+ const Box& bx = mfi.growntilebox(ng); // only grow to outer directions of tileboxes for filling guards
+
+ auto const & arrFine = fine[idim]->const_array(mfi);
+ auto const & arrCoarse = coarse[idim]->array(mfi);
+
+ if( idim == 0 )
+ amrex::ParallelFor( bx, InterpolateCurrentFineToCoarse<0>(arrFine, arrCoarse, refinement_ratio) );
+ else if( idim == 1 )
+ amrex::ParallelFor( bx, InterpolateCurrentFineToCoarse<1>(arrFine, arrCoarse, refinement_ratio) );
+ else if( idim == 2 )
+ amrex::ParallelFor( bx, InterpolateCurrentFineToCoarse<2>(arrFine, arrCoarse, refinement_ratio) );
}
}
}
@@ -452,9 +407,6 @@ WarpX::SyncRho ()
}
}
-/** \brief Fills the values of the charge density on the coarse patch by
- * averaging the values of the charge density of the fine patch (on the same level).
- */
void
WarpX::SyncRho (const MultiFab& fine, MultiFab& crse, int refinement_ratio)
{
@@ -463,7 +415,7 @@ WarpX::SyncRho (const MultiFab& fine, MultiFab& crse, int refinement_ratio)
const int nc = fine.nComp();
#ifdef _OPEMP
-#pragma omp parallel
+#pragma omp parallel if (Gpu::notInLaunchRegion())
#endif
{
FArrayBox ffab;
@@ -501,7 +453,7 @@ WarpX::RestrictCurrentFromFineToCoarsePatch (int lev)
std::array< MultiFab*,3> crse { current_cp[lev][0].get(),
current_cp[lev][1].get(),
current_cp[lev][2].get() };
- SyncCurrent(fine, crse, refinement_ratio[0]);
+ interpolateCurrentFineToCoarse(fine, crse, refinement_ratio[0]);
}
void
@@ -688,7 +640,7 @@ WarpX::AddRhoFromFineLevelandSumBoundary(int lev, int icomp, int ncomp)
}
else if (charge_buf[lev+1]) // but no filter
{
- MultiFab::Copy(*charge_buf[lev+1],
+ MultiFab::Add(*charge_buf[lev+1],
*rho_cp[lev+1], icomp, icomp, ncomp,
rho_cp[lev+1]->nGrow());
mf.ParallelAdd(*charge_buf[lev+1], icomp, 0,
diff --git a/Source/Parallelization/WarpXComm_K.H b/Source/Parallelization/WarpXComm_K.H
new file mode 100644
index 000000000..093323ec3
--- /dev/null
+++ b/Source/Parallelization/WarpXComm_K.H
@@ -0,0 +1,161 @@
+#ifndef WARPX_COMM_K_H_
+#define WARPX_COMM_K_H_
+
+#include <AMReX_FArrayBox.H>
+
+AMREX_GPU_DEVICE AMREX_FORCE_INLINE
+void warpx_interp_bfield_x (int j, int k, int l,
+ amrex::Array4<amrex::Real> const& Bxa,
+ amrex::Array4<amrex::Real const> const& Bxf,
+ amrex::Array4<amrex::Real const> const& Bxc)
+{
+ using namespace amrex;
+
+ int lg = amrex::coarsen(l,2);
+ int kg = amrex::coarsen(k,2);
+ int jg = amrex::coarsen(j,2);
+
+ Real wx = (j == jg*2) ? 0.0 : 0.5;
+ Real owx = 1.0-wx;
+ Bxa(j,k,l) = owx * Bxc(jg,kg,lg) + wx * Bxc(jg+1,kg,lg) + Bxf(j,k,l);
+}
+
+AMREX_GPU_DEVICE AMREX_FORCE_INLINE
+void warpx_interp_bfield_y (int j, int k, int l,
+ amrex::Array4<amrex::Real> const& Bya,
+ amrex::Array4<amrex::Real const> const& Byf,
+ amrex::Array4<amrex::Real const> const& Byc)
+{
+ using namespace amrex;
+
+ int lg = amrex::coarsen(l,2);
+ int kg = amrex::coarsen(k,2);
+ int jg = amrex::coarsen(j,2);
+
+ // Note that for 2d, l=0, because the amrex convention is used here.
+
+#if (AMREX_SPACEDIM == 3)
+ Real wy = (k == kg*2) ? 0.0 : 0.5;
+ Real owy = 1.0-wy;
+ Bya(j,k,l) = owy * Byc(jg,kg,lg) + wy * Byc(jg,kg+1,lg) + Byf(j,k,l);
+#else
+ Bya(j,k,l) = Byc(jg,kg,lg) + Byf(j,k,l);
+#endif
+}
+
+AMREX_GPU_DEVICE AMREX_FORCE_INLINE
+void warpx_interp_bfield_z (int j, int k, int l,
+ amrex::Array4<amrex::Real> const& Bza,
+ amrex::Array4<amrex::Real const> const& Bzf,
+ amrex::Array4<amrex::Real const> const& Bzc)
+{
+ using namespace amrex;
+
+ int lg = amrex::coarsen(l,2);
+ int kg = amrex::coarsen(k,2);
+ int jg = amrex::coarsen(j,2);
+
+ // Note that for 2d, l=0, because the amrex convention is used here.
+
+#if (AMREX_SPACEDIM == 3)
+ Real wz = (l == lg*2) ? 0.0 : 0.5;
+ Real owz = 1.0-wz;
+ Bza(j,k,l) = owz * Bzc(jg,kg,lg) + owz * Bzc(jg,kg,lg+1) + Bzf(j,k,l);
+#else
+ Real wy = (k == kg*2) ? 0.0 : 0.5;
+ Real owy = 1.0-wy;
+ Bza(j,k,l) = owy * Bzc(jg,kg,lg) + owy * Bzc(jg,kg+1,lg) + Bzf(j,k,l);
+#endif
+}
+
+AMREX_GPU_DEVICE AMREX_FORCE_INLINE
+void warpx_interp_efield_x (int j, int k, int l,
+ amrex::Array4<amrex::Real> const& Exa,
+ amrex::Array4<amrex::Real const> const& Exf,
+ amrex::Array4<amrex::Real const> const& Exc)
+{
+ using namespace amrex;
+
+ int lg = amrex::coarsen(l,2);
+ int kg = amrex::coarsen(k,2);
+ int jg = amrex::coarsen(j,2);
+
+ Real wy = (k == kg*2) ? 0.0 : 0.5;
+ Real owy = 1.0-wy;
+
+#if (AMREX_SPACEDIM == 3)
+ Real wz = (l == lg*2) ? 0.0 : 0.5;
+ Real owz = 1.0-wz;
+ Exa(j,k,l) = owy * owz * Exc(jg ,kg ,lg )
+ + wy * owz * Exc(jg ,kg+1,lg )
+ + owy * wz * Exc(jg ,kg ,lg+1)
+ + wy * wz * Exc(jg ,kg+1,lg+1)
+ + Exf(j,k,l);
+#else
+ Exa(j,k,l) = owy * Exc(jg,kg,lg) + wy * Exc(jg,kg+1,lg) + Exf(j,k,l);
+#endif
+}
+
+AMREX_GPU_DEVICE AMREX_FORCE_INLINE
+void warpx_interp_efield_y (int j, int k, int l,
+ amrex::Array4<amrex::Real> const& Eya,
+ amrex::Array4<amrex::Real const> const& Eyf,
+ amrex::Array4<amrex::Real const> const& Eyc)
+{
+ using namespace amrex;
+
+ int lg = amrex::coarsen(l,2);
+ int kg = amrex::coarsen(k,2);
+ int jg = amrex::coarsen(j,2);
+
+ Real wx = (j == jg*2) ? 0.0 : 0.5;
+ Real owx = 1.0-wx;
+
+#if (AMREX_SPACEDIM == 3)
+ Real wz = (l == lg*2) ? 0.0 : 0.5;
+ Real owz = 1.0-wz;
+ Eya(j,k,l) = owx * owz * Eyc(jg ,kg ,lg )
+ + wx * owz * Eyc(jg+1,kg ,lg )
+ + owx * wz * Eyc(jg ,kg ,lg+1)
+ + wx * wz * Eyc(jg+1,kg ,lg+1)
+ + Eyf(j,k,l);
+#else
+ Real wy = (k == kg*2) ? 0.0 : 0.5;
+ Real owy = 1.0-wy;
+ Eya(j,k,l) = owx * owy * Eyc(jg ,kg ,lg)
+ + wx * owy * Eyc(jg+1,kg ,lg)
+ + owx * wy * Eyc(jg ,kg+1,lg)
+ + wx * wy * Eyc(jg+1,kg+1,lg)
+ + Eyf(j,k,l);
+#endif
+}
+
+AMREX_GPU_DEVICE AMREX_FORCE_INLINE
+void warpx_interp_efield_z (int j, int k, int l,
+ amrex::Array4<amrex::Real> const& Eza,
+ amrex::Array4<amrex::Real const> const& Ezf,
+ amrex::Array4<amrex::Real const> const& Ezc)
+{
+ using namespace amrex;
+
+ int lg = amrex::coarsen(l,2);
+ int kg = amrex::coarsen(k,2);
+ int jg = amrex::coarsen(j,2);
+
+ Real wx = (j == jg*2) ? 0.0 : 0.5;
+ Real owx = 1.0-wx;
+
+#if (AMREX_SPACEDIM == 3)
+ Real wy = (k == kg*2) ? 0.0 : 0.5;
+ Real owy = 1.0-wy;
+ Eza(j,k,l) = owx * owy * Ezc(jg ,kg ,lg )
+ + wx * owy * Ezc(jg+1,kg ,lg )
+ + owx * wy * Ezc(jg ,kg+1,lg )
+ + wx * wy * Ezc(jg+1,kg+1,lg )
+ + Ezf(j,k,l);
+#else
+ Eza(j,k,l) = owx * Ezc(jg,kg,lg) + wx * Ezc(jg+1,kg,lg) + Ezf(j,k,l);
+#endif
+}
+
+#endif
diff --git a/Source/Parallelization/WarpXSumGuardCells.H b/Source/Parallelization/WarpXSumGuardCells.H
index 24ad1b80f..ce353c2b6 100644
--- a/Source/Parallelization/WarpXSumGuardCells.H
+++ b/Source/Parallelization/WarpXSumGuardCells.H
@@ -15,7 +15,7 @@
* updates both the *valid* cells and *guard* cells. (This is because a
* spectral solver requires the value of the sources over a large stencil.)
*/
-void
+inline void
WarpXSumGuardCells(amrex::MultiFab& mf, const amrex::Periodicity& period,
const int icomp=0, const int ncomp=1){
#ifdef WARPX_USE_PSATD
@@ -43,7 +43,7 @@ WarpXSumGuardCells(amrex::MultiFab& mf, const amrex::Periodicity& period,
* Note: `i_comp` is the component where the results will be stored in `dst`;
* The component from which we copy in `src` is always 0.
*/
-void
+inline void
WarpXSumGuardCells(amrex::MultiFab& dst, amrex::MultiFab& src,
const amrex::Periodicity& period,
const int icomp=0, const int ncomp=1){
@@ -54,7 +54,7 @@ WarpXSumGuardCells(amrex::MultiFab& dst, amrex::MultiFab& src,
// Update only the valid cells
const amrex::IntVect n_updated_guards = amrex::IntVect::TheZeroVector();
#endif
- src.SumBoundary(icomp, ncomp, n_updated_guards, period);
+ src.SumBoundary(0, ncomp, n_updated_guards, period);
amrex::Copy( dst, src, 0, icomp, ncomp, n_updated_guards );
}