diff options
Diffstat (limited to 'Source/Parallelization')
-rw-r--r-- | Source/Parallelization/InterpolateCurrentFineToCoarse.H | 175 | ||||
-rw-r--r-- | Source/Parallelization/WarpXComm.cpp | 51 |
2 files changed, 199 insertions, 27 deletions
diff --git a/Source/Parallelization/InterpolateCurrentFineToCoarse.H b/Source/Parallelization/InterpolateCurrentFineToCoarse.H new file mode 100644 index 000000000..148b725d0 --- /dev/null +++ b/Source/Parallelization/InterpolateCurrentFineToCoarse.H @@ -0,0 +1,175 @@ +/* Copyright 2019 Axel Huebl, Weiqun Zhang + * + * This file is part of WarpX. + * + * License: BSD-3-Clause-LBNL + */ + +#ifndef INTERPOLATECURRENTFINETOCOARSE_H +#define INTERPOLATECURRENTFINETOCOARSE_H + +#include <AMReX_Array4.H> +#include <AMReX_REAL.H> +#include <AMReX_BLassert.H> +#include <AMReX_Extension.H> +#include <AMReX_GpuQualifiers.H> + +#include <utility> // std::move + + +/** Fill a current coarse patch with averaged values from a fine patch + * + * Fills the values of the current for a selected component on the coarse patch + * by averaging the values of the current of the fine patch. + * + * @tparam IDim j-field component on which the averaging is performed + */ +template< int IDim > +class InterpolateCurrentFineToCoarse +{ +public: + /** Construct with fine and coarse patch and their refinement ratio + * + * @param[in] fine read-only fine patch + * @param[in,out] coarse overwritten coarse patch + * @param[in] refinement_ratio ratio between coarse and fine patch granularity + * (currently, only a value of is implemented) + */ + AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE + InterpolateCurrentFineToCoarse( + amrex::Array4< amrex::Real const > const fine, + amrex::Array4< amrex::Real > const coarse, + int const refinement_ratio + ) : m_fine(std::move(fine)), + m_coarse(std::move(coarse)), + m_refinement_ratio(std::move(refinement_ratio)) + { + //! @note constants and stencils in operator() implementation assume 2x refinement + BL_ASSERT(refinement_ratio == 2); + } + + AMREX_GPU_DEVICE AMREX_FORCE_INLINE + void + operator()( + int const i, + int const j, + int const k + ) const noexcept + { + auto const & fine_unsafe = m_fine; // out-of-bounds access not secured with zero-values yet + auto const & coarse = m_coarse; // out-of-bounds access not secured but will also not occur + + // return zero for out-of-bounds accesses during interpolation + // this is efficiently used as a method to add neutral elements beyond guards in the average below + auto const fine = [fine_unsafe] AMREX_GPU_DEVICE (int const j, int const k, int const l) noexcept + { + return fine_unsafe.contains(j, k, l) ? fine_unsafe(j, k, l) : amrex::Real{0.}; + }; + + int const ii = i * m_refinement_ratio; + int const jj = j * m_refinement_ratio; + int const kk = k * m_refinement_ratio; +#if AMREX_SPACEDIM == 2 + if (IDim == 0) { + coarse(i, j, k) = 0.25 * ( + fine(ii, jj, kk) + fine(ii + 1, jj, kk) + + 0.5 * ( + fine(ii, jj - 1, kk) + fine(ii + 1, jj - 1, kk) + + fine(ii, jj + 1, kk) + fine(ii + 1, jj + 1, kk) + ) + ); + } else if (IDim == 2) { + coarse(i, j, k) = 0.25 * ( + fine(ii, jj, kk) + fine(ii, jj + 1, kk) + + 0.5 * ( + fine(ii - 1, jj, kk) + fine(ii - 1, jj + 1, kk) + + fine(ii + 1, jj, kk) + fine(ii + 1, jj + 1, kk) + ) + ); + } else { + coarse(i, j, k) = 0.25 * ( + fine(ii, jj, kk) + + 0.5 * ( + fine(ii - 1, jj , kk) + fine(ii + 1, jj , kk) + + fine(ii , jj - 1, kk) + fine(ii , jj + 1, kk) + ) + + 0.25 * ( + fine(ii - 1, jj - 1, kk) + fine(ii + 1, jj - 1, kk) + + fine(ii - 1, jj + 1, kk) + fine(ii + 1, jj + 1, kk) + ) + ); + } +#elif AMREX_SPACEDIM == 3 + if (IDim == 0) { + coarse(i,j,k) = 0.125 * ( + fine(ii , jj, kk) + + 0.5 * ( + fine(ii , jj-1, kk ) + fine(ii , jj+1, kk ) + + fine(ii , jj , kk-1) + fine(ii , jj , kk+1) + ) + + 0.25 * ( + fine(ii , jj-1, kk-1) + fine(ii , jj+1, kk-1) + + fine(ii , jj-1, kk+1) + fine(ii , jj+1, kk+1) + ) + + fine(ii+1, jj, kk) + + 0.5 * ( + fine(ii+1, jj-1, kk ) + fine(ii+1, jj+1, kk ) + + fine(ii+1, jj , kk-1) + fine(ii+1, jj , kk+1) + ) + + 0.25 * ( + fine(ii+1, jj-1, kk-1) + fine(ii+1, jj+1, kk-1) + + fine(ii+1, jj-1, kk+1) + fine(ii+1, jj+1, kk+1) + ) + ); + } else if (IDim == 1) { + coarse(i, j, k) = 0.125 * ( + fine(ii, jj , kk) + + 0.5 * ( + fine(ii-1, jj , kk ) + fine(ii+1, jj , kk ) + + fine(ii , jj , kk-1) + fine(ii , jj , kk+1) + ) + + 0.25 * ( + fine(ii-1, jj , kk-1) + fine(ii+1, jj , kk-1) + + fine(ii-1, jj , kk+1) + fine(ii+1, jj , kk+1) + ) + + fine(ii, jj+1, kk) + + 0.5 * ( + fine(ii-1, jj+1, kk ) + fine(ii+1, jj+1, kk ) + + fine(ii , jj+1, kk-1) + fine(ii , jj+1, kk+1) + ) + + 0.25 * ( + fine(ii-1, jj+1, kk-1) + fine(ii+1, jj+1, kk-1) + + fine(ii-1, jj+1, kk+1) + fine(ii+1, jj+1, kk+1) + ) + ); + } else { + coarse(i, j, k) = 0.125 * ( + fine(ii, jj, kk ) + + 0.5 * ( + fine(ii-1, jj , kk ) + fine(ii+1, jj , kk ) + + fine(ii , jj-1, kk ) + fine(ii , jj+1, kk ) + ) + + 0.25 * ( + fine(ii-1, jj-1, kk ) + fine(ii+1, jj-1, kk ) + + fine(ii-1, jj+1, kk ) + fine(ii+1, jj+1, kk ) + ) + + fine(ii, jj, kk+1) + + 0.5 * ( + fine(ii-1, jj , kk+1) + fine(ii+1, jj , kk+1) + + fine(ii , jj-1, kk+1) + fine(ii , jj+1, kk+1) + ) + + 0.25 * ( + fine(ii-1, jj-1, kk+1) + fine(ii+1, jj-1, kk+1) + + fine(ii-1, jj+1, kk+1) + fine(ii+1, jj+1, kk+1) + ) + ); + } +#endif + } +private: + amrex::Array4< amrex::Real const > m_fine; + amrex::Array4< amrex::Real > m_coarse; + int m_refinement_ratio; +}; + +#endif // INTERPOLATECURRENTFINETOCOARSE_H diff --git a/Source/Parallelization/WarpXComm.cpp b/Source/Parallelization/WarpXComm.cpp index e67acec22..6128b27a4 100644 --- a/Source/Parallelization/WarpXComm.cpp +++ b/Source/Parallelization/WarpXComm.cpp @@ -2,6 +2,7 @@ #include <WarpX.H> #include <WarpX_f.H> #include <WarpXSumGuardCells.H> +#include <Parallelization/InterpolateCurrentFineToCoarse.H> #include <algorithm> #include <cstdlib> @@ -487,7 +488,7 @@ WarpX::SyncCurrent () std::array< MultiFab*,3> crse { current_cp[lev][0].get(), current_cp[lev][1].get(), current_cp[lev][2].get() }; - SyncCurrent(fine, crse, refinement_ratio[0]); + interpolateCurrentFineToCoarse(fine, crse, refinement_ratio[0]); } // For each level @@ -499,36 +500,35 @@ WarpX::SyncCurrent () } } -/** \brief Fills the values of the current on the coarse patch by - * averaging the values of the current of the fine patch (on the same level). - */ void -WarpX::SyncCurrent (const std::array<const amrex::MultiFab*,3>& fine, - const std::array< amrex::MultiFab*,3>& crse, - int refinement_ratio) +WarpX::interpolateCurrentFineToCoarse ( std::array< amrex::MultiFab const *, 3 > const & fine, + std::array< amrex::MultiFab *, 3 > const & coarse, + int const refinement_ratio) { + BL_PROFILE("InterpolateCurrentFineToCoarse()"); BL_ASSERT(refinement_ratio == 2); - const IntVect& ng = (fine[0]->nGrowVect() + 1) /refinement_ratio; + const IntVect& ng = (fine[0]->nGrowVect() + 1) / refinement_ratio; // add equivalent no. of guards to coarse patch #ifdef _OPEMP -#pragma omp parallel +#pragma omp parallel if (Gpu::notInLaunchRegion()) #endif { - FArrayBox ffab; - for (int idim = 0; idim < 3; ++idim) + for (int idim = 0; idim < fine.size(); ++idim) // j-field components { - for (MFIter mfi(*crse[idim],true); mfi.isValid(); ++mfi) + // OMP in-box decomposition of coarse into tilebox + for (MFIter mfi(*coarse[idim], TilingIfNotGPU()); mfi.isValid(); ++mfi) { - const Box& bx = mfi.growntilebox(ng); - Box fbx = amrex::grow(amrex::refine(bx,refinement_ratio),1); - ffab.resize(fbx); - fbx &= (*fine[idim])[mfi].box(); - ffab.setVal(0.0); - ffab.copy((*fine[idim])[mfi], fbx, 0, fbx, 0, fine[idim]->nComp()); - WRPX_SYNC_CURRENT(bx.loVect(), bx.hiVect(), - BL_TO_FORTRAN_ANYD((*crse[idim])[mfi]), - BL_TO_FORTRAN_ANYD(ffab), - &idim); + const Box& bx = mfi.growntilebox(ng); // only grow to outer directions of tileboxes for filling guards + + auto const & arrFine = fine[idim]->const_array(mfi); + auto const & arrCoarse = coarse[idim]->array(mfi); + + if( idim == 0 ) + amrex::ParallelFor( bx, InterpolateCurrentFineToCoarse<0>(arrFine, arrCoarse, refinement_ratio) ); + else if( idim == 1 ) + amrex::ParallelFor( bx, InterpolateCurrentFineToCoarse<1>(arrFine, arrCoarse, refinement_ratio) ); + else if( idim == 2 ) + amrex::ParallelFor( bx, InterpolateCurrentFineToCoarse<2>(arrFine, arrCoarse, refinement_ratio) ); } } } @@ -558,9 +558,6 @@ WarpX::SyncRho () } } -/** \brief Fills the values of the charge density on the coarse patch by - * averaging the values of the charge density of the fine patch (on the same level). - */ void WarpX::SyncRho (const MultiFab& fine, MultiFab& crse, int refinement_ratio) { @@ -569,7 +566,7 @@ WarpX::SyncRho (const MultiFab& fine, MultiFab& crse, int refinement_ratio) const int nc = fine.nComp(); #ifdef _OPEMP -#pragma omp parallel +#pragma omp parallel if (Gpu::notInLaunchRegion()) #endif { FArrayBox ffab; @@ -607,7 +604,7 @@ WarpX::RestrictCurrentFromFineToCoarsePatch (int lev) std::array< MultiFab*,3> crse { current_cp[lev][0].get(), current_cp[lev][1].get(), current_cp[lev][2].get() }; - SyncCurrent(fine, crse, refinement_ratio[0]); + interpolateCurrentFineToCoarse(fine, crse, refinement_ratio[0]); } void |