diff options
Diffstat (limited to 'Source/Parallelization')
-rw-r--r-- | Source/Parallelization/InterpolateDensityFineToCoarse.H | 120 | ||||
-rw-r--r-- | Source/Parallelization/WarpXComm.cpp | 35 |
2 files changed, 138 insertions, 17 deletions
diff --git a/Source/Parallelization/InterpolateDensityFineToCoarse.H b/Source/Parallelization/InterpolateDensityFineToCoarse.H new file mode 100644 index 000000000..947db2aac --- /dev/null +++ b/Source/Parallelization/InterpolateDensityFineToCoarse.H @@ -0,0 +1,120 @@ +/* Copyright 2019 Axel Huebl, Weiqun Zhang + * + * This file is part of WarpX. + * + * License: BSD-3-Clause-LBNL + */ + +#ifndef INTERPOLATE_DENSITY_FINE_TO_COARSE_H +#define INTERPOLATE_DENSITY_FINE_TO_COARSE_H + +#include <AMReX_Array4.H> +#include <AMReX_REAL.H> +#include <AMReX_BLassert.H> +#include <AMReX_Extension.H> +#include <AMReX_GpuQualifiers.H> + +#include <utility> // std::move + + +/** Fill a charge density (rho) coarse patch with averaged values from a fine patch + * + * Fills the values of the charge density on the coarse patch + * by averaging the values of the charge density of the fine patch. + */ +class InterpolateDensityFineToCoarse +{ +public: + /** Construct with fine and coarse patch and their refinement ratio + * + * @param[in] fine read-only fine patch + * @param[in,out] coarse overwritten coarse patch + * @param[in] refinement_ratio ratio between coarse and fine patch granularity + * (currently, only a value of is implemented) + * @param[in] number_of_components the number of components + */ + AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE + InterpolateDensityFineToCoarse( + amrex::Array4<amrex::Real const> const fine, + amrex::Array4<amrex::Real > const coarse, + int const refinement_ratio, + int const number_of_components + ) : m_fine(std::move(fine)), + m_coarse(std::move(coarse)), + m_refinement_ratio(std::move(refinement_ratio)), + m_number_of_components(std::move(number_of_components)) + { + //! @note constants and stencils in operator() implementation assume 2x refinement + BL_ASSERT(refinement_ratio == 2); + } + + AMREX_GPU_DEVICE AMREX_FORCE_INLINE + void + operator()( + int const i, + int const j, + int const k + ) const noexcept + { + using namespace amrex; + + auto const & fine_unsafe = m_fine; // out-of-bounds access not secured with zero-values yet + auto const & coarse = m_coarse; // out-of-bounds access not secured but will also not occur + + // return zero for out-of-bounds accesses during interpolation + // this is efficiently used as a method to add neutral elements beyond guards in the average below + auto const fine = [fine_unsafe] AMREX_GPU_DEVICE (int const j, int const k, int const l, int const m) noexcept + { + return fine_unsafe.contains(j, k, l) ? fine_unsafe(j, k, l, m) : amrex::Real{0.}; + }; + + int const ii = i * m_refinement_ratio; + int const jj = j * m_refinement_ratio; + int const kk = k * m_refinement_ratio; + for( int m = 0; m < m_number_of_components; ++m ) { +#if AMREX_SPACEDIM == 2 + coarse(i,j,kk,m) = 0.25_rt * ( + fine(ii,jj,kk,m) + + 0.5_rt * ( + fine(ii-1,jj ,kk,m) + fine(ii+1,jj ,kk,m) + + fine(ii ,jj-1,kk,m) + fine(ii ,jj+1,kk,m) + ) + + 0.25_rt * ( + fine(ii-1,jj-1,kk,m) + fine(ii+1,jj-1,kk,m) + + fine(ii-1,jj+1,kk,m) + fine(ii+1,jj+1,kk,m) + ) + ); +#elif AMREX_SPACEDIM == 3 + coarse(i,j,k,m) = 0.125_rt * ( + fine(ii,jj,kk,m) + + 0.5_rt * ( + fine(ii-1,jj ,kk ,m) + fine(ii+1,jj ,kk ,m) + + fine(ii ,jj-1,kk ,m) + fine(ii ,jj+1,kk ,m) + + fine(ii ,jj ,kk-1,m) + fine(ii ,jj ,kk+1,m) + ) + + 0.25_rt * ( + fine(ii-1,jj-1,kk ,m) + fine(ii+1,jj-1,kk ,m) + + fine(ii-1,jj+1,kk ,m) + fine(ii+1,jj+1,kk ,m) + + fine(ii-1,jj ,kk-1,m) + fine(ii+1,jj ,kk-1,m) + + fine(ii-1,jj ,kk+1,m) + fine(ii+1,jj ,kk+1,m) + + fine(ii ,jj-1,kk-1,m) + fine(ii ,jj+1,kk-1,m) + + fine(ii ,jj-1,kk+1,m) + fine(ii ,jj+1,kk+1,m) + ) + + 0.125_rt * ( + fine(ii-1,jj-1,kk-1,m) + fine(ii-1,jj-1,kk+1,m) + + fine(ii-1,jj+1,kk-1,m) + fine(ii-1,jj+1,kk+1,m) + + fine(ii+1,jj-1,kk-1,m) + fine(ii+1,jj-1,kk+1,m) + + fine(ii+1,jj+1,kk-1,m) + fine(ii+1,jj+1,kk+1,m) + ) + ); +#endif + } + } +private: + amrex::Array4< amrex::Real const > m_fine; + amrex::Array4< amrex::Real > m_coarse; + int m_refinement_ratio; + int m_number_of_components; +}; + +#endif // INTERPOLATE_DENSITY_FINE_TO_COARSE_H diff --git a/Source/Parallelization/WarpXComm.cpp b/Source/Parallelization/WarpXComm.cpp index c5a8877d3..52df3dc25 100644 --- a/Source/Parallelization/WarpXComm.cpp +++ b/Source/Parallelization/WarpXComm.cpp @@ -3,10 +3,12 @@ #include <WarpX_f.H> #include <WarpXSumGuardCells.H> #include <Parallelization/InterpolateCurrentFineToCoarse.H> +#include <Parallelization/InterpolateDensityFineToCoarse.H> #include <algorithm> #include <cstdlib> + using namespace amrex; void @@ -505,7 +507,7 @@ WarpX::interpolateCurrentFineToCoarse ( std::array< amrex::MultiFab const *, 3 > std::array< amrex::MultiFab *, 3 > const & coarse, int const refinement_ratio) { - BL_PROFILE("InterpolateCurrentFineToCoarse()"); + BL_PROFILE("interpolateCurrentFineToCoarse()"); BL_ASSERT(refinement_ratio == 2); const IntVect& ng = (fine[0]->nGrowVect() + 1) / refinement_ratio; // add equivalent no. of guards to coarse patch @@ -537,6 +539,8 @@ WarpX::interpolateCurrentFineToCoarse ( std::array< amrex::MultiFab const *, 3 > void WarpX::SyncRho () { + BL_PROFILE("SyncRho()"); + if (!rho_fp[0]) return; const int ncomp = rho_fp[0]->nComp(); @@ -546,7 +550,7 @@ WarpX::SyncRho () { rho_cp[lev]->setVal(0.0); const IntVect& refinement_ratio = refRatio(lev-1); - SyncRho(*rho_fp[lev], *rho_cp[lev], refinement_ratio[0]); + interpolateDensityFineToCoarse(*rho_fp[lev], *rho_cp[lev], refinement_ratio[0]); } // For each level @@ -559,29 +563,26 @@ WarpX::SyncRho () } void -WarpX::SyncRho (const MultiFab& fine, MultiFab& crse, int refinement_ratio) +WarpX::interpolateDensityFineToCoarse (const MultiFab& fine, MultiFab& coarse, int const refinement_ratio) { + BL_PROFILE("interpolateDensityFineToCoarse()"); BL_ASSERT(refinement_ratio == 2); - const IntVect& ng = (fine.nGrowVect()+1)/refinement_ratio; + const IntVect& ng = (fine.nGrowVect() + 1) / refinement_ratio; // add equivalent no. of guards to coarse patch const int nc = fine.nComp(); #ifdef _OPEMP #pragma omp parallel if (Gpu::notInLaunchRegion()) #endif { - FArrayBox ffab; - for (MFIter mfi(crse,true); mfi.isValid(); ++mfi) + // OMP in-box decomposition of coarse into tilebox + for (MFIter mfi(coarse, TilingIfNotGPU()); mfi.isValid(); ++mfi) { - const Box& bx = mfi.growntilebox(ng); - Box fbx = amrex::grow(amrex::refine(bx,refinement_ratio),1); - ffab.resize(fbx, nc); - fbx &= fine[mfi].box(); - ffab.setVal(0.0); - ffab.copy(fine[mfi], fbx, 0, fbx, 0, nc); - WRPX_SYNC_RHO(bx.loVect(), bx.hiVect(), - BL_TO_FORTRAN_ANYD(crse[mfi]), - BL_TO_FORTRAN_ANYD(ffab), - &nc); + const Box& bx = mfi.growntilebox(ng); // only grow to outer directions of tileboxes for filling guards + + amrex::ParallelFor( + bx, + InterpolateDensityFineToCoarse(fine.const_array(mfi), coarse.array(mfi), refinement_ratio, nc) + ); } } } @@ -713,7 +714,7 @@ WarpX::RestrictRhoFromFineToCoarsePatch (int lev) if (rho_fp[lev]) { rho_cp[lev]->setVal(0.0); const IntVect& refinement_ratio = refRatio(lev-1); - SyncRho(*rho_fp[lev], *rho_cp[lev], refinement_ratio[0]); + interpolateDensityFineToCoarse(*rho_fp[lev], *rho_cp[lev], refinement_ratio[0]); } } |