From 15658267709e5a53a11ef6f945d1bfb283f994a6 Mon Sep 17 00:00:00 2001 From: Axel Huebl Date: Tue, 24 Sep 2019 16:25:49 -0700 Subject: Current Synchronize: Port to GPU Port the current synchronize functions to GPU. --- Source/Parallelization/WarpXComm.cpp | 44 ++++++++++++++++-------------------- 1 file changed, 20 insertions(+), 24 deletions(-) (limited to 'Source/Parallelization/WarpXComm.cpp') diff --git a/Source/Parallelization/WarpXComm.cpp b/Source/Parallelization/WarpXComm.cpp index 990d0f988..22fa4820e 100644 --- a/Source/Parallelization/WarpXComm.cpp +++ b/Source/Parallelization/WarpXComm.cpp @@ -2,6 +2,7 @@ #include #include #include +#include #include #include @@ -348,36 +349,34 @@ WarpX::SyncCurrent () } } -/** \brief Fills the values of the current on the coarse patch by - * averaging the values of the current of the fine patch (on the same level). - */ void WarpX::SyncCurrent (const std::array& fine, - const std::array< amrex::MultiFab*,3>& crse, - int refinement_ratio) + const std::array< amrex::MultiFab*,3>& coarse, + int const refinement_ratio) { BL_ASSERT(refinement_ratio == 2); - const IntVect& ng = (fine[0]->nGrowVect() + 1) /refinement_ratio; + const IntVect& ng = (fine[0]->nGrowVect() + 1) / refinement_ratio; // add equivalent no. of guards to coarse patch #ifdef _OPEMP -#pragma omp parallel +#pragma omp parallel if (Gpu::notInLaunchRegion()) #endif { - FArrayBox ffab; - for (int idim = 0; idim < 3; ++idim) + FArrayBox ffab; // contiguous, temporary, copy of the tiled fine patch to read from + for (int idim = 0; idim < fine.size(); ++idim) // j-field components { - for (MFIter mfi(*crse[idim],true); mfi.isValid(); ++mfi) + for (MFIter mfi(*coarse[idim],true); mfi.isValid(); ++mfi) // OMP in-box decomposition of coarse into tilebox { - const Box& bx = mfi.growntilebox(ng); - Box fbx = amrex::grow(amrex::refine(bx,refinement_ratio),1); - ffab.resize(fbx); - fbx &= (*fine[idim])[mfi].box(); - ffab.setVal(0.0); - ffab.copy((*fine[idim])[mfi], fbx, 0, fbx, 0, fine[idim]->nComp()); - WRPX_SYNC_CURRENT(bx.loVect(), bx.hiVect(), - BL_TO_FORTRAN_ANYD((*crse[idim])[mfi]), - BL_TO_FORTRAN_ANYD(ffab), - &idim); + const Box& bx = mfi.growntilebox(ng); // only grow to outer directions of tileboxes for filling guards + + auto const & arrFine = fine[idim]->const_array(mfi); + auto const & arrCoarse = coarse[idim]->array(mfi); + + if( idim == 0 ) + amrex::ParallelFor( bx, WarpxSyncCurrent<0>(arrFine, arrCoarse, refinement_ratio) ); + else if( idim == 1 ) + amrex::ParallelFor( bx, WarpxSyncCurrent<1>(arrFine, arrCoarse, refinement_ratio) ); + else if( idim == 2 ) + amrex::ParallelFor( bx, WarpxSyncCurrent<2>(arrFine, arrCoarse, refinement_ratio) ); } } } @@ -407,9 +406,6 @@ WarpX::SyncRho () } } -/** \brief Fills the values of the charge density on the coarse patch by - * averaging the values of the charge density of the fine patch (on the same level). - */ void WarpX::SyncRho (const MultiFab& fine, MultiFab& crse, int refinement_ratio) { @@ -418,7 +414,7 @@ WarpX::SyncRho (const MultiFab& fine, MultiFab& crse, int refinement_ratio) const int nc = fine.nComp(); #ifdef _OPEMP -#pragma omp parallel +#pragma omp parallel if (Gpu::notInLaunchRegion()) #endif { FArrayBox ffab; -- cgit v1.2.3 From d67030eb709d965672a9900132d0085922335cc1 Mon Sep 17 00:00:00 2001 From: Axel Huebl Date: Wed, 25 Sep 2019 09:54:52 -0700 Subject: SyncCurrent: Disable Tiling for GPU Co-authored-by: Weiqun Zhang --- Source/Parallelization/WarpXComm.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'Source/Parallelization/WarpXComm.cpp') diff --git a/Source/Parallelization/WarpXComm.cpp b/Source/Parallelization/WarpXComm.cpp index 22fa4820e..40f8203a9 100644 --- a/Source/Parallelization/WarpXComm.cpp +++ b/Source/Parallelization/WarpXComm.cpp @@ -361,10 +361,10 @@ WarpX::SyncCurrent (const std::array& fine, #pragma omp parallel if (Gpu::notInLaunchRegion()) #endif { - FArrayBox ffab; // contiguous, temporary, copy of the tiled fine patch to read from for (int idim = 0; idim < fine.size(); ++idim) // j-field components { - for (MFIter mfi(*coarse[idim],true); mfi.isValid(); ++mfi) // OMP in-box decomposition of coarse into tilebox + // OMP in-box decomposition of coarse into tilebox + for (MFIter mfi(*coarse[idim], TilingIfNotGPU()); mfi.isValid(); ++mfi) { const Box& bx = mfi.growntilebox(ng); // only grow to outer directions of tileboxes for filling guards -- cgit v1.2.3 From c9577f8d200d99c40d15d5ff0d2fdacbddc2026f Mon Sep 17 00:00:00 2001 From: Axel Huebl Date: Fri, 27 Sep 2019 11:14:00 -0700 Subject: Rename, Profile & Remove TODO --- Source/Parallelization/CurrentSynchronize.H | 175 --------------------- .../InterpolateCurrentFineToCoarse.H | 175 +++++++++++++++++++++ Source/Parallelization/WarpXComm.cpp | 19 +-- Source/WarpX.H | 6 +- 4 files changed, 188 insertions(+), 187 deletions(-) delete mode 100644 Source/Parallelization/CurrentSynchronize.H create mode 100644 Source/Parallelization/InterpolateCurrentFineToCoarse.H (limited to 'Source/Parallelization/WarpXComm.cpp') diff --git a/Source/Parallelization/CurrentSynchronize.H b/Source/Parallelization/CurrentSynchronize.H deleted file mode 100644 index 5329ca242..000000000 --- a/Source/Parallelization/CurrentSynchronize.H +++ /dev/null @@ -1,175 +0,0 @@ -/* Copyright 2019 Axel Huebl, Weiqun Zhang - * - * This file is part of WarpX. - * - * License: BSD-3-Clause-LBNL - */ - -#ifndef WARPX_CURRENTSYNCHRONIZE_H -#define WARPX_CURRENTSYNCHRONIZE_H - -#include -#include -#include -#include -#include - -#include // std::move - - -/** Fill a current coarse patch with averaged values from a fine patch - * - * Fills the values of the current for a selected component on the coarse patch - * by averaging the values of the current of the fine patch. - * - * @tparam IDim j-field component on which the averaging is performed - */ -template< int IDim > -class WarpxSyncCurrent -{ -public: - /** Construct with fine and coarse patch and their refinement ratio - * - * @param[in] fine read-only fine patch - * @param[in,out] coarse overwritten coarse patch - * @param[in] refinement_ratio ratio between coarse and fine patch granularity - * (currently, only a value of is implemented) - */ - AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE - WarpxSyncCurrent( - amrex::Array4 const fine, - amrex::Array4 const coarse, - int const refinement_ratio - ) : m_fine(std::move(fine)), - m_coarse(std::move(coarse)), - m_refinement_ratio(std::move(refinement_ratio)) - { - //! @note constants and stencils in operator() implementation assume 2x refinement - BL_ASSERT(refinement_ratio == 2); - } - - AMREX_GPU_DEVICE AMREX_FORCE_INLINE - void - operator()( - int const i, - int const j, - int const k - ) const noexcept // TODO rename to jkl - { - auto const & fine_unsafe = m_fine; // out-of-bounds access not secured with zero-values yet - auto const & coarse = m_coarse; // out-of-bounds access not secured but will also not occur - - // return zero for out-of-bounds accesses during interpolation - // this is efficiently used as a method to add neutral elements beyond guards in the average below - auto const fine = [fine_unsafe] AMREX_GPU_DEVICE (int const j, int const k, int const l) noexcept - { - return fine_unsafe.contains(j, k, l) ? fine_unsafe(j, k, l) : amrex::Real{0.}; - }; - - int const ii = i * m_refinement_ratio; - int const jj = j * m_refinement_ratio; - int const kk = k * m_refinement_ratio; -#if AMREX_SPACEDIM == 2 - if (IDim == 0) { - coarse(i, j, k) = 0.25 * ( - fine(ii, jj, kk) + fine(ii + 1, jj, kk) + - 0.5 * ( - fine(ii, jj - 1, kk) + fine(ii + 1, jj - 1, kk) + - fine(ii, jj + 1, kk) + fine(ii + 1, jj + 1, kk) - ) - ); - } else if (IDim == 2) { - coarse(i, j, k) = 0.25 * ( - fine(ii, jj, kk) + fine(ii, jj + 1, kk) + - 0.5 * ( - fine(ii - 1, jj, kk) + fine(ii - 1, jj + 1, kk) + - fine(ii + 1, jj, kk) + fine(ii + 1, jj + 1, kk) - ) - ); - } else { - coarse(i, j, k) = 0.25 * ( - fine(ii, jj, kk) + - 0.5 * ( - fine(ii - 1, jj , kk) + fine(ii + 1, jj , kk) + - fine(ii , jj - 1, kk) + fine(ii , jj + 1, kk) - ) + - 0.25 * ( - fine(ii - 1, jj - 1, kk) + fine(ii + 1, jj - 1, kk) + - fine(ii - 1, jj + 1, kk) + fine(ii + 1, jj + 1, kk) - ) - ); - } -#elif AMREX_SPACEDIM == 3 - if (IDim == 0) { - coarse(i,j,k) = 0.125 * ( - fine(ii , jj, kk) + - 0.5 * ( - fine(ii , jj-1, kk ) + fine(ii , jj+1, kk ) + - fine(ii , jj , kk-1) + fine(ii , jj , kk+1) - ) + - 0.25 * ( - fine(ii , jj-1, kk-1) + fine(ii , jj+1, kk-1) + - fine(ii , jj-1, kk+1) + fine(ii , jj+1, kk+1) - ) + - fine(ii+1, jj, kk) + - 0.5 * ( - fine(ii+1, jj-1, kk ) + fine(ii+1, jj+1, kk ) + - fine(ii+1, jj , kk-1) + fine(ii+1, jj , kk+1) - ) + - 0.25 * ( - fine(ii+1, jj-1, kk-1) + fine(ii+1, jj+1, kk-1) + - fine(ii+1, jj-1, kk+1) + fine(ii+1, jj+1, kk+1) - ) - ); - } else if (IDim == 1) { - coarse(i, j, k) = 0.125 * ( - fine(ii, jj , kk) + - 0.5 * ( - fine(ii-1, jj , kk ) + fine(ii+1, jj , kk ) + - fine(ii , jj , kk-1) + fine(ii , jj , kk+1) - ) + - 0.25 * ( - fine(ii-1, jj , kk-1) + fine(ii+1, jj , kk-1) + - fine(ii-1, jj , kk+1) + fine(ii+1, jj , kk+1) - ) + - fine(ii, jj+1, kk) + - 0.5 * ( - fine(ii-1, jj+1, kk ) + fine(ii+1, jj+1, kk ) + - fine(ii , jj+1, kk-1) + fine(ii , jj+1, kk+1) - ) + - 0.25 * ( - fine(ii-1, jj+1, kk-1) + fine(ii+1, jj+1, kk-1) + - fine(ii-1, jj+1, kk+1) + fine(ii+1, jj+1, kk+1) - ) - ); - } else { - coarse(i, j, k) = 0.125 * ( - fine(ii, jj, kk ) + - 0.5 * ( - fine(ii-1, jj , kk ) + fine(ii+1, jj , kk ) + - fine(ii , jj-1, kk ) + fine(ii , jj+1, kk ) - ) + - 0.25 * ( - fine(ii-1, jj-1, kk ) + fine(ii+1, jj-1, kk ) + - fine(ii-1, jj+1, kk ) + fine(ii+1, jj+1, kk ) - ) + - fine(ii, jj, kk+1) + - 0.5 * ( - fine(ii-1, jj , kk+1) + fine(ii+1, jj , kk+1) + - fine(ii , jj-1, kk+1) + fine(ii , jj+1, kk+1) - ) + - 0.25 * ( - fine(ii-1, jj-1, kk+1) + fine(ii+1, jj-1, kk+1) + - fine(ii-1, jj+1, kk+1) + fine(ii+1, jj+1, kk+1) - ) - ); - } -#endif - } -private: - amrex::Array4< amrex::Real const > m_fine; - amrex::Array4< amrex::Real > m_coarse; - int m_refinement_ratio; -}; - -#endif //WARPX_CURRENTSYNCHRONIZE_H diff --git a/Source/Parallelization/InterpolateCurrentFineToCoarse.H b/Source/Parallelization/InterpolateCurrentFineToCoarse.H new file mode 100644 index 000000000..148b725d0 --- /dev/null +++ b/Source/Parallelization/InterpolateCurrentFineToCoarse.H @@ -0,0 +1,175 @@ +/* Copyright 2019 Axel Huebl, Weiqun Zhang + * + * This file is part of WarpX. + * + * License: BSD-3-Clause-LBNL + */ + +#ifndef INTERPOLATECURRENTFINETOCOARSE_H +#define INTERPOLATECURRENTFINETOCOARSE_H + +#include +#include +#include +#include +#include + +#include // std::move + + +/** Fill a current coarse patch with averaged values from a fine patch + * + * Fills the values of the current for a selected component on the coarse patch + * by averaging the values of the current of the fine patch. + * + * @tparam IDim j-field component on which the averaging is performed + */ +template< int IDim > +class InterpolateCurrentFineToCoarse +{ +public: + /** Construct with fine and coarse patch and their refinement ratio + * + * @param[in] fine read-only fine patch + * @param[in,out] coarse overwritten coarse patch + * @param[in] refinement_ratio ratio between coarse and fine patch granularity + * (currently, only a value of is implemented) + */ + AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE + InterpolateCurrentFineToCoarse( + amrex::Array4< amrex::Real const > const fine, + amrex::Array4< amrex::Real > const coarse, + int const refinement_ratio + ) : m_fine(std::move(fine)), + m_coarse(std::move(coarse)), + m_refinement_ratio(std::move(refinement_ratio)) + { + //! @note constants and stencils in operator() implementation assume 2x refinement + BL_ASSERT(refinement_ratio == 2); + } + + AMREX_GPU_DEVICE AMREX_FORCE_INLINE + void + operator()( + int const i, + int const j, + int const k + ) const noexcept + { + auto const & fine_unsafe = m_fine; // out-of-bounds access not secured with zero-values yet + auto const & coarse = m_coarse; // out-of-bounds access not secured but will also not occur + + // return zero for out-of-bounds accesses during interpolation + // this is efficiently used as a method to add neutral elements beyond guards in the average below + auto const fine = [fine_unsafe] AMREX_GPU_DEVICE (int const j, int const k, int const l) noexcept + { + return fine_unsafe.contains(j, k, l) ? fine_unsafe(j, k, l) : amrex::Real{0.}; + }; + + int const ii = i * m_refinement_ratio; + int const jj = j * m_refinement_ratio; + int const kk = k * m_refinement_ratio; +#if AMREX_SPACEDIM == 2 + if (IDim == 0) { + coarse(i, j, k) = 0.25 * ( + fine(ii, jj, kk) + fine(ii + 1, jj, kk) + + 0.5 * ( + fine(ii, jj - 1, kk) + fine(ii + 1, jj - 1, kk) + + fine(ii, jj + 1, kk) + fine(ii + 1, jj + 1, kk) + ) + ); + } else if (IDim == 2) { + coarse(i, j, k) = 0.25 * ( + fine(ii, jj, kk) + fine(ii, jj + 1, kk) + + 0.5 * ( + fine(ii - 1, jj, kk) + fine(ii - 1, jj + 1, kk) + + fine(ii + 1, jj, kk) + fine(ii + 1, jj + 1, kk) + ) + ); + } else { + coarse(i, j, k) = 0.25 * ( + fine(ii, jj, kk) + + 0.5 * ( + fine(ii - 1, jj , kk) + fine(ii + 1, jj , kk) + + fine(ii , jj - 1, kk) + fine(ii , jj + 1, kk) + ) + + 0.25 * ( + fine(ii - 1, jj - 1, kk) + fine(ii + 1, jj - 1, kk) + + fine(ii - 1, jj + 1, kk) + fine(ii + 1, jj + 1, kk) + ) + ); + } +#elif AMREX_SPACEDIM == 3 + if (IDim == 0) { + coarse(i,j,k) = 0.125 * ( + fine(ii , jj, kk) + + 0.5 * ( + fine(ii , jj-1, kk ) + fine(ii , jj+1, kk ) + + fine(ii , jj , kk-1) + fine(ii , jj , kk+1) + ) + + 0.25 * ( + fine(ii , jj-1, kk-1) + fine(ii , jj+1, kk-1) + + fine(ii , jj-1, kk+1) + fine(ii , jj+1, kk+1) + ) + + fine(ii+1, jj, kk) + + 0.5 * ( + fine(ii+1, jj-1, kk ) + fine(ii+1, jj+1, kk ) + + fine(ii+1, jj , kk-1) + fine(ii+1, jj , kk+1) + ) + + 0.25 * ( + fine(ii+1, jj-1, kk-1) + fine(ii+1, jj+1, kk-1) + + fine(ii+1, jj-1, kk+1) + fine(ii+1, jj+1, kk+1) + ) + ); + } else if (IDim == 1) { + coarse(i, j, k) = 0.125 * ( + fine(ii, jj , kk) + + 0.5 * ( + fine(ii-1, jj , kk ) + fine(ii+1, jj , kk ) + + fine(ii , jj , kk-1) + fine(ii , jj , kk+1) + ) + + 0.25 * ( + fine(ii-1, jj , kk-1) + fine(ii+1, jj , kk-1) + + fine(ii-1, jj , kk+1) + fine(ii+1, jj , kk+1) + ) + + fine(ii, jj+1, kk) + + 0.5 * ( + fine(ii-1, jj+1, kk ) + fine(ii+1, jj+1, kk ) + + fine(ii , jj+1, kk-1) + fine(ii , jj+1, kk+1) + ) + + 0.25 * ( + fine(ii-1, jj+1, kk-1) + fine(ii+1, jj+1, kk-1) + + fine(ii-1, jj+1, kk+1) + fine(ii+1, jj+1, kk+1) + ) + ); + } else { + coarse(i, j, k) = 0.125 * ( + fine(ii, jj, kk ) + + 0.5 * ( + fine(ii-1, jj , kk ) + fine(ii+1, jj , kk ) + + fine(ii , jj-1, kk ) + fine(ii , jj+1, kk ) + ) + + 0.25 * ( + fine(ii-1, jj-1, kk ) + fine(ii+1, jj-1, kk ) + + fine(ii-1, jj+1, kk ) + fine(ii+1, jj+1, kk ) + ) + + fine(ii, jj, kk+1) + + 0.5 * ( + fine(ii-1, jj , kk+1) + fine(ii+1, jj , kk+1) + + fine(ii , jj-1, kk+1) + fine(ii , jj+1, kk+1) + ) + + 0.25 * ( + fine(ii-1, jj-1, kk+1) + fine(ii+1, jj-1, kk+1) + + fine(ii-1, jj+1, kk+1) + fine(ii+1, jj+1, kk+1) + ) + ); + } +#endif + } +private: + amrex::Array4< amrex::Real const > m_fine; + amrex::Array4< amrex::Real > m_coarse; + int m_refinement_ratio; +}; + +#endif // INTERPOLATECURRENTFINETOCOARSE_H diff --git a/Source/Parallelization/WarpXComm.cpp b/Source/Parallelization/WarpXComm.cpp index 40f8203a9..4f870e79c 100644 --- a/Source/Parallelization/WarpXComm.cpp +++ b/Source/Parallelization/WarpXComm.cpp @@ -2,7 +2,7 @@ #include #include #include -#include +#include #include #include @@ -337,7 +337,7 @@ WarpX::SyncCurrent () std::array< MultiFab*,3> crse { current_cp[lev][0].get(), current_cp[lev][1].get(), current_cp[lev][2].get() }; - SyncCurrent(fine, crse, refinement_ratio[0]); + interpolateCurrentFineToCoarse(fine, crse, refinement_ratio[0]); } // For each level @@ -350,10 +350,11 @@ WarpX::SyncCurrent () } void -WarpX::SyncCurrent (const std::array& fine, - const std::array< amrex::MultiFab*,3>& coarse, - int const refinement_ratio) +WarpX::interpolateCurrentFineToCoarse ( std::array< amrex::MultiFab const *, 3 > const & fine, + std::array< amrex::MultiFab *, 3 > const & coarse, + int const refinement_ratio) { + BL_PROFILE("InterpolateCurrentFineToCoarse()"); BL_ASSERT(refinement_ratio == 2); const IntVect& ng = (fine[0]->nGrowVect() + 1) / refinement_ratio; // add equivalent no. of guards to coarse patch @@ -372,11 +373,11 @@ WarpX::SyncCurrent (const std::array& fine, auto const & arrCoarse = coarse[idim]->array(mfi); if( idim == 0 ) - amrex::ParallelFor( bx, WarpxSyncCurrent<0>(arrFine, arrCoarse, refinement_ratio) ); + amrex::ParallelFor( bx, InterpolateCurrentFineToCoarse<0>(arrFine, arrCoarse, refinement_ratio) ); else if( idim == 1 ) - amrex::ParallelFor( bx, WarpxSyncCurrent<1>(arrFine, arrCoarse, refinement_ratio) ); + amrex::ParallelFor( bx, InterpolateCurrentFineToCoarse<1>(arrFine, arrCoarse, refinement_ratio) ); else if( idim == 2 ) - amrex::ParallelFor( bx, WarpxSyncCurrent<2>(arrFine, arrCoarse, refinement_ratio) ); + amrex::ParallelFor( bx, InterpolateCurrentFineToCoarse<2>(arrFine, arrCoarse, refinement_ratio) ); } } } @@ -452,7 +453,7 @@ WarpX::RestrictCurrentFromFineToCoarsePatch (int lev) std::array< MultiFab*,3> crse { current_cp[lev][0].get(), current_cp[lev][1].get(), current_cp[lev][2].get() }; - SyncCurrent(fine, crse, refinement_ratio[0]); + interpolateCurrentFineToCoarse(fine, crse, refinement_ratio[0]); } void diff --git a/Source/WarpX.H b/Source/WarpX.H index 9b9fb045c..c59802427 100644 --- a/Source/WarpX.H +++ b/Source/WarpX.H @@ -428,9 +428,9 @@ private: * \param[out] coarse coarse patches to interpolate to * \param[in] refinement_ratio integer ratio between the two */ - void SyncCurrent (const std::array& fine, - const std::array< amrex::MultiFab*,3>& coarse, - int const refinement_ratio); + void interpolateCurrentFineToCoarse (std::array< amrex::MultiFab const *, 3 > const & fine, + std::array< amrex::MultiFab *, 3 > const & coarse, + int const refinement_ratio); /** \brief Fills the values of the charge density on the coarse patch by * averaging the values of the charge density of the fine patch (on the same level). -- cgit v1.2.3 From 69b0a98b3e8495eb2eca4269533516d66f3ff6f3 Mon Sep 17 00:00:00 2001 From: Weiqun Zhang Date: Fri, 27 Sep 2019 11:41:49 -0700 Subject: nodal auxilary data --- Source/Parallelization/WarpXComm.cpp | 151 ++++++++++ Source/Parallelization/WarpXComm_K.H | 485 +++++++++++++++++++++++++++++++++ Source/Parallelization/WarpXRegrid.cpp | 3 +- Source/WarpX.H | 2 + Source/WarpX.cpp | 56 ++-- 5 files changed, 680 insertions(+), 17 deletions(-) (limited to 'Source/Parallelization/WarpXComm.cpp') diff --git a/Source/Parallelization/WarpXComm.cpp b/Source/Parallelization/WarpXComm.cpp index 990d0f988..e67acec22 100644 --- a/Source/Parallelization/WarpXComm.cpp +++ b/Source/Parallelization/WarpXComm.cpp @@ -51,6 +51,157 @@ WarpX::UpdateAuxilaryData () { BL_PROFILE("UpdateAuxilaryData()"); + if (Bfield_aux[0][0]->is_nodal()) { + UpdateNodalAuxilaryData(); + } else { + UpdateStagAuxilaryData(); + } +} + +void +WarpX::UpdateNodalAuxilaryData () +{ + // For level 0, we only need to do the average. +#ifdef _OPENMP +#pragma omp parallel if (Gpu::notInLaunchRegion()) +#endif + for (MFIter mfi(*Bfield_aux[0][0]); mfi.isValid(); ++mfi) + { + Array4 const& bx_aux = Bfield_aux[0][0]->array(mfi); + Array4 const& by_aux = Bfield_aux[0][1]->array(mfi); + Array4 const& bz_aux = Bfield_aux[0][2]->array(mfi); + Array4 const& bx_fp = Bfield_fp[0][0]->const_array(mfi); + Array4 const& by_fp = Bfield_fp[0][1]->const_array(mfi); + Array4 const& bz_fp = Bfield_fp[0][2]->const_array(mfi); + + Array4 const& ex_aux = Efield_aux[0][0]->array(mfi); + Array4 const& ey_aux = Efield_aux[0][1]->array(mfi); + Array4 const& ez_aux = Efield_aux[0][2]->array(mfi); + Array4 const& ex_fp = Efield_fp[0][0]->const_array(mfi); + Array4 const& ey_fp = Efield_fp[0][1]->const_array(mfi); + Array4 const& ez_fp = Efield_fp[0][2]->const_array(mfi); + + const Box& bx = mfi.fabbox(); + amrex::ParallelFor(bx, + [=] AMREX_GPU_DEVICE (int j, int k, int l) noexcept + { + warpx_interp_nd_bfield_x(j,k,l, bx_aux, bx_fp); + warpx_interp_nd_bfield_y(j,k,l, by_aux, by_fp); + warpx_interp_nd_bfield_z(j,k,l, bz_aux, bz_fp); + warpx_interp_nd_efield_x(j,k,l, ex_aux, ex_fp); + warpx_interp_nd_efield_y(j,k,l, ey_aux, ey_fp); + warpx_interp_nd_efield_z(j,k,l, ez_aux, ez_fp); + }); + } + + for (int lev = 1; lev <= finest_level; ++lev) + { + BoxArray const& nba = Bfield_aux[lev][0]->boxArray(); + BoxArray const& cnba = amrex::coarsen(nba,2); + DistributionMapping const& dm = Bfield_aux[lev][0]->DistributionMap(); + auto const& cperiod = Geom(lev-1).periodicity(); + + // Bfield + { + Array,3> Btmp; + if (Bfield_cax[lev][0]) { + for (int i = 0; i < 3; ++i) { + Btmp[i].reset(new MultiFab(*Bfield_cax[lev][i], amrex::make_alias, 0, 1)); + } + } else { + IntVect ngtmp = Bfield_aux[lev-1][0]->nGrowVect(); + for (int i = 0; i < 3; ++i) { + Btmp[i].reset(new MultiFab(cnba, dm, 1, ngtmp)); + } + } + // ParallelCopy from coarse level + for (int i = 0; i < 3; ++i) { + IntVect ng = Btmp[i]->nGrowVect(); + Btmp[i]->ParallelCopy(*Bfield_aux[lev-1][i], 0, 0, 1, ng, ng, cperiod); + } + +#ifdef _OPENMP +#pragma omp parallel if (Gpu::notInLaunchRegion()) +#endif + for (MFIter mfi(*Bfield_aux[lev][0]); mfi.isValid(); ++mfi) + { + Array4 const& bx_aux = Bfield_aux[lev][0]->array(mfi); + Array4 const& by_aux = Bfield_aux[lev][1]->array(mfi); + Array4 const& bz_aux = Bfield_aux[lev][2]->array(mfi); + Array4 const& bx_fp = Bfield_fp[lev][0]->const_array(mfi); + Array4 const& by_fp = Bfield_fp[lev][1]->const_array(mfi); + Array4 const& bz_fp = Bfield_fp[lev][2]->const_array(mfi); + Array4 const& bx_cp = Bfield_cp[lev][0]->const_array(mfi); + Array4 const& by_cp = Bfield_cp[lev][1]->const_array(mfi); + Array4 const& bz_cp = Bfield_cp[lev][2]->const_array(mfi); + Array4 const& bx_c = Btmp[0]->const_array(mfi); + Array4 const& by_c = Btmp[1]->const_array(mfi); + Array4 const& bz_c = Btmp[2]->const_array(mfi); + + const Box& bx = mfi.fabbox(); + amrex::ParallelFor(bx, + [=] AMREX_GPU_DEVICE (int j, int k, int l) noexcept + { + warpx_interp_nd_bfield_x(j,k,l, bx_aux, bx_fp, bx_cp, bx_c); + warpx_interp_nd_bfield_y(j,k,l, by_aux, by_fp, by_cp, by_c); + warpx_interp_nd_bfield_z(j,k,l, bz_aux, bz_fp, bz_cp, bz_c); + }); + } + } + + // Efield + { + Array,3> Etmp; + if (Efield_cax[lev][0]) { + for (int i = 0; i < 3; ++i) { + Etmp[i].reset(new MultiFab(*Efield_cax[lev][i], amrex::make_alias, 0, 1)); + } + } else { + IntVect ngtmp = Efield_aux[lev-1][0]->nGrowVect(); + for (int i = 0; i < 3; ++i) { + Etmp[i].reset(new MultiFab(cnba, dm, 1, ngtmp)); + } + } + // ParallelCopy from coarse level + for (int i = 0; i < 3; ++i) { + IntVect ng = Etmp[i]->nGrowVect(); + Etmp[i]->ParallelCopy(*Efield_aux[lev-1][i], 0, 0, 1, ng, ng, cperiod); + } + +#ifdef _OPENMP +#pragma omp parallel if (Gpu::notInLaunchRegion()) +#endif + for (MFIter mfi(*Efield_aux[lev][0]); mfi.isValid(); ++mfi) + { + Array4 const& ex_aux = Efield_aux[lev][0]->array(mfi); + Array4 const& ey_aux = Efield_aux[lev][1]->array(mfi); + Array4 const& ez_aux = Efield_aux[lev][2]->array(mfi); + Array4 const& ex_fp = Efield_fp[lev][0]->const_array(mfi); + Array4 const& ey_fp = Efield_fp[lev][1]->const_array(mfi); + Array4 const& ez_fp = Efield_fp[lev][2]->const_array(mfi); + Array4 const& ex_cp = Efield_cp[lev][0]->const_array(mfi); + Array4 const& ey_cp = Efield_cp[lev][1]->const_array(mfi); + Array4 const& ez_cp = Efield_cp[lev][2]->const_array(mfi); + Array4 const& ex_c = Etmp[0]->const_array(mfi); + Array4 const& ey_c = Etmp[1]->const_array(mfi); + Array4 const& ez_c = Etmp[2]->const_array(mfi); + + const Box& bx = mfi.fabbox(); + amrex::ParallelFor(bx, + [=] AMREX_GPU_DEVICE (int j, int k, int l) noexcept + { + warpx_interp_nd_efield_x(j,k,l, ex_aux, ex_fp, ex_cp, ex_c); + warpx_interp_nd_efield_y(j,k,l, ey_aux, ey_fp, ey_cp, ey_c); + warpx_interp_nd_efield_z(j,k,l, ez_aux, ez_fp, ez_cp, ez_c); + }); + } + } + } +} + +void +WarpX::UpdateStagAuxilaryData () +{ for (int lev = 1; lev <= finest_level; ++lev) { const auto& crse_period = Geom(lev-1).periodicity(); diff --git a/Source/Parallelization/WarpXComm_K.H b/Source/Parallelization/WarpXComm_K.H index 093323ec3..5da867c9f 100644 --- a/Source/Parallelization/WarpXComm_K.H +++ b/Source/Parallelization/WarpXComm_K.H @@ -158,4 +158,489 @@ void warpx_interp_efield_z (int j, int k, int l, #endif } +AMREX_GPU_DEVICE AMREX_FORCE_INLINE +void warpx_interp_nd_bfield_x (int j, int k, int l, + amrex::Array4 const& Bxa, + amrex::Array4 const& Bxf, + amrex::Array4 const& Bxc, + amrex::Array4 const& Bxg) +{ + using namespace amrex; + + int jg = amrex::coarsen(j,2); + Real wx = (j == jg*2) ? 0.0 : 0.5; + Real owx = 1.0-wx; + + int kg = amrex::coarsen(k,2); + Real wy = (k == kg*2) ? 0.0 : 0.5; + Real owy = 1.0-wy; + +#if (AMREX_SPACEDIM == 2) + + // interp from coarse nodal to fine nodal + Real bg = owx * owy * Bxg(jg ,kg ,0) + + owx * wy * Bxg(jg ,kg+1,0) + + wx * owy * Bxg(jg+1,kg ,0) + + wx * wy * Bxg(jg+1,kg+1,0); + + // interp from coarse staggered to fine nodal + wy = 0.5-wy; owy = 1.0-wy; + Real bc = owx * owy * Bxc(jg ,kg ,0) + + owx * wy * Bxc(jg ,kg-1,0) + + wx * owy * Bxc(jg+1,kg ,0) + + wx * wy * Bxc(jg+1,kg-1,0); + + // interp from fine staggered to fine nodal + Real bf = 0.5*(Bxf(j,k-1,0) + Bxf(j,k,0)); + +#else + + int lg = amrex::coarsen(l,2); + Real wz = (l == lg*2) ? 0.0 : 0.5; + Real owz = 1.0-wz; + + // interp from coarse nodal to fine nodal + Real bg = owx * owy * owz * Bxg(jg ,kg ,lg ) + + wx * owy * owz * Bxg(jg+1,kg ,lg ) + + owx * wy * owz * Bxg(jg ,kg+1,lg ) + + wx * wy * owz * Bxg(jg+1,kg+1,lg ) + + owx * owy * wz * Bxg(jg ,kg ,lg+1) + + wx * owy * wz * Bxg(jg+1,kg ,lg+1) + + owx * wy * wz * Bxg(jg ,kg+1,lg+1) + + wx * wy * wz * Bxg(jg+1,kg+1,lg+1); + + // interp from coarse staggered to fine nodal + wy = 0.5-wy; owy = 1.0-wy; + wz = 0.5-wz; owz = 1.0-wz; + Real bc = owx * owy * owz * Bxc(jg ,kg ,lg ) + + wx * owy * owz * Bxc(jg+1,kg ,lg ) + + owx * wy * owz * Bxc(jg ,kg-1,lg ) + + wx * wy * owz * Bxc(jg+1,kg-1,lg ) + + owx * owy * wz * Bxc(jg ,kg ,lg-1) + + wx * owy * wz * Bxc(jg+1,kg ,lg-1) + + owx * wy * wz * Bxc(jg ,kg-1,lg-1) + + wx * wy * wz * Bxc(jg+1,kg-1,lg-1); + + // interp from fine stagged to fine nodal + Real bf = 0.25*(Bxf(j,k-1,l-1) + Bxf(j,k,l-1) + Bxf(j,k-1,l) + Bxf(j,k,l)); +#endif + + Bxa(j,k,l) = bg + (bf-bc); +} + +AMREX_GPU_DEVICE AMREX_FORCE_INLINE +void warpx_interp_nd_bfield_y (int j, int k, int l, + amrex::Array4 const& Bya, + amrex::Array4 const& Byf, + amrex::Array4 const& Byc, + amrex::Array4 const& Byg) +{ + using namespace amrex; + + int jg = amrex::coarsen(j,2); + Real wx = (j == jg*2) ? 0.0 : 0.5; + Real owx = 1.0-wx; + + int kg = amrex::coarsen(k,2); + Real wy = (k == kg*2) ? 0.0 : 0.5; + Real owy = 1.0-wy; + +#if (AMREX_SPACEDIM == 2) + + // interp from coarse nodal to fine nodal + Real bg = owx * owy * Byg(jg ,kg ,0) + + owx * wy * Byg(jg ,kg+1,0) + + wx * owy * Byg(jg+1,kg ,0) + + wx * wy * Byg(jg+1,kg+1,0); + + // interp from coarse stagged (cell-centered for By) to fine nodal + wx = 0.5-wx; owx = 1.0-wx; + wy = 0.5-wy; owy = 1.0-wy; + Real bc = owx * owy * Byc(jg ,kg ,0) + + owx * wy * Byc(jg ,kg-1,0) + + wx * owy * Byc(jg-1,kg ,0) + + wx * wy * Byc(jg-1,kg-1,0); + + // interp form fine stagger (cell-centered for By) to fine nodal + Real bf = 0.25*(Byf(j,k,0) + Byf(j-1,k,0) + Byf(j,k-1,0) + Byf(j-1,k-1,0)); + +#else + + int lg = amrex::coarsen(l,2); + Real wz = (l == lg*2) ? 0.0 : 0.5; + Real owz = 1.0-wz; + + // interp from coarse nodal to fine nodal + Real bg = owx * owy * owz * Byg(jg ,kg ,lg ) + + wx * owy * owz * Byg(jg+1,kg ,lg ) + + owx * wy * owz * Byg(jg ,kg+1,lg ) + + wx * wy * owz * Byg(jg+1,kg+1,lg ) + + owx * owy * wz * Byg(jg ,kg ,lg+1) + + wx * owy * wz * Byg(jg+1,kg ,lg+1) + + owx * wy * wz * Byg(jg ,kg+1,lg+1) + + wx * wy * wz * Byg(jg+1,kg+1,lg+1); + + // interp from coarse staggered to fine nodal + wx = 0.5-wx; owx = 1.0-wx; + wz = 0.5-wz; owz = 1.0-wz; + Real bc = owx * owy * owz * Byc(jg ,kg ,lg ) + + wx * owy * owz * Byc(jg-1,kg ,lg ) + + owx * wy * owz * Byc(jg ,kg+1,lg ) + + wx * wy * owz * Byc(jg-1,kg+1,lg ) + + owx * owy * wz * Byc(jg ,kg ,lg-1) + + wx * owy * wz * Byc(jg-1,kg ,lg-1) + + owx * wy * wz * Byc(jg ,kg+1,lg-1) + + wx * wy * wz * Byc(jg-1,kg+1,lg-1); + + // interp from fine stagged to fine nodal + Real bf = 0.25*(Byf(j-1,k,l-1) + Byf(j,k,l-1) + Byf(j-1,k,l) + Byf(j,k,l)); + +#endif + + Bya(j,k,l) = bg + (bf-bc); +} + +AMREX_GPU_DEVICE AMREX_FORCE_INLINE +void warpx_interp_nd_bfield_z (int j, int k, int l, + amrex::Array4 const& Bza, + amrex::Array4 const& Bzf, + amrex::Array4 const& Bzc, + amrex::Array4 const& Bzg) +{ + using namespace amrex; + + int jg = amrex::coarsen(j,2); + Real wx = (j == jg*2) ? 0.0 : 0.5; + Real owx = 1.0-wx; + + int kg = amrex::coarsen(k,2); + Real wy = (k == kg*2) ? 0.0 : 0.5; + Real owy = 1.0-wy; + +#if (AMREX_SPACEDIM == 2) + + // interp from coarse nodal to fine nodal + Real bg = owx * owy * Bzg(jg ,kg ,0) + + owx * wy * Bzg(jg ,kg+1,0) + + wx * owy * Bzg(jg+1,kg ,0) + + wx * wy * Bzg(jg+1,kg+1,0); + + // interp from coarse staggered to fine nodal + wx = 0.5-wx; owx = 1.0-wx; + Real bc = owx * owy * Bzc(jg ,kg ,0) + + owx * wy * Bzc(jg ,kg+1,0) + + wx * owy * Bzc(jg-1,kg ,0) + + wx * wy * Bzc(jg-1,kg+1,0); + + // interp from fine staggered to fine nodal + Real bf = 0.5*(Bzf(j-1,k,0) + Bzf(j,k,0)); + +#else + + int lg = amrex::coarsen(l,2); + Real wz = (l == lg*2) ? 0.0 : 0.5; + Real owz = 1.0-wz; + + // interp from coarse nodal to fine nodal + Real bg = owx * owy * owz * Bzg(jg ,kg ,lg ) + + wx * owy * owz * Bzg(jg+1,kg ,lg ) + + owx * wy * owz * Bzg(jg ,kg+1,lg ) + + wx * wy * owz * Bzg(jg+1,kg+1,lg ) + + owx * owy * wz * Bzg(jg ,kg ,lg+1) + + wx * owy * wz * Bzg(jg+1,kg ,lg+1) + + owx * wy * wz * Bzg(jg ,kg+1,lg+1) + + wx * wy * wz * Bzg(jg+1,kg+1,lg+1); + + // interp from coarse staggered to fine nodal + wx = 0.5-wx; owx = 1.0-wx; + wy = 0.5-wy; owy = 1.0-wy; + Real bc = owx * owy * owz * Bzc(jg ,kg ,lg ) + + wx * owy * owz * Bzc(jg-1,kg ,lg ) + + owx * wy * owz * Bzc(jg ,kg-1,lg ) + + wx * wy * owz * Bzc(jg-1,kg-1,lg ) + + owx * owy * wz * Bzc(jg ,kg ,lg+1) + + wx * owy * wz * Bzc(jg-1,kg ,lg+1) + + owx * wy * wz * Bzc(jg ,kg-1,lg+1) + + wx * wy * wz * Bzc(jg-1,kg-1,lg+1); + + // interp from fine stagged to fine nodal + Real bf = 0.25*(Bzf(j-1,k-1,l) + Bzf(j,k-1,l) + Bzf(j-1,k,l) + Bzf(j,k,l)); + +#endif + + Bza(j,k,l) = bg + (bf-bc); +} + +AMREX_GPU_DEVICE AMREX_FORCE_INLINE +void warpx_interp_nd_bfield_x (int j, int k, int l, + amrex::Array4 const& Bxa, + amrex::Array4 const& Bxf) +{ +#if (AMREX_SPACEDIM == 2) + Bxa(j,k,0) = 0.5*(Bxf(j,k-1,0) + Bxf(j,k,0)); +#else + Bxa(j,k,l) = 0.25*(Bxf(j,k-1,l-1) + Bxf(j,k,l-1) + Bxf(j,k-1,l) + Bxf(j,k,l)); +#endif +} + +AMREX_GPU_DEVICE AMREX_FORCE_INLINE +void warpx_interp_nd_bfield_y (int j, int k, int l, + amrex::Array4 const& Bya, + amrex::Array4 const& Byf) +{ +#if (AMREX_SPACEDIM == 2) + Bya(j,k,0) = 0.25*(Byf(j,k,0) + Byf(j-1,k,0) + Byf(j,k-1,0) + Byf(j-1,k-1,0)); +#else + Bya(j,k,l) = 0.25*(Byf(j-1,k,l-1) + Byf(j,k,l-1) + Byf(j-1,k,l) + Byf(j,k,l)); +#endif +} + +AMREX_GPU_DEVICE AMREX_FORCE_INLINE +void warpx_interp_nd_bfield_z (int j, int k, int l, + amrex::Array4 const& Bza, + amrex::Array4 const& Bzf) +{ +#if (AMREX_SPACEDIM == 2) + Bza(j,k,0) = 0.5*(Bzf(j-1,k,0) + Bzf(j,k,0)); +#else + Bza(j,k,l) = 0.25*(Bzf(j-1,k-1,l) + Bzf(j,k-1,l) + Bzf(j-1,k,l) + Bzf(j,k,l)); +#endif +} + +AMREX_GPU_DEVICE AMREX_FORCE_INLINE +void warpx_interp_nd_efield_x (int j, int k, int l, + amrex::Array4 const& Exa, + amrex::Array4 const& Exf, + amrex::Array4 const& Exc, + amrex::Array4 const& Exg) +{ + using namespace amrex; + + int jg = amrex::coarsen(j,2); + Real wx = (j == jg*2) ? 0.0 : 0.5; + Real owx = 1.0-wx; + + int kg = amrex::coarsen(k,2); + Real wy = (k == kg*2) ? 0.0 : 0.5; + Real owy = 1.0-wy; + +#if (AMREX_SPACEDIM == 2) + + // interp from coarse nodal to fine nodal + Real eg = owx * owy * Exg(jg ,kg ,0) + + owx * wy * Exg(jg ,kg+1,0) + + wx * owy * Exg(jg+1,kg ,0) + + wx * wy * Exg(jg+1,kg+1,0); + + // interp from coarse staggered to fine nodal + wx = 0.5-wx; owx = 1.0-wx; + Real ec = owx * owy * Exc(jg ,kg ,0) + + owx * wy * Exc(jg ,kg+1,0) + + wx * owy * Exc(jg-1,kg ,0) + + wx * wy * Exc(jg-1,kg+1,0); + + // interp from fine staggered to fine nodal + Real ef = 0.5*(Exf(j-1,k,0) + Exf(j,k,0)); + +#else + + int lg = amrex::coarsen(l,2); + Real wz = (l == lg*2) ? 0.0 : 0.5; + Real owz = 1.0-wz; + + // interp from coarse nodal to fine nodal + Real eg = owx * owy * owz * Exg(jg ,kg ,lg ) + + wx * owy * owz * Exg(jg+1,kg ,lg ) + + owx * wy * owz * Exg(jg ,kg+1,lg ) + + wx * wy * owz * Exg(jg+1,kg+1,lg ) + + owx * owy * wz * Exg(jg ,kg ,lg+1) + + wx * owy * wz * Exg(jg+1,kg ,lg+1) + + owx * wy * wz * Exg(jg ,kg+1,lg+1) + + wx * wy * wz * Exg(jg+1,kg+1,lg+1); + + // interp from coarse staggered to fine nodal + wx = 0.5-wx; owx = 1.0-wx; + Real ec = owx * owy * owz * Exc(jg ,kg ,lg ) + + wx * owy * owz * Exc(jg-1,kg ,lg ) + + owx * wy * owz * Exc(jg ,kg+1,lg ) + + wx * wy * owz * Exc(jg-1,kg+1,lg ) + + owx * owy * wz * Exc(jg ,kg ,lg+1) + + wx * owy * wz * Exc(jg-1,kg ,lg+1) + + owx * wy * wz * Exc(jg ,kg+1,lg+1) + + wx * wy * wz * Exc(jg-1,kg+1,lg+1); + + // interp from fine staggered to fine nodal + Real ef = 0.5*(Exf(j-1,k,l) + Exf(j,k,l)); + +#endif + + Exa(j,k,l) = eg + (ef-ec); +} + +AMREX_GPU_DEVICE AMREX_FORCE_INLINE +void warpx_interp_nd_efield_y (int j, int k, int l, + amrex::Array4 const& Eya, + amrex::Array4 const& Eyf, + amrex::Array4 const& Eyc, + amrex::Array4 const& Eyg) +{ + using namespace amrex; + + int jg = amrex::coarsen(j,2); + Real wx = (j == jg*2) ? 0.0 : 0.5; + Real owx = 1.0-wx; + + int kg = amrex::coarsen(k,2); + Real wy = (k == kg*2) ? 0.0 : 0.5; + Real owy = 1.0-wy; + +#if (AMREX_SPACEDIM == 2) + + // interp from coarse nodal and coarse staggered to fine nodal + Real eg = owx * owy * (Eyg(jg ,kg ,0) + Eyc(jg ,kg ,0)) + + owx * wy * (Eyg(jg ,kg+1,0) + Eyc(jg ,kg+1,0)) + + wx * owy * (Eyg(jg+1,kg ,0) + Eyc(jg+1,kg ,0)) + + wx * wy * (Eyg(jg+1,kg+1,0) + Eyc(jg+1,kg+1,0)); + Real ec = 0.0; + + // interp from fine staggered to fine nodal + Real ef = Eyf(j,k,0); + +#else + + int lg = amrex::coarsen(l,2); + Real wz = (l == lg*2) ? 0.0 : 0.5; + Real owz = 1.0-wz; + + // interp from coarse nodal to fine nodal + Real eg = owx * owy * owz * Eyg(jg ,kg ,lg ) + + wx * owy * owz * Eyg(jg+1,kg ,lg ) + + owx * wy * owz * Eyg(jg ,kg+1,lg ) + + wx * wy * owz * Eyg(jg+1,kg+1,lg ) + + owx * owy * wz * Eyg(jg ,kg ,lg+1) + + wx * owy * wz * Eyg(jg+1,kg ,lg+1) + + owx * wy * wz * Eyg(jg ,kg+1,lg+1) + + wx * wy * wz * Eyg(jg+1,kg+1,lg+1); + + // interp from coarse staggered to fine nodal + wy = 0.5-wy; owy = 1.0-wy; + Real ec = owx * owy * owz * Eyc(jg ,kg ,lg ) + + wx * owy * owz * Eyc(jg+1,kg ,lg ) + + owx * wy * owz * Eyc(jg ,kg-1,lg ) + + wx * wy * owz * Eyc(jg+1,kg-1,lg ) + + owx * owy * wz * Eyc(jg ,kg ,lg+1) + + wx * owy * wz * Eyc(jg+1,kg ,lg+1) + + owx * wy * wz * Eyc(jg ,kg-1,lg+1) + + wx * wy * wz * Eyc(jg+1,kg-1,lg+1); + + // interp from fine staggered to fine nodal + Real ef = 0.5*(Eyf(j,k-1,l) + Eyf(j,k,l)); + +#endif + + Eya(j,k,l) = eg + (ef-ec); +} + +AMREX_GPU_DEVICE AMREX_FORCE_INLINE +void warpx_interp_nd_efield_z (int j, int k, int l, + amrex::Array4 const& Eza, + amrex::Array4 const& Ezf, + amrex::Array4 const& Ezc, + amrex::Array4 const& Ezg) +{ + using namespace amrex; + + int jg = amrex::coarsen(j,2); + Real wx = (j == jg*2) ? 0.0 : 0.5; + Real owx = 1.0-wx; + + int kg = amrex::coarsen(k,2); + Real wy = (k == kg*2) ? 0.0 : 0.5; + Real owy = 1.0-wy; + +#if (AMREX_SPACEDIM == 2) + + // interp from coarse nodal to fine nodal + Real eg = owx * owy * Ezg(jg ,kg ,0) + + owx * wy * Ezg(jg ,kg+1,0) + + wx * owy * Ezg(jg+1,kg ,0) + + wx * wy * Ezg(jg+1,kg+1,0); + + // interp from coarse stagged to fine nodal + wy = 0.5-wy; owy = 1.0-wy; + Real ec = owx * owy * Ezc(jg ,kg ,0) + + owx * wy * Ezc(jg ,kg-1,0) + + wx * owy * Ezc(jg+1,kg ,0) + + wx * wy * Ezc(jg+1,kg-1,0); + + // interp from fine staggered to fine nodal + Real ef = 0.5*(Ezf(j,k-1,0) + Ezf(j,k,0)); + +#else + + int lg = amrex::coarsen(l,2); + Real wz = (l == lg*2) ? 0.0 : 0.5; + Real owz = 1.0-wz; + + // interp from coarse nodal to fine nodal + Real eg = owx * owy * owz * Ezg(jg ,kg ,lg ) + + wx * owy * owz * Ezg(jg+1,kg ,lg ) + + owx * wy * owz * Ezg(jg ,kg+1,lg ) + + wx * wy * owz * Ezg(jg+1,kg+1,lg ) + + owx * owy * wz * Ezg(jg ,kg ,lg+1) + + wx * owy * wz * Ezg(jg+1,kg ,lg+1) + + owx * wy * wz * Ezg(jg ,kg+1,lg+1) + + wx * wy * wz * Ezg(jg+1,kg+1,lg+1); + + // interp from coarse staggered to fine nodal + wz = 0.5-wz; owz = 1.0-wz; + Real ec = owx * owy * owz * Ezc(jg ,kg ,lg ) + + wx * owy * owz * Ezc(jg+1,kg ,lg ) + + owx * wy * owz * Ezc(jg ,kg+1,lg ) + + wx * wy * owz * Ezc(jg+1,kg+1,lg ) + + owx * owy * wz * Ezc(jg ,kg ,lg-1) + + wx * owy * wz * Ezc(jg+1,kg ,lg-1) + + owx * wy * wz * Ezc(jg ,kg+1,lg-1) + + wx * wy * wz * Ezc(jg+1,kg+1,lg-1); + + // interp from fine staggered to fine nodal + Real ef = 0.5*(Ezf(j,k,l-1) + Ezf(j,k,l)); + +#endif + + Eza(j,k,l) = eg + (ef-ec); +} + +AMREX_GPU_DEVICE AMREX_FORCE_INLINE +void warpx_interp_nd_efield_x (int j, int k, int l, + amrex::Array4 const& Exa, + amrex::Array4 const& Exf) +{ + Exa(j,k,l) = 0.5*(Exf(j-1,k,l) + Exf(j,k,l)); +} + +AMREX_GPU_DEVICE AMREX_FORCE_INLINE +void warpx_interp_nd_efield_y (int j, int k, int l, + amrex::Array4 const& Eya, + amrex::Array4 const& Eyf) +{ +#if (AMREX_SPACEDIM == 2) + Eya(j,k,0) = Eyf(j,k,0); +#else + Eya(j,k,l) = 0.5*(Eyf(j,k-1,l) + Eyf(j,k,l)); +#endif +} + +AMREX_GPU_DEVICE AMREX_FORCE_INLINE +void warpx_interp_nd_efield_z (int j, int k, int l, + amrex::Array4 const& Eza, + amrex::Array4 const& Ezf) +{ +#if (AMREX_SPACEDIM == 2) + Eza(j,k,0) = 0.5*(Ezf(j,k-1,0) + Ezf(j,k,0)); +#else + Eza(j,k,l) = 0.5*(Ezf(j,k,l-1) + Ezf(j,k,l)); +#endif +} + #endif diff --git a/Source/Parallelization/WarpXRegrid.cpp b/Source/Parallelization/WarpXRegrid.cpp index 5441755f5..4ae2b6dca 100644 --- a/Source/Parallelization/WarpXRegrid.cpp +++ b/Source/Parallelization/WarpXRegrid.cpp @@ -1,5 +1,6 @@ #include +#include #include using namespace amrex; @@ -91,7 +92,7 @@ WarpX::RemakeLevel (int lev, Real time, const BoxArray& ba, const DistributionMa // Aux patch - if (lev == 0) + if (lev == 0 && field_gathering_algo != GatheringAlgo::MomentumConserving) { for (int idim = 0; idim < 3; ++idim) { Bfield_aux[lev][idim].reset(new MultiFab(*Bfield_fp[lev][idim], amrex::make_alias, 0, Bfield_aux[lev][idim]->nComp())); diff --git a/Source/WarpX.H b/Source/WarpX.H index 7a5344f4f..b8a88b77c 100644 --- a/Source/WarpX.H +++ b/Source/WarpX.H @@ -203,6 +203,8 @@ public: // This function does aux(lev) = fp(lev) + I(aux(lev-1)-cp(lev)). // Caller must make sure fp and cp have ghost cells filled. void UpdateAuxilaryData (); + void UpdateNodalAuxilaryData (); + void UpdateStagAuxilaryData (); // Fill boundary cells including coarse/fine boundaries void FillBoundaryB (); diff --git a/Source/WarpX.cpp b/Source/WarpX.cpp index d92354f2e..bac0d4121 100644 --- a/Source/WarpX.cpp +++ b/Source/WarpX.cpp @@ -792,16 +792,19 @@ WarpX::AllocLevelMFs (int lev, const BoxArray& ba, const DistributionMapping& dm ncomps = n_rz_azimuthal_modes*2 - 1; #endif + bool is_nodal = (field_gathering_algo == GatheringAlgo::MomentumConserving); + IntVect ngextra(static_cast(is_nodal)); + // // The fine patch // - Bfield_fp[lev][0].reset( new MultiFab(amrex::convert(ba,Bx_nodal_flag),dm,ncomps,ngE)); - Bfield_fp[lev][1].reset( new MultiFab(amrex::convert(ba,By_nodal_flag),dm,ncomps,ngE)); - Bfield_fp[lev][2].reset( new MultiFab(amrex::convert(ba,Bz_nodal_flag),dm,ncomps,ngE)); + Bfield_fp[lev][0].reset( new MultiFab(amrex::convert(ba,Bx_nodal_flag),dm,ncomps,ngE+ngextra)); + Bfield_fp[lev][1].reset( new MultiFab(amrex::convert(ba,By_nodal_flag),dm,ncomps,ngE+ngextra)); + Bfield_fp[lev][2].reset( new MultiFab(amrex::convert(ba,Bz_nodal_flag),dm,ncomps,ngE+ngextra)); - Efield_fp[lev][0].reset( new MultiFab(amrex::convert(ba,Ex_nodal_flag),dm,ncomps,ngE)); - Efield_fp[lev][1].reset( new MultiFab(amrex::convert(ba,Ey_nodal_flag),dm,ncomps,ngE)); - Efield_fp[lev][2].reset( new MultiFab(amrex::convert(ba,Ez_nodal_flag),dm,ncomps,ngE)); + Efield_fp[lev][0].reset( new MultiFab(amrex::convert(ba,Ex_nodal_flag),dm,ncomps,ngE+ngextra)); + Efield_fp[lev][1].reset( new MultiFab(amrex::convert(ba,Ey_nodal_flag),dm,ncomps,ngE+ngextra)); + Efield_fp[lev][2].reset( new MultiFab(amrex::convert(ba,Ez_nodal_flag),dm,ncomps,ngE+ngextra)); current_fp[lev][0].reset( new MultiFab(amrex::convert(ba,jx_nodal_flag),dm,ncomps,ngJ)); current_fp[lev][1].reset( new MultiFab(amrex::convert(ba,jy_nodal_flag),dm,ncomps,ngJ)); @@ -848,7 +851,18 @@ WarpX::AllocLevelMFs (int lev, const BoxArray& ba, const DistributionMapping& dm // // The Aux patch (i.e., the full solution) // - if (lev == 0) + if (is_nodal) + { + BoxArray const nba = amrex::convert(ba,IntVect::TheUnitVector()); + Bfield_aux[lev][0].reset( new MultiFab(nba,dm,ncomps,ngE)); + Bfield_aux[lev][1].reset( new MultiFab(nba,dm,ncomps,ngE)); + Bfield_aux[lev][2].reset( new MultiFab(nba,dm,ncomps,ngE)); + + Efield_aux[lev][0].reset( new MultiFab(nba,dm,ncomps,ngE)); + Efield_aux[lev][1].reset( new MultiFab(nba,dm,ncomps,ngE)); + Efield_aux[lev][2].reset( new MultiFab(nba,dm,ncomps,ngE)); + } + else if (lev == 0) { for (int idir = 0; idir < 3; ++idir) { Efield_aux[lev][idir].reset(new MultiFab(*Efield_fp[lev][idir], amrex::make_alias, 0, ncomps)); @@ -929,15 +943,25 @@ WarpX::AllocLevelMFs (int lev, const BoxArray& ba, const DistributionMapping& dm cba.coarsen(refRatio(lev-1)); if (n_field_gather_buffer > 0 || mypc->nSpeciesGatherFromMainGrid() > 0) { - // Create the MultiFabs for B - Bfield_cax[lev][0].reset( new MultiFab(amrex::convert(cba,Bx_nodal_flag),dm,ncomps,ngE)); - Bfield_cax[lev][1].reset( new MultiFab(amrex::convert(cba,By_nodal_flag),dm,ncomps,ngE)); - Bfield_cax[lev][2].reset( new MultiFab(amrex::convert(cba,Bz_nodal_flag),dm,ncomps,ngE)); - - // Create the MultiFabs for E - Efield_cax[lev][0].reset( new MultiFab(amrex::convert(cba,Ex_nodal_flag),dm,ncomps,ngE)); - Efield_cax[lev][1].reset( new MultiFab(amrex::convert(cba,Ey_nodal_flag),dm,ncomps,ngE)); - Efield_cax[lev][2].reset( new MultiFab(amrex::convert(cba,Ez_nodal_flag),dm,ncomps,ngE)); + if (field_gathering_algo == GatheringAlgo::MomentumConserving) { + BoxArray const& cnba = amrex::convert(cba,IntVect::TheUnitVector()); + Bfield_cax[lev][0].reset( new MultiFab(cnba,dm,ncomps,ngE)); + Bfield_cax[lev][1].reset( new MultiFab(cnba,dm,ncomps,ngE)); + Bfield_cax[lev][2].reset( new MultiFab(cnba,dm,ncomps,ngE)); + Efield_cax[lev][0].reset( new MultiFab(cnba,dm,ncomps,ngE)); + Efield_cax[lev][1].reset( new MultiFab(cnba,dm,ncomps,ngE)); + Efield_cax[lev][2].reset( new MultiFab(cnba,dm,ncomps,ngE)); + } else { + // Create the MultiFabs for B + Bfield_cax[lev][0].reset( new MultiFab(amrex::convert(cba,Bx_nodal_flag),dm,ncomps,ngE)); + Bfield_cax[lev][1].reset( new MultiFab(amrex::convert(cba,By_nodal_flag),dm,ncomps,ngE)); + Bfield_cax[lev][2].reset( new MultiFab(amrex::convert(cba,Bz_nodal_flag),dm,ncomps,ngE)); + + // Create the MultiFabs for E + Efield_cax[lev][0].reset( new MultiFab(amrex::convert(cba,Ex_nodal_flag),dm,ncomps,ngE)); + Efield_cax[lev][1].reset( new MultiFab(amrex::convert(cba,Ey_nodal_flag),dm,ncomps,ngE)); + Efield_cax[lev][2].reset( new MultiFab(amrex::convert(cba,Ez_nodal_flag),dm,ncomps,ngE)); + } gather_buffer_masks[lev].reset( new iMultiFab(ba, dm, ncomps, 1) ); // Gather buffer masks have 1 ghost cell, because of the fact -- cgit v1.2.3 From 45b6e243030bb72816f79f55aa60f88f7aa7f7bc Mon Sep 17 00:00:00 2001 From: Axel Huebl Date: Thu, 26 Sep 2019 18:24:45 -0700 Subject: Rho Synchronize: Port to GPU Port the charge density synchronize functions to GPU. --- Source/FortranInterface/WarpX_f.F90 | 59 ---------- Source/FortranInterface/WarpX_f.H | 9 -- .../InterpolateDensityFineToCoarse.H | 120 +++++++++++++++++++++ Source/Parallelization/WarpXComm.cpp | 35 +++--- Source/WarpX.H | 8 +- 5 files changed, 143 insertions(+), 88 deletions(-) create mode 100644 Source/Parallelization/InterpolateDensityFineToCoarse.H (limited to 'Source/Parallelization/WarpXComm.cpp') diff --git a/Source/FortranInterface/WarpX_f.F90 b/Source/FortranInterface/WarpX_f.F90 index 2ddf9409d..762ed2cd8 100644 --- a/Source/FortranInterface/WarpX_f.F90 +++ b/Source/FortranInterface/WarpX_f.F90 @@ -51,65 +51,6 @@ contains end subroutine warpx_compute_E - subroutine warpx_sync_rho_2d (lo, hi, crse, clo, chi, fine, flo, fhi, nc) & - bind(c, name='warpx_sync_rho_2d') - integer, intent(in) :: lo(2), hi(2), flo(2), fhi(2), clo(2), chi(2), nc - real(amrex_real), intent(in ) :: fine(flo(1):fhi(1),flo(2):fhi(2),nc) - real(amrex_real), intent(inout) :: crse(clo(1):chi(1),clo(2):chi(2),nc) - - integer :: i,j,ii,jj,m - - do m = 1, nc - do j = lo(2), hi(2) - jj = j*2 - do i = lo(1), hi(1) - ii = i*2 - crse(i,j,m) = 0.25d0 * & - ( fine(ii,jj,m) + 0.5d0 *(fine(ii-1,jj ,m)+fine(ii+1,jj ,m) & - & + fine(ii ,jj-1,m)+fine(ii ,jj+1,m)) & - & + 0.25d0*(fine(ii-1,jj-1,m)+fine(ii+1,jj-1,m) & - & + fine(ii-1,jj+1,m)+fine(ii+1,jj+1,m)) ) - end do - end do - end do - end subroutine warpx_sync_rho_2d - - subroutine warpx_sync_rho_3d (lo, hi, crse, clo, chi, fine, flo, fhi, nc) & - bind(c, name='warpx_sync_rho_3d') - integer, intent(in) :: lo(3), hi(3), flo(3), fhi(3), clo(3), chi(3), nc - real(amrex_real), intent(in ) :: fine(flo(1):fhi(1),flo(2):fhi(2),flo(3):fhi(3),nc) - real(amrex_real), intent(inout) :: crse(clo(1):chi(1),clo(2):chi(2),clo(3):chi(3),nc) - - integer :: i,j,k,ii,jj,kk,m - - do m = 1, nc - do k = lo(3), hi(3) - kk = k*2 - do j = lo(2), hi(2) - jj = j*2 - do i = lo(1), hi(1) - ii = i*2 - crse(i,j,k,m) = 0.125d0 * & - (fine(ii,jj,kk,m) + 0.5d0 *(fine(ii-1,jj ,kk ,m)+fine(ii+1,jj ,kk ,m) & - & + fine(ii ,jj-1,kk ,m)+fine(ii ,jj+1,kk ,m) & - & + fine(ii ,jj ,kk-1,m)+fine(ii ,jj ,kk+1,m)) & - & + 0.25d0 *(fine(ii-1,jj-1,kk ,m)+fine(ii+1,jj-1,kk ,m) & - & + fine(ii-1,jj+1,kk ,m)+fine(ii+1,jj+1,kk ,m) & - & + fine(ii-1,jj ,kk-1,m)+fine(ii+1,jj ,kk-1,m) & - & + fine(ii-1,jj ,kk+1,m)+fine(ii+1,jj ,kk+1,m) & - & + fine(ii ,jj-1,kk-1,m)+fine(ii ,jj+1,kk-1,m) & - & + fine(ii ,jj-1,kk+1,m)+fine(ii ,jj+1,kk+1,m)) & - & + 0.125d0*(fine(ii-1,jj-1,kk-1,m)+fine(ii-1,jj-1,kk+1,m) & - & + fine(ii-1,jj+1,kk-1,m)+fine(ii-1,jj+1,kk+1,m) & - & + fine(ii+1,jj-1,kk-1,m)+fine(ii+1,jj-1,kk+1,m) & - & + fine(ii+1,jj+1,kk-1,m)+fine(ii+1,jj+1,kk+1,m))) - end do - end do - end do - end do - - end subroutine warpx_sync_rho_3d - subroutine warpx_build_buffer_masks (lo, hi, msk, mlo, mhi, gmsk, glo, ghi, ng) & bind(c, name='warpx_build_buffer_masks') integer, dimension(3), intent(in) :: lo, hi, mlo, mhi, glo, ghi diff --git a/Source/FortranInterface/WarpX_f.H b/Source/FortranInterface/WarpX_f.H index 2d5d346e2..b51a83387 100644 --- a/Source/FortranInterface/WarpX_f.H +++ b/Source/FortranInterface/WarpX_f.H @@ -16,8 +16,6 @@ #if (AMREX_SPACEDIM == 3) -#define WRPX_SYNC_RHO warpx_sync_rho_3d - #define WRPX_SUM_FINE_TO_CRSE_NODAL warpx_sum_fine_to_crse_nodal_3d #define WRPX_ZERO_OUT_BNDRY warpx_zero_out_bndry_3d #define WRPX_BUILD_MASK warpx_build_mask_3d @@ -30,8 +28,6 @@ #elif (AMREX_SPACEDIM == 2) -#define WRPX_SYNC_RHO warpx_sync_rho_2d - #define WRPX_SUM_FINE_TO_CRSE_NODAL warpx_sum_fine_to_crse_nodal_2d #define WRPX_ZERO_OUT_BNDRY warpx_zero_out_bndry_2d #define WRPX_BUILD_MASK warpx_build_mask_2d @@ -229,11 +225,6 @@ extern "C" #endif const amrex::Real* sigbz, int sigbz_lo, int sigbz_hi); - void WRPX_SYNC_RHO (const int* lo, const int* hi, - BL_FORT_FAB_ARG_ANYD(crse), - const BL_FORT_FAB_ARG_ANYD(fine), - const int* ncomp); - #ifdef WARPX_USE_PSATD void warpx_fft_mpi_init (int fcomm); void warpx_fft_domain_decomp (int* warpx_local_nz, int* warpx_local_z0, diff --git a/Source/Parallelization/InterpolateDensityFineToCoarse.H b/Source/Parallelization/InterpolateDensityFineToCoarse.H new file mode 100644 index 000000000..947db2aac --- /dev/null +++ b/Source/Parallelization/InterpolateDensityFineToCoarse.H @@ -0,0 +1,120 @@ +/* Copyright 2019 Axel Huebl, Weiqun Zhang + * + * This file is part of WarpX. + * + * License: BSD-3-Clause-LBNL + */ + +#ifndef INTERPOLATE_DENSITY_FINE_TO_COARSE_H +#define INTERPOLATE_DENSITY_FINE_TO_COARSE_H + +#include +#include +#include +#include +#include + +#include // std::move + + +/** Fill a charge density (rho) coarse patch with averaged values from a fine patch + * + * Fills the values of the charge density on the coarse patch + * by averaging the values of the charge density of the fine patch. + */ +class InterpolateDensityFineToCoarse +{ +public: + /** Construct with fine and coarse patch and their refinement ratio + * + * @param[in] fine read-only fine patch + * @param[in,out] coarse overwritten coarse patch + * @param[in] refinement_ratio ratio between coarse and fine patch granularity + * (currently, only a value of is implemented) + * @param[in] number_of_components the number of components + */ + AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE + InterpolateDensityFineToCoarse( + amrex::Array4 const fine, + amrex::Array4 const coarse, + int const refinement_ratio, + int const number_of_components + ) : m_fine(std::move(fine)), + m_coarse(std::move(coarse)), + m_refinement_ratio(std::move(refinement_ratio)), + m_number_of_components(std::move(number_of_components)) + { + //! @note constants and stencils in operator() implementation assume 2x refinement + BL_ASSERT(refinement_ratio == 2); + } + + AMREX_GPU_DEVICE AMREX_FORCE_INLINE + void + operator()( + int const i, + int const j, + int const k + ) const noexcept + { + using namespace amrex; + + auto const & fine_unsafe = m_fine; // out-of-bounds access not secured with zero-values yet + auto const & coarse = m_coarse; // out-of-bounds access not secured but will also not occur + + // return zero for out-of-bounds accesses during interpolation + // this is efficiently used as a method to add neutral elements beyond guards in the average below + auto const fine = [fine_unsafe] AMREX_GPU_DEVICE (int const j, int const k, int const l, int const m) noexcept + { + return fine_unsafe.contains(j, k, l) ? fine_unsafe(j, k, l, m) : amrex::Real{0.}; + }; + + int const ii = i * m_refinement_ratio; + int const jj = j * m_refinement_ratio; + int const kk = k * m_refinement_ratio; + for( int m = 0; m < m_number_of_components; ++m ) { +#if AMREX_SPACEDIM == 2 + coarse(i,j,kk,m) = 0.25_rt * ( + fine(ii,jj,kk,m) + + 0.5_rt * ( + fine(ii-1,jj ,kk,m) + fine(ii+1,jj ,kk,m) + + fine(ii ,jj-1,kk,m) + fine(ii ,jj+1,kk,m) + ) + + 0.25_rt * ( + fine(ii-1,jj-1,kk,m) + fine(ii+1,jj-1,kk,m) + + fine(ii-1,jj+1,kk,m) + fine(ii+1,jj+1,kk,m) + ) + ); +#elif AMREX_SPACEDIM == 3 + coarse(i,j,k,m) = 0.125_rt * ( + fine(ii,jj,kk,m) + + 0.5_rt * ( + fine(ii-1,jj ,kk ,m) + fine(ii+1,jj ,kk ,m) + + fine(ii ,jj-1,kk ,m) + fine(ii ,jj+1,kk ,m) + + fine(ii ,jj ,kk-1,m) + fine(ii ,jj ,kk+1,m) + ) + + 0.25_rt * ( + fine(ii-1,jj-1,kk ,m) + fine(ii+1,jj-1,kk ,m) + + fine(ii-1,jj+1,kk ,m) + fine(ii+1,jj+1,kk ,m) + + fine(ii-1,jj ,kk-1,m) + fine(ii+1,jj ,kk-1,m) + + fine(ii-1,jj ,kk+1,m) + fine(ii+1,jj ,kk+1,m) + + fine(ii ,jj-1,kk-1,m) + fine(ii ,jj+1,kk-1,m) + + fine(ii ,jj-1,kk+1,m) + fine(ii ,jj+1,kk+1,m) + ) + + 0.125_rt * ( + fine(ii-1,jj-1,kk-1,m) + fine(ii-1,jj-1,kk+1,m) + + fine(ii-1,jj+1,kk-1,m) + fine(ii-1,jj+1,kk+1,m) + + fine(ii+1,jj-1,kk-1,m) + fine(ii+1,jj-1,kk+1,m) + + fine(ii+1,jj+1,kk-1,m) + fine(ii+1,jj+1,kk+1,m) + ) + ); +#endif + } + } +private: + amrex::Array4< amrex::Real const > m_fine; + amrex::Array4< amrex::Real > m_coarse; + int m_refinement_ratio; + int m_number_of_components; +}; + +#endif // INTERPOLATE_DENSITY_FINE_TO_COARSE_H diff --git a/Source/Parallelization/WarpXComm.cpp b/Source/Parallelization/WarpXComm.cpp index 4f870e79c..92f0b4f09 100644 --- a/Source/Parallelization/WarpXComm.cpp +++ b/Source/Parallelization/WarpXComm.cpp @@ -3,10 +3,12 @@ #include #include #include +#include #include #include + using namespace amrex; void @@ -354,7 +356,7 @@ WarpX::interpolateCurrentFineToCoarse ( std::array< amrex::MultiFab const *, 3 > std::array< amrex::MultiFab *, 3 > const & coarse, int const refinement_ratio) { - BL_PROFILE("InterpolateCurrentFineToCoarse()"); + BL_PROFILE("interpolateCurrentFineToCoarse()"); BL_ASSERT(refinement_ratio == 2); const IntVect& ng = (fine[0]->nGrowVect() + 1) / refinement_ratio; // add equivalent no. of guards to coarse patch @@ -386,6 +388,8 @@ WarpX::interpolateCurrentFineToCoarse ( std::array< amrex::MultiFab const *, 3 > void WarpX::SyncRho () { + BL_PROFILE("SyncRho()"); + if (!rho_fp[0]) return; const int ncomp = rho_fp[0]->nComp(); @@ -395,7 +399,7 @@ WarpX::SyncRho () { rho_cp[lev]->setVal(0.0); const IntVect& refinement_ratio = refRatio(lev-1); - SyncRho(*rho_fp[lev], *rho_cp[lev], refinement_ratio[0]); + interpolateDensityFineToCoarse(*rho_fp[lev], *rho_cp[lev], refinement_ratio[0]); } // For each level @@ -408,29 +412,26 @@ WarpX::SyncRho () } void -WarpX::SyncRho (const MultiFab& fine, MultiFab& crse, int refinement_ratio) +WarpX::interpolateDensityFineToCoarse (const MultiFab& fine, MultiFab& coarse, int const refinement_ratio) { + BL_PROFILE("interpolateDensityFineToCoarse()"); BL_ASSERT(refinement_ratio == 2); - const IntVect& ng = (fine.nGrowVect()+1)/refinement_ratio; + const IntVect& ng = (fine.nGrowVect() + 1) / refinement_ratio; // add equivalent no. of guards to coarse patch const int nc = fine.nComp(); #ifdef _OPEMP #pragma omp parallel if (Gpu::notInLaunchRegion()) #endif { - FArrayBox ffab; - for (MFIter mfi(crse,true); mfi.isValid(); ++mfi) + // OMP in-box decomposition of coarse into tilebox + for (MFIter mfi(coarse, TilingIfNotGPU()); mfi.isValid(); ++mfi) { - const Box& bx = mfi.growntilebox(ng); - Box fbx = amrex::grow(amrex::refine(bx,refinement_ratio),1); - ffab.resize(fbx, nc); - fbx &= fine[mfi].box(); - ffab.setVal(0.0); - ffab.copy(fine[mfi], fbx, 0, fbx, 0, nc); - WRPX_SYNC_RHO(bx.loVect(), bx.hiVect(), - BL_TO_FORTRAN_ANYD(crse[mfi]), - BL_TO_FORTRAN_ANYD(ffab), - &nc); + const Box& bx = mfi.growntilebox(ng); // only grow to outer directions of tileboxes for filling guards + + amrex::ParallelFor( + bx, + InterpolateDensityFineToCoarse(fine.const_array(mfi), coarse.array(mfi), refinement_ratio, nc) + ); } } } @@ -562,7 +563,7 @@ WarpX::RestrictRhoFromFineToCoarsePatch (int lev) if (rho_fp[lev]) { rho_cp[lev]->setVal(0.0); const IntVect& refinement_ratio = refRatio(lev-1); - SyncRho(*rho_fp[lev], *rho_cp[lev], refinement_ratio[0]); + interpolateDensityFineToCoarse(*rho_fp[lev], *rho_cp[lev], refinement_ratio[0]); } } diff --git a/Source/WarpX.H b/Source/WarpX.H index c59802427..0da1cf350 100644 --- a/Source/WarpX.H +++ b/Source/WarpX.H @@ -436,10 +436,12 @@ private: * averaging the values of the charge density of the fine patch (on the same level). * * \param[in] fine fine patches to interpolate from - * \param[out] crse coarse patches to interpolate to - * \param[in] ref_ratio integer ratio between the two + * \param[out] coarse coarse patches to interpolate to + * \param[in] refinement_ratio integer ratio between the two */ - void SyncRho (const amrex::MultiFab& fine, amrex::MultiFab& crse, int ref_ratio); + void interpolateDensityFineToCoarse (const amrex::MultiFab& fine, + amrex::MultiFab& coarse, + int const refinement_ratio); void ExchangeWithPmlB (int lev); void ExchangeWithPmlE (int lev); -- cgit v1.2.3 From 6b70e4abd40d70b48bf4a40cd1a7675a77c8a7ff Mon Sep 17 00:00:00 2001 From: Weiqun Zhang Date: Fri, 4 Oct 2019 16:38:41 -0700 Subject: fix for cases with warpx.do_nodal=1 --- Source/Parallelization/WarpXComm.cpp | 10 +++++----- Source/WarpX.H | 4 ++-- 2 files changed, 7 insertions(+), 7 deletions(-) (limited to 'Source/Parallelization/WarpXComm.cpp') diff --git a/Source/Parallelization/WarpXComm.cpp b/Source/Parallelization/WarpXComm.cpp index 6128b27a4..c5a8877d3 100644 --- a/Source/Parallelization/WarpXComm.cpp +++ b/Source/Parallelization/WarpXComm.cpp @@ -52,15 +52,15 @@ WarpX::UpdateAuxilaryData () { BL_PROFILE("UpdateAuxilaryData()"); - if (Bfield_aux[0][0]->is_nodal()) { - UpdateNodalAuxilaryData(); + if (Bfield_aux[0][0]->ixType() == Bfield_fp[0][0]->ixType()) { + UpdateAuxilaryDataSameType(); } else { - UpdateStagAuxilaryData(); + UpdateAuxilaryDataStagToNodal(); } } void -WarpX::UpdateNodalAuxilaryData () +WarpX::UpdateAuxilaryDataStagToNodal () { // For level 0, we only need to do the average. #ifdef _OPENMP @@ -201,7 +201,7 @@ WarpX::UpdateNodalAuxilaryData () } void -WarpX::UpdateStagAuxilaryData () +WarpX::UpdateAuxilaryDataSameType () { for (int lev = 1; lev <= finest_level; ++lev) { diff --git a/Source/WarpX.H b/Source/WarpX.H index 561e0533d..dd86dfc14 100644 --- a/Source/WarpX.H +++ b/Source/WarpX.H @@ -203,8 +203,8 @@ public: // This function does aux(lev) = fp(lev) + I(aux(lev-1)-cp(lev)). // Caller must make sure fp and cp have ghost cells filled. void UpdateAuxilaryData (); - void UpdateNodalAuxilaryData (); - void UpdateStagAuxilaryData (); + void UpdateAuxilaryDataStagToNodal (); + void UpdateAuxilaryDataSameType (); // Fill boundary cells including coarse/fine boundaries void FillBoundaryB (); -- cgit v1.2.3