diff options
author | 2019-09-30 18:12:58 +0200 | |
---|---|---|
committer | 2019-09-30 18:12:58 +0200 | |
commit | 0c2d775fc16f1da5116c1ab198e2518dad5fcac4 (patch) | |
tree | c5f581e061fb8c9c78fc0019b9f2868c7bb64ac2 /Source/Parallelization/WarpXComm.cpp | |
parent | 309cbaf7dafc84d2ba721b94bbf190b29acaa069 (diff) | |
parent | c636e0f71c6ca6a854ed45bf90f90b943b34fc58 (diff) | |
download | WarpX-0c2d775fc16f1da5116c1ab198e2518dad5fcac4.tar.gz WarpX-0c2d775fc16f1da5116c1ab198e2518dad5fcac4.tar.zst WarpX-0c2d775fc16f1da5116c1ab198e2518dad5fcac4.zip |
Merge remote-tracking branch 'upstream/dev' into qed_phys_part_with_lambda
Diffstat (limited to 'Source/Parallelization/WarpXComm.cpp')
-rw-r--r-- | Source/Parallelization/WarpXComm.cpp | 200 |
1 files changed, 76 insertions, 124 deletions
diff --git a/Source/Parallelization/WarpXComm.cpp b/Source/Parallelization/WarpXComm.cpp index 1c8c37cad..4f870e79c 100644 --- a/Source/Parallelization/WarpXComm.cpp +++ b/Source/Parallelization/WarpXComm.cpp @@ -1,8 +1,8 @@ +#include <WarpXComm_K.H> #include <WarpX.H> #include <WarpX_f.H> #include <WarpXSumGuardCells.H> - -#include <AMReX_FillPatchUtil_F.H> +#include <Parallelization/InterpolateCurrentFineToCoarse.H> #include <algorithm> #include <cstdlib> @@ -52,8 +52,6 @@ WarpX::UpdateAuxilaryData () { BL_PROFILE("UpdateAuxilaryData()"); - const int use_limiter = 0; - for (int lev = 1; lev <= finest_level; ++lev) { const auto& crse_period = Geom(lev-1).periodicity(); @@ -81,57 +79,37 @@ WarpX::UpdateAuxilaryData () MultiFab::Subtract(dBy, *Bfield_cp[lev][1], 0, 0, Bfield_cp[lev][1]->nComp(), ng); MultiFab::Subtract(dBz, *Bfield_cp[lev][2], 0, 0, Bfield_cp[lev][2]->nComp(), ng); - const Real* dx = Geom(lev-1).CellSize(); const int refinement_ratio = refRatio(lev-1)[0]; + AMREX_ALWAYS_ASSERT(refinement_ratio == 2); + #ifdef _OPENMP -#pragma omp parallel +#pragma omp parallel if (Gpu::notInLaunchRegion()) #endif + for (MFIter mfi(*Bfield_aux[lev][0]); mfi.isValid(); ++mfi) { - std::array<FArrayBox,3> bfab; - for (MFIter mfi(*Bfield_aux[lev][0]); mfi.isValid(); ++mfi) + Array4<Real> const& bx_aux = Bfield_aux[lev][0]->array(mfi); + Array4<Real> const& by_aux = Bfield_aux[lev][1]->array(mfi); + Array4<Real> const& bz_aux = Bfield_aux[lev][2]->array(mfi); + Array4<Real const> const& bx_fp = Bfield_fp[lev][0]->const_array(mfi); + Array4<Real const> const& by_fp = Bfield_fp[lev][1]->const_array(mfi); + Array4<Real const> const& bz_fp = Bfield_fp[lev][2]->const_array(mfi); + Array4<Real const> const& bx_c = dBx.const_array(mfi); + Array4<Real const> const& by_c = dBy.const_array(mfi); + Array4<Real const> const& bz_c = dBz.const_array(mfi); + + amrex::ParallelFor(Box(bx_aux), Box(by_aux), Box(bz_aux), + [=] AMREX_GPU_DEVICE (int j, int k, int l) noexcept { - Box ccbx = mfi.fabbox(); - ccbx.enclosedCells(); - ccbx.coarsen(refinement_ratio).refine(refinement_ratio); // so that ccbx is coarsenable - - const FArrayBox& cxfab = dBx[mfi]; - const FArrayBox& cyfab = dBy[mfi]; - const FArrayBox& czfab = dBz[mfi]; - bfab[0].resize(amrex::convert(ccbx,Bx_nodal_flag)); - bfab[1].resize(amrex::convert(ccbx,By_nodal_flag)); - bfab[2].resize(amrex::convert(ccbx,Bz_nodal_flag)); - -#if (AMREX_SPACEDIM == 3) - amrex_interp_div_free_bfield(ccbx.loVect(), ccbx.hiVect(), - BL_TO_FORTRAN_ANYD(bfab[0]), - BL_TO_FORTRAN_ANYD(bfab[1]), - BL_TO_FORTRAN_ANYD(bfab[2]), - BL_TO_FORTRAN_ANYD(cxfab), - BL_TO_FORTRAN_ANYD(cyfab), - BL_TO_FORTRAN_ANYD(czfab), - dx, &refinement_ratio,&use_limiter); -#else - amrex_interp_div_free_bfield(ccbx.loVect(), ccbx.hiVect(), - BL_TO_FORTRAN_ANYD(bfab[0]), - BL_TO_FORTRAN_ANYD(bfab[2]), - BL_TO_FORTRAN_ANYD(cxfab), - BL_TO_FORTRAN_ANYD(czfab), - dx, &refinement_ratio,&use_limiter); - amrex_interp_cc_bfield(ccbx.loVect(), ccbx.hiVect(), - BL_TO_FORTRAN_ANYD(bfab[1]), - BL_TO_FORTRAN_ANYD(cyfab), - &refinement_ratio,&use_limiter); -#endif - - for (int idim = 0; idim < 3; ++idim) - { - FArrayBox& aux = (*Bfield_aux[lev][idim])[mfi]; - FArrayBox& fp = (*Bfield_fp[lev][idim])[mfi]; - const Box& bx = aux.box(); - aux.copy(fp, bx, 0, bx, 0, 1); - aux.plus(bfab[idim], bx, bx, 0, 0, 1); - } - } + warpx_interp_bfield_x(j,k,l, bx_aux, bx_fp, bx_c); + }, + [=] AMREX_GPU_DEVICE (int j, int k, int l) noexcept + { + warpx_interp_bfield_y(j,k,l, by_aux, by_fp, by_c); + }, + [=] AMREX_GPU_DEVICE (int j, int k, int l) noexcept + { + warpx_interp_bfield_z(j,k,l, bz_aux, bz_fp, bz_c); + }); } } @@ -156,56 +134,34 @@ WarpX::UpdateAuxilaryData () MultiFab::Subtract(dEy, *Efield_cp[lev][1], 0, 0, Efield_cp[lev][1]->nComp(), ng); MultiFab::Subtract(dEz, *Efield_cp[lev][2], 0, 0, Efield_cp[lev][2]->nComp(), ng); - const int refinement_ratio = refRatio(lev-1)[0]; #ifdef _OPEMP -#pragma omp parallel +#pragma omp parallel if (Gpu::notInLaunchRegion()) #endif + for (MFIter mfi(*Efield_aux[lev][0]); mfi.isValid(); ++mfi) { - std::array<FArrayBox,3> efab; - for (MFIter mfi(*Efield_aux[lev][0]); mfi.isValid(); ++mfi) + Array4<Real> const& ex_aux = Efield_aux[lev][0]->array(mfi); + Array4<Real> const& ey_aux = Efield_aux[lev][1]->array(mfi); + Array4<Real> const& ez_aux = Efield_aux[lev][2]->array(mfi); + Array4<Real const> const& ex_fp = Efield_fp[lev][0]->const_array(mfi); + Array4<Real const> const& ey_fp = Efield_fp[lev][1]->const_array(mfi); + Array4<Real const> const& ez_fp = Efield_fp[lev][2]->const_array(mfi); + Array4<Real const> const& ex_c = dEx.const_array(mfi); + Array4<Real const> const& ey_c = dEy.const_array(mfi); + Array4<Real const> const& ez_c = dEz.const_array(mfi); + + amrex::ParallelFor(Box(ex_aux), Box(ey_aux), Box(ez_aux), + [=] AMREX_GPU_DEVICE (int j, int k, int l) noexcept { - Box ccbx = mfi.fabbox(); - ccbx.enclosedCells(); - ccbx.coarsen(refinement_ratio).refine(refinement_ratio); // so that ccbx is coarsenable - - const FArrayBox& cxfab = dEx[mfi]; - const FArrayBox& cyfab = dEy[mfi]; - const FArrayBox& czfab = dEz[mfi]; - efab[0].resize(amrex::convert(ccbx,Ex_nodal_flag)); - efab[1].resize(amrex::convert(ccbx,Ey_nodal_flag)); - efab[2].resize(amrex::convert(ccbx,Ez_nodal_flag)); - -#if (AMREX_SPACEDIM == 3) - amrex_interp_efield(ccbx.loVect(), ccbx.hiVect(), - BL_TO_FORTRAN_ANYD(efab[0]), - BL_TO_FORTRAN_ANYD(efab[1]), - BL_TO_FORTRAN_ANYD(efab[2]), - BL_TO_FORTRAN_ANYD(cxfab), - BL_TO_FORTRAN_ANYD(cyfab), - BL_TO_FORTRAN_ANYD(czfab), - &refinement_ratio,&use_limiter); -#else - amrex_interp_efield(ccbx.loVect(), ccbx.hiVect(), - BL_TO_FORTRAN_ANYD(efab[0]), - BL_TO_FORTRAN_ANYD(efab[2]), - BL_TO_FORTRAN_ANYD(cxfab), - BL_TO_FORTRAN_ANYD(czfab), - &refinement_ratio,&use_limiter); - amrex_interp_nd_efield(ccbx.loVect(), ccbx.hiVect(), - BL_TO_FORTRAN_ANYD(efab[1]), - BL_TO_FORTRAN_ANYD(cyfab), - &refinement_ratio); -#endif - - for (int idim = 0; idim < 3; ++idim) - { - FArrayBox& aux = (*Efield_aux[lev][idim])[mfi]; - FArrayBox& fp = (*Efield_fp[lev][idim])[mfi]; - const Box& bx = aux.box(); - aux.copy(fp, bx, 0, bx, 0, Efield_fp[lev][idim]->nComp()); - aux.plus(efab[idim], bx, bx, 0, 0, Efield_fp[lev][idim]->nComp()); - } - } + warpx_interp_efield_x(j,k,l, ex_aux, ex_fp, ex_c); + }, + [=] AMREX_GPU_DEVICE (int j, int k, int l) noexcept + { + warpx_interp_efield_y(j,k,l, ey_aux, ey_fp, ey_c); + }, + [=] AMREX_GPU_DEVICE (int j, int k, int l) noexcept + { + warpx_interp_efield_z(j,k,l, ez_aux, ez_fp, ez_c); + }); } } } @@ -381,7 +337,7 @@ WarpX::SyncCurrent () std::array< MultiFab*,3> crse { current_cp[lev][0].get(), current_cp[lev][1].get(), current_cp[lev][2].get() }; - SyncCurrent(fine, crse, refinement_ratio[0]); + interpolateCurrentFineToCoarse(fine, crse, refinement_ratio[0]); } // For each level @@ -393,36 +349,35 @@ WarpX::SyncCurrent () } } -/** \brief Fills the values of the current on the coarse patch by - * averaging the values of the current of the fine patch (on the same level). - */ void -WarpX::SyncCurrent (const std::array<const amrex::MultiFab*,3>& fine, - const std::array< amrex::MultiFab*,3>& crse, - int refinement_ratio) +WarpX::interpolateCurrentFineToCoarse ( std::array< amrex::MultiFab const *, 3 > const & fine, + std::array< amrex::MultiFab *, 3 > const & coarse, + int const refinement_ratio) { + BL_PROFILE("InterpolateCurrentFineToCoarse()"); BL_ASSERT(refinement_ratio == 2); - const IntVect& ng = (fine[0]->nGrowVect() + 1) /refinement_ratio; + const IntVect& ng = (fine[0]->nGrowVect() + 1) / refinement_ratio; // add equivalent no. of guards to coarse patch #ifdef _OPEMP -#pragma omp parallel +#pragma omp parallel if (Gpu::notInLaunchRegion()) #endif { - FArrayBox ffab; - for (int idim = 0; idim < 3; ++idim) + for (int idim = 0; idim < fine.size(); ++idim) // j-field components { - for (MFIter mfi(*crse[idim],true); mfi.isValid(); ++mfi) + // OMP in-box decomposition of coarse into tilebox + for (MFIter mfi(*coarse[idim], TilingIfNotGPU()); mfi.isValid(); ++mfi) { - const Box& bx = mfi.growntilebox(ng); - Box fbx = amrex::grow(amrex::refine(bx,refinement_ratio),1); - ffab.resize(fbx); - fbx &= (*fine[idim])[mfi].box(); - ffab.setVal(0.0); - ffab.copy((*fine[idim])[mfi], fbx, 0, fbx, 0, fine[idim]->nComp()); - WRPX_SYNC_CURRENT(bx.loVect(), bx.hiVect(), - BL_TO_FORTRAN_ANYD((*crse[idim])[mfi]), - BL_TO_FORTRAN_ANYD(ffab), - &idim); + const Box& bx = mfi.growntilebox(ng); // only grow to outer directions of tileboxes for filling guards + + auto const & arrFine = fine[idim]->const_array(mfi); + auto const & arrCoarse = coarse[idim]->array(mfi); + + if( idim == 0 ) + amrex::ParallelFor( bx, InterpolateCurrentFineToCoarse<0>(arrFine, arrCoarse, refinement_ratio) ); + else if( idim == 1 ) + amrex::ParallelFor( bx, InterpolateCurrentFineToCoarse<1>(arrFine, arrCoarse, refinement_ratio) ); + else if( idim == 2 ) + amrex::ParallelFor( bx, InterpolateCurrentFineToCoarse<2>(arrFine, arrCoarse, refinement_ratio) ); } } } @@ -452,9 +407,6 @@ WarpX::SyncRho () } } -/** \brief Fills the values of the charge density on the coarse patch by - * averaging the values of the charge density of the fine patch (on the same level). - */ void WarpX::SyncRho (const MultiFab& fine, MultiFab& crse, int refinement_ratio) { @@ -463,7 +415,7 @@ WarpX::SyncRho (const MultiFab& fine, MultiFab& crse, int refinement_ratio) const int nc = fine.nComp(); #ifdef _OPEMP -#pragma omp parallel +#pragma omp parallel if (Gpu::notInLaunchRegion()) #endif { FArrayBox ffab; @@ -501,7 +453,7 @@ WarpX::RestrictCurrentFromFineToCoarsePatch (int lev) std::array< MultiFab*,3> crse { current_cp[lev][0].get(), current_cp[lev][1].get(), current_cp[lev][2].get() }; - SyncCurrent(fine, crse, refinement_ratio[0]); + interpolateCurrentFineToCoarse(fine, crse, refinement_ratio[0]); } void @@ -688,7 +640,7 @@ WarpX::AddRhoFromFineLevelandSumBoundary(int lev, int icomp, int ncomp) } else if (charge_buf[lev+1]) // but no filter { - MultiFab::Copy(*charge_buf[lev+1], + MultiFab::Add(*charge_buf[lev+1], *rho_cp[lev+1], icomp, icomp, ncomp, rho_cp[lev+1]->nGrow()); mf.ParallelAdd(*charge_buf[lev+1], icomp, 0, |