diff options
-rw-r--r-- | Source/Diagnostics/ComputeDiagFunctors/RhoFunctor.cpp | 2 | ||||
-rw-r--r-- | Source/FieldSolver/SpectralSolver/SpectralFieldDataRZ.H | 5 | ||||
-rw-r--r-- | Source/FieldSolver/SpectralSolver/SpectralFieldDataRZ.cpp | 35 | ||||
-rw-r--r-- | Source/FieldSolver/SpectralSolver/SpectralSolverRZ.H | 9 | ||||
-rw-r--r-- | Source/FieldSolver/WarpXPushFieldsEM.cpp | 8 | ||||
-rw-r--r-- | Source/Filter/Filter.H | 2 | ||||
-rw-r--r-- | Source/Filter/Filter.cpp | 38 | ||||
-rw-r--r-- | Source/Parallelization/WarpXComm.cpp | 16 |
8 files changed, 91 insertions, 24 deletions
diff --git a/Source/Diagnostics/ComputeDiagFunctors/RhoFunctor.cpp b/Source/Diagnostics/ComputeDiagFunctors/RhoFunctor.cpp index 26d99f4a9..0b1d7cc3f 100644 --- a/Source/Diagnostics/ComputeDiagFunctors/RhoFunctor.cpp +++ b/Source/Diagnostics/ComputeDiagFunctors/RhoFunctor.cpp @@ -61,7 +61,7 @@ RhoFunctor::operator() ( amrex::MultiFab& mf_dst, const int dcomp, const int /*i if (WarpX::use_kspace_filter) { auto & solver = warpx.get_spectral_solver_fp(m_lev); solver.ForwardTransform(m_lev, *rho, IdxAvg::rho_new); - solver.ApplyFilter(IdxAvg::rho_new); + solver.ApplyFilter(m_lev, IdxAvg::rho_new); solver.BackwardTransform(m_lev, *rho, IdxAvg::rho_new); } } diff --git a/Source/FieldSolver/SpectralSolver/SpectralFieldDataRZ.H b/Source/FieldSolver/SpectralSolver/SpectralFieldDataRZ.H index 85aa41d3b..87158314d 100644 --- a/Source/FieldSolver/SpectralSolver/SpectralFieldDataRZ.H +++ b/Source/FieldSolver/SpectralSolver/SpectralFieldDataRZ.H @@ -67,8 +67,9 @@ class SpectralFieldDataRZ void InitFilter (amrex::IntVect const & filter_npass_each_dir, bool const compensation, SpectralKSpaceRZ const & k_space); - void ApplyFilter (int const field_index); - void ApplyFilter (int const field_index1, int const field_index2, int const field_index3); + void ApplyFilter (const int lev, int const field_index); + void ApplyFilter (const int lev, int const field_index1, + int const field_index2, int const field_index3); // Returns an array that holds the kr for all of the modes HankelTransform::RealVector const & getKrArray (amrex::MFIter const & mfi) const { diff --git a/Source/FieldSolver/SpectralSolver/SpectralFieldDataRZ.cpp b/Source/FieldSolver/SpectralSolver/SpectralFieldDataRZ.cpp index a87bfdb54..80760afb3 100644 --- a/Source/FieldSolver/SpectralSolver/SpectralFieldDataRZ.cpp +++ b/Source/FieldSolver/SpectralSolver/SpectralFieldDataRZ.cpp @@ -715,10 +715,18 @@ SpectralFieldDataRZ::InitFilter (amrex::IntVect const & filter_npass_each_dir, b /* \brief Apply K-space filtering on a scalar */ void -SpectralFieldDataRZ::ApplyFilter (int const field_index) +SpectralFieldDataRZ::ApplyFilter (const int lev, int const field_index) { + amrex::LayoutData<amrex::Real>* cost = WarpX::getCosts(lev); for (amrex::MFIter mfi(binomialfilter); mfi.isValid(); ++mfi){ + + if (cost && WarpX::load_balance_costs_update_algo == LoadBalanceCostsUpdateAlgo::Timers) + { + amrex::Gpu::synchronize(); + } + amrex::Real wt = amrex::second(); + auto const & filter_r = binomialfilter[mfi].getFilterArrayR(); auto const & filter_z = binomialfilter[mfi].getFilterArrayZ(); auto const & filter_r_arr = filter_r.dataPtr(); @@ -738,15 +746,31 @@ SpectralFieldDataRZ::ApplyFilter (int const field_index) int const ir = i + nr*mode; fields_arr(i,j,k,ic) *= filter_r_arr[ir]*filter_z_arr[j]; }); + + if (cost && WarpX::load_balance_costs_update_algo == LoadBalanceCostsUpdateAlgo::Timers) + { + amrex::Gpu::synchronize(); + wt = amrex::second() - wt; + amrex::HostDevice::Atomic::Add( &(*cost)[mfi.index()], wt); + } } } /* \brief Apply K-space filtering on a vector */ void -SpectralFieldDataRZ::ApplyFilter (int const field_index1, int const field_index2, int const field_index3) +SpectralFieldDataRZ::ApplyFilter (const int lev, int const field_index1, + int const field_index2, int const field_index3) { + amrex::LayoutData<amrex::Real>* cost = WarpX::getCosts(lev); for (amrex::MFIter mfi(binomialfilter); mfi.isValid(); ++mfi){ + + if (cost && WarpX::load_balance_costs_update_algo == LoadBalanceCostsUpdateAlgo::Timers) + { + amrex::Gpu::synchronize(); + } + amrex::Real wt = amrex::second(); + auto const & filter_r = binomialfilter[mfi].getFilterArrayR(); auto const & filter_z = binomialfilter[mfi].getFilterArrayZ(); auto const & filter_r_arr = filter_r.dataPtr(); @@ -770,5 +794,12 @@ SpectralFieldDataRZ::ApplyFilter (int const field_index1, int const field_index2 fields_arr(i,j,k,ic2) *= filter_r_arr[ir]*filter_z_arr[j]; fields_arr(i,j,k,ic3) *= filter_r_arr[ir]*filter_z_arr[j]; }); + + if (cost && WarpX::load_balance_costs_update_algo == LoadBalanceCostsUpdateAlgo::Timers) + { + amrex::Gpu::synchronize(); + wt = amrex::second() - wt; + amrex::HostDevice::Atomic::Add( &(*cost)[mfi.index()], wt); + } } } diff --git a/Source/FieldSolver/SpectralSolver/SpectralSolverRZ.H b/Source/FieldSolver/SpectralSolver/SpectralSolverRZ.H index 114d199c5..1c6b3d613 100644 --- a/Source/FieldSolver/SpectralSolver/SpectralSolverRZ.H +++ b/Source/FieldSolver/SpectralSolver/SpectralSolverRZ.H @@ -69,15 +69,16 @@ class SpectralSolverRZ } /* \brief Apply K space filtering for a scalar */ - void ApplyFilter (int const field_index) + void ApplyFilter (const int lev, int const field_index) { - field_data.ApplyFilter(field_index); + field_data.ApplyFilter(lev, field_index); } /* \brief Apply K space filtering for a vector */ - void ApplyFilter (int const field_index1, int const field_index2, int const field_index3) + void ApplyFilter (const int lev, int const field_index1, + int const field_index2, int const field_index3) { - field_data.ApplyFilter(field_index1, field_index2, field_index3); + field_data.ApplyFilter(lev, field_index1, field_index2, field_index3); } /** diff --git a/Source/FieldSolver/WarpXPushFieldsEM.cpp b/Source/FieldSolver/WarpXPushFieldsEM.cpp index 15ce940d6..a36d21e08 100644 --- a/Source/FieldSolver/WarpXPushFieldsEM.cpp +++ b/Source/FieldSolver/WarpXPushFieldsEM.cpp @@ -239,11 +239,11 @@ WarpX::PSATDForwardTransformJ () { for (int lev = 0; lev <= finest_level; ++lev) { - spectral_solver_fp[lev]->ApplyFilter(IdxAvg::Jx, IdxAvg::Jy, IdxAvg::Jz); + spectral_solver_fp[lev]->ApplyFilter(lev, IdxAvg::Jx, IdxAvg::Jy, IdxAvg::Jz); if (spectral_solver_cp[lev]) { - spectral_solver_cp[lev]->ApplyFilter(IdxAvg::Jx, IdxAvg::Jy, IdxAvg::Jz); + spectral_solver_cp[lev]->ApplyFilter(lev, IdxAvg::Jx, IdxAvg::Jy, IdxAvg::Jz); } } } @@ -272,11 +272,11 @@ WarpX::PSATDForwardTransformRho (const int icomp) { for (int lev = 0; lev <= finest_level; ++lev) { - spectral_solver_fp[lev]->ApplyFilter(dst_comp); + spectral_solver_fp[lev]->ApplyFilter(lev, dst_comp); if (spectral_solver_cp[lev]) { - spectral_solver_cp[lev]->ApplyFilter(dst_comp); + spectral_solver_cp[lev]->ApplyFilter(lev, dst_comp); } } } diff --git a/Source/Filter/Filter.H b/Source/Filter/Filter.H index 92724a154..5a9b6d43e 100644 --- a/Source/Filter/Filter.H +++ b/Source/Filter/Filter.H @@ -22,7 +22,7 @@ public: // Apply stencil on MultiFab. // Guard cells are handled inside this function void ApplyStencil(amrex::MultiFab& dstmf, - const amrex::MultiFab& srcmf, int scomp=0, + const amrex::MultiFab& srcmf, const int lev, int scomp=0, int dcomp=0, int ncomp=10000); // Apply stencil on a FabArray. diff --git a/Source/Filter/Filter.cpp b/Source/Filter/Filter.cpp index 781dfd804..38b9e681c 100644 --- a/Source/Filter/Filter.cpp +++ b/Source/Filter/Filter.cpp @@ -27,18 +27,27 @@ using namespace amrex; /* \brief Apply stencil on MultiFab (GPU version, 2D/3D). * \param dstmf Destination MultiFab * \param srcmf source MultiFab + * \param[in] lev mesh refinement level * \param scomp first component of srcmf on which the filter is applied * \param dcomp first component of dstmf on which the filter is applied * \param ncomp Number of components on which the filter is applied. */ void -Filter::ApplyStencil (MultiFab& dstmf, const MultiFab& srcmf, int scomp, int dcomp, int ncomp) +Filter::ApplyStencil (MultiFab& dstmf, const MultiFab& srcmf, const int lev, int scomp, int dcomp, int ncomp) { WARPX_PROFILE("Filter::ApplyStencil(MultiFab)"); ncomp = std::min(ncomp, srcmf.nComp()); + amrex::LayoutData<amrex::Real>* cost = WarpX::getCosts(lev); + for (MFIter mfi(dstmf); mfi.isValid(); ++mfi) { + if (cost && WarpX::load_balance_costs_update_algo == LoadBalanceCostsUpdateAlgo::Timers) + { + amrex::Gpu::synchronize(); + } + amrex::Real wt = amrex::second(); + const auto& src = srcmf.array(mfi); const auto& dst = dstmf.array(mfi); const Box& tbx = mfi.growntilebox(); @@ -62,6 +71,13 @@ Filter::ApplyStencil (MultiFab& dstmf, const MultiFab& srcmf, int scomp, int dco // Apply filter DoFilter(tbx, tmp, dst, 0, dcomp, ncomp); + + if (cost && WarpX::load_balance_costs_update_algo == LoadBalanceCostsUpdateAlgo::Timers) + { + amrex::Gpu::synchronize(); + wt = amrex::second() - wt; + amrex::HostDevice::Atomic::Add( &(*cost)[mfi.index()], wt); + } } } @@ -166,15 +182,19 @@ void Filter::DoFilter (const Box& tbx, /* \brief Apply stencil on MultiFab (CPU version, 2D/3D). * \param dstmf Destination MultiFab * \param srcmf source MultiFab + * \param[in] lev mesh refinement level * \param scomp first component of srcmf on which the filter is applied * \param dcomp first component of dstmf on which the filter is applied * \param ncomp Number of components on which the filter is applied. */ void -Filter::ApplyStencil (amrex::MultiFab& dstmf, const amrex::MultiFab& srcmf, int scomp, int dcomp, int ncomp) +Filter::ApplyStencil (amrex::MultiFab& dstmf, const amrex::MultiFab& srcmf, const int lev, int scomp, int dcomp, int ncomp) { WARPX_PROFILE("Filter::ApplyStencil(MultiFab)"); ncomp = std::min(ncomp, srcmf.nComp()); + + amrex::LayoutData<amrex::Real>* cost = WarpX::getCosts(lev); + #ifdef AMREX_USE_OMP // never runs on GPU since in the else branch of AMREX_USE_GPU #pragma omp parallel @@ -182,6 +202,13 @@ Filter::ApplyStencil (amrex::MultiFab& dstmf, const amrex::MultiFab& srcmf, int { FArrayBox tmpfab; for (MFIter mfi(dstmf,true); mfi.isValid(); ++mfi){ + + if (cost && WarpX::load_balance_costs_update_algo == LoadBalanceCostsUpdateAlgo::Timers) + { + amrex::Gpu::synchronize(); + } + amrex::Real wt = amrex::second(); + const auto& srcfab = srcmf[mfi]; auto& dstfab = dstmf[mfi]; const Box& tbx = mfi.growntilebox(); @@ -194,6 +221,13 @@ Filter::ApplyStencil (amrex::MultiFab& dstmf, const amrex::MultiFab& srcmf, int tmpfab.copy(srcfab, ibx, scomp, ibx, 0, ncomp); // Apply filter DoFilter(tbx, tmpfab.array(), dstfab.array(), 0, dcomp, ncomp); + + if (cost && WarpX::load_balance_costs_update_algo == LoadBalanceCostsUpdateAlgo::Timers) + { + amrex::Gpu::synchronize(); + wt = amrex::second() - wt; + amrex::HostDevice::Atomic::Add( &(*cost)[mfi.index()], wt); + } } } } diff --git a/Source/Parallelization/WarpXComm.cpp b/Source/Parallelization/WarpXComm.cpp index f91de969f..24b00c1e7 100644 --- a/Source/Parallelization/WarpXComm.cpp +++ b/Source/Parallelization/WarpXComm.cpp @@ -962,7 +962,7 @@ WarpX::ApplyFilterandSumBoundaryJ (int lev, PatchType patch_type) IntVect ng = j[idim]->nGrowVect(); ng += bilinear_filter.stencil_length_each_dir-1; MultiFab jf(j[idim]->boxArray(), j[idim]->DistributionMap(), j[idim]->nComp(), ng); - bilinear_filter.ApplyStencil(jf, *j[idim]); + bilinear_filter.ApplyStencil(jf, *j[idim], lev); WarpXSumGuardCells(*(j[idim]), jf, period, 0, (j[idim])->nComp()); } else { WarpXSumGuardCells(*(j[idim]), period, 0, (j[idim])->nComp()); @@ -1004,12 +1004,12 @@ WarpX::AddCurrentFromFineLevelandSumBoundary (int lev) ng += bilinear_filter.stencil_length_each_dir-1; MultiFab jfc(current_cp[lev+1][idim]->boxArray(), current_cp[lev+1][idim]->DistributionMap(), current_cp[lev+1][idim]->nComp(), ng); - bilinear_filter.ApplyStencil(jfc, *current_cp[lev+1][idim]); + bilinear_filter.ApplyStencil(jfc, *current_cp[lev+1][idim], lev); // buffer patch of fine level MultiFab jfb(current_buf[lev+1][idim]->boxArray(), current_buf[lev+1][idim]->DistributionMap(), current_buf[lev+1][idim]->nComp(), ng); - bilinear_filter.ApplyStencil(jfb, *current_buf[lev+1][idim]); + bilinear_filter.ApplyStencil(jfb, *current_buf[lev+1][idim], lev); MultiFab::Add(jfb, jfc, 0, 0, current_buf[lev+1][idim]->nComp(), ng); mf.ParallelAdd(jfb, 0, 0, current_buf[lev+1][idim]->nComp(), ng, IntVect::TheZeroVector(), period); @@ -1023,7 +1023,7 @@ WarpX::AddCurrentFromFineLevelandSumBoundary (int lev) ng += bilinear_filter.stencil_length_each_dir-1; MultiFab jf(current_cp[lev+1][idim]->boxArray(), current_cp[lev+1][idim]->DistributionMap(), current_cp[lev+1][idim]->nComp(), ng); - bilinear_filter.ApplyStencil(jf, *current_cp[lev+1][idim]); + bilinear_filter.ApplyStencil(jf, *current_cp[lev+1][idim], lev); mf.ParallelAdd(jf, 0, 0, current_cp[lev+1][idim]->nComp(), ng, IntVect::TheZeroVector(), period); WarpXSumGuardCells(*current_cp[lev+1][idim], jf, period, 0, current_cp[lev+1][idim]->nComp()); } @@ -1078,7 +1078,7 @@ WarpX::ApplyFilterandSumBoundaryRho (int /*lev*/, int glev, amrex::MultiFab& rho IntVect ng = rho.nGrowVect(); ng += bilinear_filter.stencil_length_each_dir-1; MultiFab rf(rho.boxArray(), rho.DistributionMap(), ncomp, ng); - bilinear_filter.ApplyStencil(rf, rho, icomp, 0, ncomp); + bilinear_filter.ApplyStencil(rf, rho, glev, icomp, 0, ncomp); WarpXSumGuardCells(rho, rf, period, icomp, ncomp ); } else { WarpXSumGuardCells(rho, period, icomp, ncomp); @@ -1119,12 +1119,12 @@ WarpX::AddRhoFromFineLevelandSumBoundary(int lev, int icomp, int ncomp) ng += bilinear_filter.stencil_length_each_dir-1; MultiFab rhofc(rho_cp[lev+1]->boxArray(), rho_cp[lev+1]->DistributionMap(), ncomp, ng); - bilinear_filter.ApplyStencil(rhofc, *rho_cp[lev+1], icomp, 0, ncomp); + bilinear_filter.ApplyStencil(rhofc, *rho_cp[lev+1], lev, icomp, 0, ncomp); // buffer patch of fine level MultiFab rhofb(charge_buf[lev+1]->boxArray(), charge_buf[lev+1]->DistributionMap(), ncomp, ng); - bilinear_filter.ApplyStencil(rhofb, *charge_buf[lev+1], icomp, 0, ncomp); + bilinear_filter.ApplyStencil(rhofb, *charge_buf[lev+1], lev, icomp, 0, ncomp); MultiFab::Add(rhofb, rhofc, 0, 0, ncomp, ng); mf.ParallelAdd(rhofb, 0, 0, ncomp, ng, IntVect::TheZeroVector(), period); @@ -1135,7 +1135,7 @@ WarpX::AddRhoFromFineLevelandSumBoundary(int lev, int icomp, int ncomp) IntVect ng = rho_cp[lev+1]->nGrowVect(); ng += bilinear_filter.stencil_length_each_dir-1; MultiFab rf(rho_cp[lev+1]->boxArray(), rho_cp[lev+1]->DistributionMap(), ncomp, ng); - bilinear_filter.ApplyStencil(rf, *rho_cp[lev+1], icomp, 0, ncomp); + bilinear_filter.ApplyStencil(rf, *rho_cp[lev+1], lev, icomp, 0, ncomp); mf.ParallelAdd(rf, 0, 0, ncomp, ng, IntVect::TheZeroVector(), period); WarpXSumGuardCells( *rho_cp[lev+1], rf, period, icomp, ncomp ); } |