diff options
Diffstat (limited to 'Source/Parallelization')
-rw-r--r-- | Source/Parallelization/GuardCellManager.H | 76 | ||||
-rw-r--r-- | Source/Parallelization/GuardCellManager.cpp | 171 | ||||
-rw-r--r-- | Source/Parallelization/InterpolateCurrentFineToCoarse.H | 4 | ||||
-rw-r--r-- | Source/Parallelization/Make.package | 2 | ||||
-rw-r--r-- | Source/Parallelization/WarpXComm.cpp | 99 |
5 files changed, 322 insertions, 30 deletions
diff --git a/Source/Parallelization/GuardCellManager.H b/Source/Parallelization/GuardCellManager.H new file mode 100644 index 000000000..c57745b84 --- /dev/null +++ b/Source/Parallelization/GuardCellManager.H @@ -0,0 +1,76 @@ +#ifndef GUARDCELLMANAGER_H_ +#define GUARDCELLMANAGER_H_ + +#include <AMReX_IntVect.H> + +/** + * \brief This class computes and stores the number of guard cells needed for + * the allocation of the MultiFabs and required for each part of the PIC loop. + */ +class guardCellManager{ + +public: + + /** + * \brief Initialize number of guard cells depending on the options used. + * + * \param do_subcycling bool, whether to use subcycling + * \param do_fdtd_nci_corr bool, whether to use Godfrey NCI corrector + * \param do_nodal bool, whether the field solver is nodal + * \param do_moving_window bool, whether to use moving window + * \param do_fft_mpi_dec bool, whether to do parallel FFTs for PSATD + * \param aux_is_nodal bool, true if the aux grid is nodal + * \param moving_window_dir direction of moving window + * \param nox order of current deposition + * \param nox_fft order of PSATD in x direction + * \param noy_fft order of PSATD in y direction + * \param noz_fft order of PSATD in z direction + * \param nci_corr_stencil stencil of NCI corrector + * \param maxwell_fdtd_solver_id if of Maxwell solver + * \param max_level max level of the simulation + */ + void Init( + const bool do_subcycling, + const bool do_fdtd_nci_corr, + const bool do_nodal, + const bool do_moving_window, + const bool do_fft_mpi_dec, + const bool aux_is_nodal, + const int moving_window_dir, + const int nox, + const int nox_fft, const int noy_fft, const int noz_fft, + const int nci_corr_stencil, + const int maxwell_fdtd_solver_id, + const int max_level, + const bool exchange_all_guard_cells); + + // Guard cells allocated for MultiFabs E and B + amrex::IntVect ng_alloc_EB = amrex::IntVect::TheZeroVector(); + // Guard cells allocated for MultiFab J + amrex::IntVect ng_alloc_J = amrex::IntVect::TheZeroVector(); + // Guard cells allocated for MultiFab Rho + amrex::IntVect ng_alloc_Rho = amrex::IntVect::TheZeroVector(); + // Guard cells allocated for MultiFab F + amrex::IntVect ng_alloc_F = amrex::IntVect::TheZeroVector(); + + // Guard cells exchanged for specific parts of the PIC loop + + // Number of guard cells of E and B that must exchanged before Field Solver + amrex::IntVect ng_FieldSolver = amrex::IntVect::TheZeroVector(); + // Number of guard cells of F that must exchanged before Field Solver + amrex::IntVect ng_FieldSolverF = amrex::IntVect::TheZeroVector(); + // Number of guard cells of E and B that must exchanged before Field Gather + amrex::IntVect ng_FieldGather = amrex::IntVect::TheZeroVector(); + // Number of guard cells of E and B that must exchanged before updating the Aux grid + amrex::IntVect ng_UpdateAux = amrex::IntVect::TheZeroVector(); + // Number of guard cells of all MultiFabs that must exchanged before moving window + amrex::IntVect ng_MovingWindow = amrex::IntVect::TheZeroVector(); + + // When the auxiliary grid is nodal but the field solver is staggered + // (typically with momentum-conserving gather with FDTD Yee solver), + // An extra guard cell is needed on the fine grid to do the interpolation + // for E and B. + amrex::IntVect ng_Extra = amrex::IntVect::TheZeroVector(); +}; + +#endif // GUARDCELLMANAGER_H_ diff --git a/Source/Parallelization/GuardCellManager.cpp b/Source/Parallelization/GuardCellManager.cpp new file mode 100644 index 000000000..a275f4c00 --- /dev/null +++ b/Source/Parallelization/GuardCellManager.cpp @@ -0,0 +1,171 @@ +#include "GuardCellManager.H" +#include "NCIGodfreyFilter.H" +#include <AMReX_Print.H> + +using namespace amrex; + +void +guardCellManager::Init( + const bool do_subcycling, + const bool do_fdtd_nci_corr, + const bool do_nodal, + const bool do_moving_window, + const bool do_fft_mpi_dec, + const bool aux_is_nodal, + const int moving_window_dir, + const int nox, + const int nox_fft, const int noy_fft, const int noz_fft, + const int nci_corr_stencil, + const int maxwell_fdtd_solver_id, + const int max_level, + const bool exchange_all_guard_cells) +{ + // When using subcycling, the particles on the fine level perform two pushes + // before being redistributed ; therefore, we need one extra guard cell + // (the particles may move by 2*c*dt) + const int ngx_tmp = (max_level > 0 && do_subcycling == 1) ? nox+1 : nox; + const int ngy_tmp = (max_level > 0 && do_subcycling == 1) ? nox+1 : nox; + const int ngz_tmp = (max_level > 0 && do_subcycling == 1) ? nox+1 : nox; + + // Ex, Ey, Ez, Bx, By, and Bz have the same number of ghost cells. + // jx, jy, jz and rho have the same number of ghost cells. + // E and B have the same number of ghost cells as j and rho if NCI filter is not used, + // but different number of ghost cells in z-direction if NCI filter is used. + // The number of cells should be even, in order to easily perform the + // interpolation from coarse grid to fine grid. + int ngx = (ngx_tmp % 2) ? ngx_tmp+1 : ngx_tmp; // Always even number + int ngy = (ngy_tmp % 2) ? ngy_tmp+1 : ngy_tmp; // Always even number + int ngz_nonci = (ngz_tmp % 2) ? ngz_tmp+1 : ngz_tmp; // Always even number + int ngz; + if (do_fdtd_nci_corr) { + int ng = ngz_tmp + nci_corr_stencil; + ngz = (ng % 2) ? ng+1 : ng; + } else { + ngz = ngz_nonci; + } + + // J is only interpolated from fine to coarse (not coarse to fine) + // and therefore does not need to be even. + int ngJx = ngx_tmp; + int ngJy = ngy_tmp; + int ngJz = ngz_tmp; + + // When calling the moving window (with one level of refinement), we shift + // the fine grid by 2 cells ; therefore, we need at least 2 guard cells + // on level 1. This may not be necessary for level 0. + if (do_moving_window) { + ngx = std::max(ngx,2); + ngy = std::max(ngy,2); + ngz = std::max(ngz,2); + ngJx = std::max(ngJx,2); + ngJy = std::max(ngJy,2); + ngJz = std::max(ngJz,2); + } + +#if (AMREX_SPACEDIM == 3) + ng_alloc_EB = IntVect(ngx,ngy,ngz); + ng_alloc_J = IntVect(ngJx,ngJy,ngJz); +#elif (AMREX_SPACEDIM == 2) + ng_alloc_EB = IntVect(ngx,ngz); + ng_alloc_J = IntVect(ngJx,ngJz); +#endif + + ng_alloc_Rho = ng_alloc_J+1; //One extra ghost cell, so that it's safe to deposit charge density + // after pushing particle. + int ng_alloc_F_int = (do_moving_window) ? 2 : 0; + // CKC solver requires one additional guard cell + if (maxwell_fdtd_solver_id == 1) ng_alloc_F_int = std::max( ng_alloc_F_int, 1 ); + ng_alloc_F = IntVect(AMREX_D_DECL(ng_alloc_F_int, ng_alloc_F_int, ng_alloc_F_int)); + +#ifdef WARPX_USE_PSATD + if (do_fft_mpi_dec == false){ + // All boxes should have the same number of guard cells + // (to avoid temporary parallel copies) + // Thus take the max of the required number of guards for each field + // Also: the number of guard cell should be enough to contain + // the stencil of the FFT solver. Here, this number (`ngFFT`) + // is determined *empirically* to be the order of the solver + // for nodal, and half the order of the solver for staggered. + IntVect ngFFT; + if (do_nodal) { + ngFFT = IntVect(AMREX_D_DECL(nox_fft, noy_fft, noz_fft)); + } else { + ngFFT = IntVect(AMREX_D_DECL(nox_fft/2, noy_fft/2, noz_fft/2)); + } + for (int i_dim=0; i_dim<AMREX_SPACEDIM; i_dim++ ){ + int ng_required = ngFFT[i_dim]; + // Get the max + ng_required = std::max( ng_required, ng_alloc_EB[i_dim] ); + ng_required = std::max( ng_required, ng_alloc_J[i_dim] ); + ng_required = std::max( ng_required, ng_alloc_Rho[i_dim] ); + ng_required = std::max( ng_required, ng_alloc_F[i_dim] ); + // Set the guard cells to this max + ng_alloc_EB[i_dim] = ng_required; + ng_alloc_J[i_dim] = ng_required; + ng_alloc_F[i_dim] = ng_required; + ng_alloc_Rho[i_dim] = ng_required; + ng_alloc_F_int = ng_required; + } + } + ng_alloc_F = IntVect(AMREX_D_DECL(ng_alloc_F_int, ng_alloc_F_int, ng_alloc_F_int)); +#endif + + ng_Extra = IntVect(static_cast<int>(aux_is_nodal and !do_nodal)); + + // Compute number of cells required for Field Solver +#ifdef WARPX_USE_PSATD + ng_FieldSolver = ng_alloc_EB; + ng_FieldSolverF = ng_alloc_EB; +#else + ng_FieldSolver = IntVect(AMREX_D_DECL(1,1,1)); + ng_FieldSolverF = IntVect(AMREX_D_DECL(1,1,1)); +#endif + + if (exchange_all_guard_cells){ + // Run in safe mode: exchange all allocated guard cells at each + // call of FillBoundary + ng_FieldSolver = ng_alloc_EB; + ng_FieldSolverF = ng_alloc_F; + ng_FieldGather = ng_alloc_EB; + ng_UpdateAux = ng_alloc_EB; + if (do_moving_window){ + ng_MovingWindow = ng_alloc_EB; + } + } else { + + ng_FieldSolver = ng_FieldSolver.min(ng_alloc_EB); + + // Compute number of cells required for Field Gather + int FGcell[4] = {0,1,1,2}; // Index is nox + IntVect ng_FieldGather_noNCI = IntVect(AMREX_D_DECL(FGcell[nox],FGcell[nox],FGcell[nox])); + // Add one cell if momentum_conserving gather in a staggered-field simulation + ng_FieldGather_noNCI += ng_Extra; + // Not sure why, but need one extra guard cell when using MR + if (max_level >= 1) ng_FieldGather_noNCI += ng_Extra; + ng_FieldGather_noNCI = ng_FieldGather_noNCI.min(ng_alloc_EB); + // If NCI filter, add guard cells in the z direction + IntVect ng_NCIFilter = IntVect::TheZeroVector(); + if (do_fdtd_nci_corr) + ng_NCIFilter[AMREX_SPACEDIM-1] = NCIGodfreyFilter::m_stencil_width; + // Note: communications of guard cells for bilinear filter are handled + // separately. + ng_FieldGather = ng_FieldGather_noNCI + ng_NCIFilter; + + // Guard cells for auxiliary grid. + // Not sure why there is a 2* here... + ng_UpdateAux = 2*ng_FieldGather_noNCI + ng_NCIFilter; + + // Make sure we do not exchange more guard cells than allocated. + ng_FieldGather = ng_FieldGather.min(ng_alloc_EB); + ng_UpdateAux = ng_UpdateAux.min(ng_alloc_EB); + ng_FieldSolverF = ng_FieldSolverF.min(ng_alloc_F); + // Only FillBoundary(ng_FieldGather) is called between consecutive + // field solves. So ng_FieldGather must have enough cells + // for the field solve too. + ng_FieldGather = ng_FieldGather.max(ng_FieldSolver); + + if (do_moving_window){ + ng_MovingWindow[moving_window_dir] = 1; + } + } +} diff --git a/Source/Parallelization/InterpolateCurrentFineToCoarse.H b/Source/Parallelization/InterpolateCurrentFineToCoarse.H index cbbcdfab5..58451c6b7 100644 --- a/Source/Parallelization/InterpolateCurrentFineToCoarse.H +++ b/Source/Parallelization/InterpolateCurrentFineToCoarse.H @@ -63,9 +63,9 @@ public: // return zero for out-of-bounds accesses during interpolation // this is efficiently used as a method to add neutral elements beyond guards in the average below - auto const fine = [fine_unsafe] AMREX_GPU_DEVICE (int const j, int const k, int const l) noexcept + auto const fine = [fine_unsafe] AMREX_GPU_DEVICE (int const jj, int const kk, int const ll) noexcept { - return fine_unsafe.contains(j, k, l) ? fine_unsafe(j, k, l) : amrex::Real{0.}; + return fine_unsafe.contains(jj, kk, ll) ? fine_unsafe(jj, kk, ll) : amrex::Real{0.}; }; int const ii = i * m_refinement_ratio; diff --git a/Source/Parallelization/Make.package b/Source/Parallelization/Make.package index 7c3c38627..065556b33 100644 --- a/Source/Parallelization/Make.package +++ b/Source/Parallelization/Make.package @@ -1,7 +1,9 @@ CEXE_sources += WarpXComm.cpp CEXE_sources += WarpXRegrid.cpp +CEXE_sources += GuardCellManager.cpp CEXE_headers += WarpXSumGuardCells.H CEXE_headers += WarpXComm_K.H +CEXE_headers += GuardCellManager.H CEXE_headers += WarpXComm.H INCLUDE_LOCATIONS += $(WARPX_HOME)/Source/Parallelization diff --git a/Source/Parallelization/WarpXComm.cpp b/Source/Parallelization/WarpXComm.cpp index b61ae4fc7..2e0cbdfad 100644 --- a/Source/Parallelization/WarpXComm.cpp +++ b/Source/Parallelization/WarpXComm.cpp @@ -321,41 +321,41 @@ WarpX::UpdateAuxilaryDataSameType () } void -WarpX::FillBoundaryB () +WarpX::FillBoundaryB (IntVect ng, IntVect ng_extra_fine) { for (int lev = 0; lev <= finest_level; ++lev) { - FillBoundaryB(lev); + FillBoundaryB(lev, ng, ng_extra_fine); } } void -WarpX::FillBoundaryE () +WarpX::FillBoundaryE (IntVect ng, IntVect ng_extra_fine) { for (int lev = 0; lev <= finest_level; ++lev) { - FillBoundaryE(lev); + FillBoundaryE(lev, ng, ng_extra_fine); } } void -WarpX::FillBoundaryF () +WarpX::FillBoundaryF (IntVect ng) { for (int lev = 0; lev <= finest_level; ++lev) { - FillBoundaryF(lev); + FillBoundaryF(lev, ng); } } void -WarpX::FillBoundaryE(int lev) +WarpX::FillBoundaryE(int lev, IntVect ng, IntVect ng_extra_fine) { - FillBoundaryE(lev, PatchType::fine); - if (lev > 0) FillBoundaryE(lev, PatchType::coarse); + FillBoundaryE(lev, PatchType::fine, ng+ng_extra_fine); + if (lev > 0) FillBoundaryE(lev, PatchType::coarse, ng); } void -WarpX::FillBoundaryE (int lev, PatchType patch_type) +WarpX::FillBoundaryE (int lev, PatchType patch_type, IntVect ng) { if (patch_type == PatchType::fine) { @@ -370,8 +370,12 @@ WarpX::FillBoundaryE (int lev, PatchType patch_type) } const auto& period = Geom(lev).periodicity(); - Vector<MultiFab*> mf{Efield_fp[lev][0].get(),Efield_fp[lev][1].get(),Efield_fp[lev][2].get()}; - amrex::FillBoundary(mf, period); + AMREX_ALWAYS_ASSERT_WITH_MESSAGE( + ng <= Efield_fp[lev][0]->nGrowVect(), + "Error: in FillBoundaryE, requested more guard cells than allocated"); + Efield_fp[lev][0]->FillBoundary(ng, period); + Efield_fp[lev][1]->FillBoundary(ng, period); + Efield_fp[lev][2]->FillBoundary(ng, period); } else if (patch_type == PatchType::coarse) { @@ -386,20 +390,24 @@ WarpX::FillBoundaryE (int lev, PatchType patch_type) } const auto& cperiod = Geom(lev-1).periodicity(); - Vector<MultiFab*> mf{Efield_cp[lev][0].get(),Efield_cp[lev][1].get(),Efield_cp[lev][2].get()}; - amrex::FillBoundary(mf, cperiod); + AMREX_ALWAYS_ASSERT_WITH_MESSAGE( + ng <= Efield_cp[lev][0]->nGrowVect(), + "Error: in FillBoundaryE, requested more guard cells than allocated"); + Efield_cp[lev][0]->FillBoundary(ng, cperiod); + Efield_cp[lev][1]->FillBoundary(ng, cperiod); + Efield_cp[lev][2]->FillBoundary(ng, cperiod); } } void -WarpX::FillBoundaryB (int lev) +WarpX::FillBoundaryB (int lev, IntVect ng, IntVect ng_extra_fine) { - FillBoundaryB(lev, PatchType::fine); - if (lev > 0) FillBoundaryB(lev, PatchType::coarse); + FillBoundaryB(lev, PatchType::fine, ng + ng_extra_fine); + if (lev > 0) FillBoundaryB(lev, PatchType::coarse, ng); } void -WarpX::FillBoundaryB (int lev, PatchType patch_type) +WarpX::FillBoundaryB (int lev, PatchType patch_type, IntVect ng) { if (patch_type == PatchType::fine) { @@ -413,8 +421,12 @@ WarpX::FillBoundaryB (int lev, PatchType patch_type) pml[lev]->FillBoundaryB(patch_type); } const auto& period = Geom(lev).periodicity(); - Vector<MultiFab*> mf{Bfield_fp[lev][0].get(),Bfield_fp[lev][1].get(),Bfield_fp[lev][2].get()}; - amrex::FillBoundary(mf, period); + AMREX_ALWAYS_ASSERT_WITH_MESSAGE( + ng <= Bfield_fp[lev][0]->nGrowVect(), + "Error: in FillBoundaryB, requested more guard cells than allocated"); + Bfield_fp[lev][0]->FillBoundary(ng, period); + Bfield_fp[lev][1]->FillBoundary(ng, period); + Bfield_fp[lev][2]->FillBoundary(ng, period); } else if (patch_type == PatchType::coarse) { @@ -428,20 +440,24 @@ WarpX::FillBoundaryB (int lev, PatchType patch_type) pml[lev]->FillBoundaryB(patch_type); } const auto& cperiod = Geom(lev-1).periodicity(); - Vector<MultiFab*> mf{Bfield_cp[lev][0].get(),Bfield_cp[lev][1].get(),Bfield_cp[lev][2].get()}; - amrex::FillBoundary(mf, cperiod); + AMREX_ALWAYS_ASSERT_WITH_MESSAGE( + ng <= Bfield_cp[lev][0]->nGrowVect(), + "Error: in FillBoundaryB, requested more guard cells than allocated"); + Bfield_cp[lev][0]->FillBoundary(ng, cperiod); + Bfield_cp[lev][1]->FillBoundary(ng, cperiod); + Bfield_cp[lev][2]->FillBoundary(ng, cperiod); } } void -WarpX::FillBoundaryF (int lev) +WarpX::FillBoundaryF (int lev, IntVect ng) { - FillBoundaryF(lev, PatchType::fine); - if (lev > 0) FillBoundaryF(lev, PatchType::coarse); + FillBoundaryF(lev, PatchType::fine, ng); + if (lev > 0) FillBoundaryF(lev, PatchType::coarse, ng); } void -WarpX::FillBoundaryF (int lev, PatchType patch_type) +WarpX::FillBoundaryF (int lev, PatchType patch_type, IntVect ng) { if (patch_type == PatchType::fine && F_fp[lev]) { @@ -453,7 +469,10 @@ WarpX::FillBoundaryF (int lev, PatchType patch_type) } const auto& period = Geom(lev).periodicity(); - F_fp[lev]->FillBoundary(period); + AMREX_ALWAYS_ASSERT_WITH_MESSAGE( + ng <= F_fp[lev]->nGrowVect(), + "Error: in FillBoundaryF, requested more guard cells than allocated"); + F_fp[lev]->FillBoundary(ng, period); } else if (patch_type == PatchType::coarse && F_cp[lev]) { @@ -465,11 +484,35 @@ WarpX::FillBoundaryF (int lev, PatchType patch_type) } const auto& cperiod = Geom(lev-1).periodicity(); - F_cp[lev]->FillBoundary(cperiod); + AMREX_ALWAYS_ASSERT_WITH_MESSAGE( + ng <= F_cp[lev]->nGrowVect(), + "Error: in FillBoundaryF, requested more guard cells than allocated"); + F_cp[lev]->FillBoundary(ng, cperiod); } } void +WarpX::FillBoundaryAux (IntVect ng) +{ + for (int lev = 0; lev <= finest_level-1; ++lev) + { + FillBoundaryAux(lev, ng); + } +} + +void +WarpX::FillBoundaryAux (int lev, IntVect ng) +{ + const auto& period = Geom(lev).periodicity(); + Efield_aux[lev][0]->FillBoundary(ng, period); + Efield_aux[lev][1]->FillBoundary(ng, period); + Efield_aux[lev][2]->FillBoundary(ng, period); + Bfield_aux[lev][0]->FillBoundary(ng, period); + Bfield_aux[lev][1]->FillBoundary(ng, period); + Bfield_aux[lev][2]->FillBoundary(ng, period); +} + +void WarpX::SyncCurrent () { BL_PROFILE("SyncCurrent()"); |