diff options
23 files changed, 581 insertions, 138 deletions
diff --git a/Docs/source/usage/parameters.rst b/Docs/source/usage/parameters.rst index 313f9b405..c3f170970 100644 --- a/Docs/source/usage/parameters.rst +++ b/Docs/source/usage/parameters.rst @@ -192,6 +192,10 @@ Setting up the field mesh automatically set so that it is one cell larger than ``n_current_deposition_buffer``, on the fine grid. +* ``warpx.do_single_precision_comms`` (`integer`; 0 by default) + Perform MPI communications for field guard regions in single precision. + Only meaningful for ``WarpX_PRECISION=DOUBLE``. + * ``particles.deposit_on_main_grid`` (`list of strings`) When using mesh refinement: the particle species whose name are included in the list will deposit their charge/current directly on the main grid diff --git a/Examples/analysis_default_regression.py b/Examples/analysis_default_regression.py index b40bb6675..7de0ad403 100755 --- a/Examples/analysis_default_regression.py +++ b/Examples/analysis_default_regression.py @@ -1,6 +1,7 @@ #! /usr/bin/env python import sys +import re sys.path.insert(1, '../../../../warpx/Regression/Checksum/') import checksumAPI @@ -11,4 +12,8 @@ fn = sys.argv[1] test_name = fn[:-9] # Could also be os.path.split(os.getcwd())[1] # Run checksum regression test -checksumAPI.evaluate_checksum(test_name, fn) +if re.search( 'single_precision', fn ): + checksumAPI.evaluate_checksum(test_name, fn, rtol=2.e-6) +else: + checksumAPI.evaluate_checksum(test_name, fn) + diff --git a/Regression/Checksum/benchmarks_json/LaserAcceleration_single_precision_comms.json b/Regression/Checksum/benchmarks_json/LaserAcceleration_single_precision_comms.json new file mode 100644 index 000000000..87e9f0c08 --- /dev/null +++ b/Regression/Checksum/benchmarks_json/LaserAcceleration_single_precision_comms.json @@ -0,0 +1,25 @@ +{ + "electrons": { + "particle_cpu": 69212.0, + "particle_id": 2655287162.0, + "particle_momentum_x": 1.7927389016680068e-20, + "particle_momentum_y": 7.226328445224444e-20, + "particle_momentum_z": 4.2323696789980776e-20, + "particle_position_x": 0.7139122548077861, + "particle_position_y": 0.7150340437975914, + "particle_position_z": 1.3175770609645199, + "particle_weight": 12926557617.187498 + }, + "lev=0": { + "Bx": 5863857.817897017, + "By": 2411.6004969074365, + "Bz": 116028.92381573828, + "Ex": 6268723833912.179, + "Ey": 1670757908382730.2, + "Ez": 104345840182128.1, + "jx": 555906521750828.2, + "jy": 1596094484782223.5, + "jz": 1045525028952434.1, + "rho": 2211743043.5912476 + } +}
\ No newline at end of file diff --git a/Regression/WarpX-tests.ini b/Regression/WarpX-tests.ini index e37f687f7..c5f082bc1 100644 --- a/Regression/WarpX-tests.ini +++ b/Regression/WarpX-tests.ini @@ -817,6 +817,24 @@ particleTypes = electrons analysisRoutine = Examples/analysis_default_regression.py tolerance = 1.e-14 +[LaserAcceleration_single_precision_comms] +buildDir = . +inputFile = Examples/Physics_applications/laser_acceleration/inputs_3d +runtime_params = warpx.do_dynamic_scheduling=0 amr.n_cell=32 32 256 max_step=100 electrons.zmin=0.e-6 warpx.serialize_ics=1 warpx.do_single_precision_comms=1 +dim = 3 +addToCompileString = +restartTest = 0 +useMPI = 1 +numprocs = 2 +useOMP = 1 +numthreads = 2 +compileTest = 0 +doVis = 0 +compareParticles = 1 +particleTypes = electrons +analysisRoutine = Examples/analysis_default_regression.py +tolerance = 2.e-7 + [subcyclingMR] buildDir = . inputFile = Examples/Tests/subcycling/inputs_2d diff --git a/Source/BoundaryConditions/PML.cpp b/Source/BoundaryConditions/PML.cpp index 700dcdce4..1d06408a2 100644 --- a/Source/BoundaryConditions/PML.cpp +++ b/Source/BoundaryConditions/PML.cpp @@ -17,6 +17,7 @@ #include "Utils/WarpXConst.H" #include "Utils/WarpXProfilerWrapper.H" #include "WarpX.H" +#include "Parallelization/WarpXCommUtil.H" #include <AMReX.H> #include <AMReX_Algorithm.H> @@ -1003,6 +1004,7 @@ PML::Exchange (MultiFab& pml, MultiFab& reg, const Geometry& geom, // Create temporary MultiFab to copy to and from the PML MultiFab tmpregmf(reg.boxArray(), reg.DistributionMap(), ncp, ngr); + tmpregmf.setVal(0.0); // Create the sum of the split fields, in the PML MultiFab totpmlmf(pml.boxArray(), pml.DistributionMap(), 1, 0); // Allocate @@ -1015,7 +1017,7 @@ PML::Exchange (MultiFab& pml, MultiFab& reg, const Geometry& geom, if (do_pml_in_domain){ // Valid cells of the PML and of the regular grid overlap // Copy from valid cells of the PML to valid cells of the regular grid - reg.ParallelCopy(totpmlmf, 0, 0, 1, IntVect(0), IntVect(0), period); + WarpXCommUtil::ParallelCopy(reg, totpmlmf, 0, 0, 1, IntVect(0), IntVect(0), period); } else { // Valid cells of the PML only overlap with guard cells of regular grid // (and outermost valid cell of the regular grid, for nodal direction) @@ -1023,7 +1025,7 @@ PML::Exchange (MultiFab& pml, MultiFab& reg, const Geometry& geom, // but avoid updating the outermost valid cell if (ngr.max() > 0) { MultiFab::Copy(tmpregmf, reg, 0, 0, 1, ngr); - tmpregmf.ParallelCopy(totpmlmf, 0, 0, 1, IntVect(0), ngr, period); + WarpXCommUtil::ParallelCopy(tmpregmf, totpmlmf, 0, 0, 1, IntVect(0), ngr, period); #ifdef AMREX_USE_OMP #pragma omp parallel if (Gpu::notInLaunchRegion()) #endif @@ -1056,9 +1058,9 @@ PML::Exchange (MultiFab& pml, MultiFab& reg, const Geometry& geom, // Where valid cells of tmpregmf overlap with PML valid cells, // copy the PML (this is order to avoid overwriting PML valid cells, // in the next `ParallelCopy`) - tmpregmf.ParallelCopy(pml,0, 0, ncp, IntVect(0), IntVect(0), period); + WarpXCommUtil::ParallelCopy(tmpregmf, pml,0, 0, ncp, IntVect(0), IntVect(0), period); } - pml.ParallelCopy(tmpregmf, 0, 0, ncp, IntVect(0), ngp, period); + WarpXCommUtil::ParallelCopy(pml, tmpregmf, 0, 0, ncp, IntVect(0), ngp, period); } @@ -1068,7 +1070,7 @@ PML::CopyToPML (MultiFab& pml, MultiFab& reg, const Geometry& geom) const IntVect& ngp = pml.nGrowVect(); const auto& period = geom.periodicity(); - pml.ParallelCopy(reg, 0, 0, 1, IntVect(0), ngp, period); + WarpXCommUtil::ParallelCopy(pml, reg, 0, 0, 1, IntVect(0), ngp, period); } void @@ -1094,13 +1096,13 @@ PML::FillBoundaryE (PatchType patch_type) { const auto& period = m_geom->periodicity(); Vector<MultiFab*> mf{pml_E_fp[0].get(),pml_E_fp[1].get(),pml_E_fp[2].get()}; - amrex::FillBoundary(mf, period); + WarpXCommUtil::FillBoundary(mf, period); } else if (patch_type == PatchType::coarse && pml_E_cp[0] && pml_E_cp[0]->nGrowVect().max() > 0) { const auto& period = m_cgeom->periodicity(); Vector<MultiFab*> mf{pml_E_cp[0].get(),pml_E_cp[1].get(),pml_E_cp[2].get()}; - amrex::FillBoundary(mf, period); + WarpXCommUtil::FillBoundary(mf, period); } } @@ -1118,13 +1120,13 @@ PML::FillBoundaryB (PatchType patch_type) { const auto& period = m_geom->periodicity(); Vector<MultiFab*> mf{pml_B_fp[0].get(),pml_B_fp[1].get(),pml_B_fp[2].get()}; - amrex::FillBoundary(mf, period); + WarpXCommUtil::FillBoundary(mf, period); } else if (patch_type == PatchType::coarse && pml_B_cp[0]) { const auto& period = m_cgeom->periodicity(); Vector<MultiFab*> mf{pml_B_cp[0].get(),pml_B_cp[1].get(),pml_B_cp[2].get()}; - amrex::FillBoundary(mf, period); + WarpXCommUtil::FillBoundary(mf, period); } } @@ -1141,12 +1143,12 @@ PML::FillBoundaryF (PatchType patch_type) if (patch_type == PatchType::fine && pml_F_fp && pml_F_fp->nGrowVect().max() > 0) { const auto& period = m_geom->periodicity(); - pml_F_fp->FillBoundary(period); + WarpXCommUtil::FillBoundary(*pml_F_fp, period); } else if (patch_type == PatchType::coarse && pml_F_cp && pml_F_cp->nGrowVect().max() > 0) { const auto& period = m_cgeom->periodicity(); - pml_F_cp->FillBoundary(period); + WarpXCommUtil::FillBoundary(*pml_F_cp, period); } } @@ -1163,12 +1165,12 @@ PML::FillBoundaryG (PatchType patch_type) if (patch_type == PatchType::fine && pml_G_fp && pml_G_fp->nGrowVect().max() > 0) { const auto& period = m_geom->periodicity(); - pml_G_fp->FillBoundary(period); + WarpXCommUtil::FillBoundary(*pml_G_fp, period); } else if (patch_type == PatchType::coarse && pml_G_cp && pml_G_cp->nGrowVect().max() > 0) { const auto& period = m_cgeom->periodicity(); - pml_G_cp->FillBoundary(period); + WarpXCommUtil::FillBoundary(*pml_G_cp, period); } } diff --git a/Source/Diagnostics/BTDiagnostics.cpp b/Source/Diagnostics/BTDiagnostics.cpp index fadd97b0f..ae5c7a53b 100644 --- a/Source/Diagnostics/BTDiagnostics.cpp +++ b/Source/Diagnostics/BTDiagnostics.cpp @@ -1,5 +1,4 @@ #include "BTDiagnostics.H" - #include "BTD_Plotfile_Header_Impl.H" #include "ComputeDiagFunctors/BackTransformFunctor.H" #include "ComputeDiagFunctors/CellCenterFunctor.H" @@ -7,6 +6,7 @@ #include "ComputeDiagFunctors/RhoFunctor.H" #include "Diagnostics/Diagnostics.H" #include "Diagnostics/FlushFormats/FlushFormat.H" +#include "Parallelization/WarpXCommUtil.H" #include "Utils/CoarsenIO.H" #include "Utils/WarpXConst.H" #include "Utils/WarpXUtil.H" @@ -403,7 +403,7 @@ BTDiagnostics::PrepareFieldDataForOutput () AMREX_ALWAYS_ASSERT( icomp_dst == m_cellcenter_varnames.size() ); // fill boundary call is required to average_down (flatten) data to // the coarsest level. - m_cell_centered_data[lev]->FillBoundary(warpx.Geom(lev).periodicity() ); + WarpXCommUtil::FillBoundary(*m_cell_centered_data[lev], warpx.Geom(lev).periodicity()); } // Flattening out MF over levels diff --git a/Source/Diagnostics/BackTransformedDiagnostic.cpp b/Source/Diagnostics/BackTransformedDiagnostic.cpp index 171687942..17b59d3ae 100644 --- a/Source/Diagnostics/BackTransformedDiagnostic.cpp +++ b/Source/Diagnostics/BackTransformedDiagnostic.cpp @@ -7,6 +7,7 @@ */ #include "BackTransformedDiagnostic.H" +#include "Parallelization/WarpXCommUtil.H" #include "Utils/WarpXConst.H" #include "Utils/WarpXProfilerWrapper.H" #include "WarpX.H" @@ -757,8 +758,10 @@ void BackTransformedDiagnostic::Flush (const Geometry& /*geom*/) const int ncomp = lf_diags->m_data_buffer_->nComp(); MultiFab tmp(buff_ba, buff_dm, ncomp, 0); + tmp.setVal(0.0); - tmp.ParallelCopy(*lf_diags->m_data_buffer_, 0, 0, ncomp); + WarpXCommUtil::ParallelCopy(tmp, *lf_diags->m_data_buffer_, 0, 0, ncomp, + IntVect(AMREX_D_DECL(0, 0, 0)), IntVect(AMREX_D_DECL(0, 0, 0))); #ifdef WARPX_USE_HDF5 for (int comp = 0; comp < ncomp; ++comp) { @@ -905,6 +908,7 @@ writeLabFrameData (const MultiFab* cell_centered_data, tmp_slice_ptr = std::make_unique<MultiFab>(slice_ba, lf_diags->m_data_buffer_->DistributionMap(), ncomp, 0); + tmp_slice_ptr->setVal(0.0); // slice is re-used if the t_lab of a diag is equal to // that of the previous diag. @@ -912,7 +916,8 @@ writeLabFrameData (const MultiFab* cell_centered_data, // which has the dmap of the domain to // tmp_slice_ptr which has the dmap of the // data_buffer that stores the back-transformed data. - tmp_slice_ptr->ParallelCopy(*slice, 0, 0, ncomp); + WarpXCommUtil::ParallelCopy(*tmp_slice_ptr, *slice, 0, 0, ncomp, + IntVect(AMREX_D_DECL(0, 0, 0)), IntVect(AMREX_D_DECL(0, 0, 0))); lf_diags->AddDataToBuffer(*tmp_slice_ptr, i_lab, map_actual_fields_to_dump); tmp_slice_ptr = nullptr; diff --git a/Source/Diagnostics/ComputeDiagFunctors/BackTransformFunctor.cpp b/Source/Diagnostics/ComputeDiagFunctors/BackTransformFunctor.cpp index 78e070bfd..4af43b4b8 100644 --- a/Source/Diagnostics/ComputeDiagFunctors/BackTransformFunctor.cpp +++ b/Source/Diagnostics/ComputeDiagFunctors/BackTransformFunctor.cpp @@ -1,6 +1,7 @@ #include "BackTransformFunctor.H" #include "Diagnostics/ComputeDiagFunctors/ComputeDiagFunctor.H" +#include "Parallelization/WarpXCommUtil.H" #include "Utils/WarpXConst.H" #include "WarpX.H" @@ -70,11 +71,13 @@ BackTransformFunctor::operator ()(amrex::MultiFab& mf_dst, int /*dcomp*/, const // containing all ten components that were in the slice generated from m_mf_src. std::unique_ptr< amrex::MultiFab > tmp_slice_ptr = nullptr; tmp_slice_ptr = std::make_unique<MultiFab> ( slice_ba, mf_dst.DistributionMap(), - slice->nComp(), 0 ); + slice->nComp(), 0 ); + tmp_slice_ptr->setVal(0.0); // Parallel copy the lab-frame data from "slice" MultiFab with // ncomp=10 and boosted-frame dmap to "tmp_slice_ptr" MultiFab with // ncomp=10 and dmap of the destination Multifab, which will store the final data - tmp_slice_ptr->ParallelCopy( *slice, 0, 0, slice->nComp() ); + WarpXCommUtil::ParallelCopy(*tmp_slice_ptr, *slice, 0, 0, slice->nComp(), + IntVect(AMREX_D_DECL(0, 0, 0)), IntVect(AMREX_D_DECL(0, 0, 0))); // Now we will cherry pick only the user-defined fields from // tmp_slice_ptr to dst_mf const int k_lab = m_k_index_zlab[i_buffer]; diff --git a/Source/Diagnostics/Diagnostics.cpp b/Source/Diagnostics/Diagnostics.cpp index a6d85185c..9df599fa7 100644 --- a/Source/Diagnostics/Diagnostics.cpp +++ b/Source/Diagnostics/Diagnostics.cpp @@ -11,6 +11,7 @@ #include "FlushFormats/FlushFormatPlotfile.H" #include "FlushFormats/FlushFormatSensei.H" #include "Particles/MultiParticleContainer.H" +#include "Parallelization/WarpXCommUtil.H" #include "Utils/WarpXAlgorithmSelection.H" #include "Utils/WarpXProfilerWrapper.H" #include "Utils/WarpXUtil.H" @@ -318,7 +319,7 @@ Diagnostics::ComputeAndPack () // needed for contour plots of rho, i.e. ascent/sensei if (m_format == "sensei" || m_format == "ascent") { - m_mf_output[i_buffer][lev].FillBoundary(warpx.Geom(lev).periodicity()); + WarpXCommUtil::FillBoundary(m_mf_output[i_buffer][lev], warpx.Geom(lev).periodicity()); } } } diff --git a/Source/Diagnostics/SliceDiagnostic.cpp b/Source/Diagnostics/SliceDiagnostic.cpp index 5e58f4163..185c560ae 100644 --- a/Source/Diagnostics/SliceDiagnostic.cpp +++ b/Source/Diagnostics/SliceDiagnostic.cpp @@ -8,6 +8,7 @@ #include "SliceDiagnostic.H" #include "WarpX.H" +#include "Parallelization/WarpXCommUtil.H" #include <AMReX.H> #include <AMReX_Array4.H> @@ -25,6 +26,8 @@ #include <AMReX_MFIter.H> #include <AMReX_MultiFab.H> #include <AMReX_MultiFabUtil.H> +#include <AMReX_PlotFileUtil.H> + #include <AMReX_Print.H> #include <AMReX_REAL.H> #include <AMReX_RealBox.H> @@ -140,7 +143,8 @@ CreateSlice( const MultiFab& mf, const Vector<Geometry> &dom_geom, // Copy data from domain to slice that has same cell size as that of // // the domain mf. src and dst have the same number of ghost cells // - smf->ParallelCopy(mf, 0, 0, ncomp,nghost,nghost); + amrex::IntVect nghost_vect(AMREX_D_DECL(nghost, nghost, nghost)); + WarpXCommUtil::ParallelCopy(*smf, mf, 0, 0, ncomp,nghost_vect,nghost_vect); // inteprolate if required on refined slice // if (interpolate == 1 ) { diff --git a/Source/Diagnostics/WarpXIO.cpp b/Source/Diagnostics/WarpXIO.cpp index bf9aad1f5..4abd85faa 100644 --- a/Source/Diagnostics/WarpXIO.cpp +++ b/Source/Diagnostics/WarpXIO.cpp @@ -11,6 +11,7 @@ #include "FieldIO.H" #include "Particles/MultiParticleContainer.H" #include "Utils/CoarsenIO.H" +#include "Parallelization/WarpXCommUtil.H" #include "Utils/WarpXProfilerWrapper.H" #include "WarpX.H" @@ -368,7 +369,7 @@ WarpX::GetCellCenteredData() { const std::unique_ptr<MultiFab>& charge_density = mypc->GetChargeDensity(lev); AverageAndPackScalarField( *cc[lev], *charge_density, dmap[lev], dcomp, ng ); - cc[lev]->FillBoundary(geom[lev].periodicity()); + WarpXCommUtil::FillBoundary(*cc[lev], geom[lev].periodicity()); } for (int lev = finest_level; lev > 0; --lev) diff --git a/Source/Initialization/WarpXInitData.cpp b/Source/Initialization/WarpXInitData.cpp index 979079e71..d8b6cdd16 100644 --- a/Source/Initialization/WarpXInitData.cpp +++ b/Source/Initialization/WarpXInitData.cpp @@ -18,6 +18,7 @@ #include "Filter/BilinearFilter.H" #include "Filter/NCIGodfreyFilter.H" #include "Particles/MultiParticleContainer.H" +#include "Parallelization/WarpXCommUtil.H" #include "Utils/WarpXAlgorithmSelection.H" #include "Utils/WarpXConst.H" #include "Utils/WarpXProfilerWrapper.H" @@ -493,24 +494,24 @@ WarpX::InitLevelData (int lev, Real /*time*/) ComputeDistanceToEB(); const auto &period = Geom(lev).periodicity(); - m_edge_lengths[lev][0]->FillBoundary(guard_cells.ng_alloc_EB, period); - m_edge_lengths[lev][1]->FillBoundary(guard_cells.ng_alloc_EB, period); - m_edge_lengths[lev][2]->FillBoundary(guard_cells.ng_alloc_EB, period); - m_face_areas[lev][0]->FillBoundary(guard_cells.ng_alloc_EB, period); - m_face_areas[lev][1]->FillBoundary(guard_cells.ng_alloc_EB, period); - m_face_areas[lev][2]->FillBoundary(guard_cells.ng_alloc_EB, period); + WarpXCommUtil::FillBoundary(*m_edge_lengths[lev][0], guard_cells.ng_alloc_EB, period); + WarpXCommUtil::FillBoundary(*m_edge_lengths[lev][1], guard_cells.ng_alloc_EB, period); + WarpXCommUtil::FillBoundary(*m_edge_lengths[lev][2], guard_cells.ng_alloc_EB, period); + WarpXCommUtil::FillBoundary(*m_face_areas[lev][0], guard_cells.ng_alloc_EB, period); + WarpXCommUtil::FillBoundary(*m_face_areas[lev][1], guard_cells.ng_alloc_EB, period); + WarpXCommUtil::FillBoundary(*m_face_areas[lev][2], guard_cells.ng_alloc_EB, period); if (WarpX::maxwell_solver_id == MaxwellSolverAlgo::ECT) { - m_area_mod[lev][0]->FillBoundary(guard_cells.ng_alloc_EB, period); - m_area_mod[lev][1]->FillBoundary(guard_cells.ng_alloc_EB, period); - m_area_mod[lev][2]->FillBoundary(guard_cells.ng_alloc_EB, period); + WarpXCommUtil::FillBoundary(*m_area_mod[lev][0], guard_cells.ng_alloc_EB, period); + WarpXCommUtil::FillBoundary(*m_area_mod[lev][1], guard_cells.ng_alloc_EB, period); + WarpXCommUtil::FillBoundary(*m_area_mod[lev][2], guard_cells.ng_alloc_EB, period); MarkCells(); - m_flag_info_face[lev][0]->FillBoundary(guard_cells.ng_alloc_EB, period); - m_flag_info_face[lev][1]->FillBoundary(guard_cells.ng_alloc_EB, period); - m_flag_info_face[lev][2]->FillBoundary(guard_cells.ng_alloc_EB, period); - m_flag_ext_face[lev][0]->FillBoundary(guard_cells.ng_alloc_EB, period); - m_flag_ext_face[lev][1]->FillBoundary(guard_cells.ng_alloc_EB, period); - m_flag_ext_face[lev][2]->FillBoundary(guard_cells.ng_alloc_EB, period); + WarpXCommUtil::FillBoundary(*m_flag_info_face[lev][0], guard_cells.ng_alloc_EB, period); + WarpXCommUtil::FillBoundary(*m_flag_info_face[lev][1], guard_cells.ng_alloc_EB, period); + WarpXCommUtil::FillBoundary(*m_flag_info_face[lev][2], guard_cells.ng_alloc_EB, period); + WarpXCommUtil::FillBoundary(*m_flag_ext_face[lev][0], guard_cells.ng_alloc_EB, period); + WarpXCommUtil::FillBoundary(*m_flag_ext_face[lev][1], guard_cells.ng_alloc_EB, period); + WarpXCommUtil::FillBoundary(*m_flag_ext_face[lev][2], guard_cells.ng_alloc_EB, period); ComputeFaceExtensions(); } } diff --git a/Source/Parallelization/CMakeLists.txt b/Source/Parallelization/CMakeLists.txt index 3588f6f31..0c75c7542 100644 --- a/Source/Parallelization/CMakeLists.txt +++ b/Source/Parallelization/CMakeLists.txt @@ -3,4 +3,5 @@ target_sources(WarpX GuardCellManager.cpp WarpXComm.cpp WarpXRegrid.cpp + WarpXCommUtil.cpp ) diff --git a/Source/Parallelization/Make.package b/Source/Parallelization/Make.package index 629cfafea..87955f814 100644 --- a/Source/Parallelization/Make.package +++ b/Source/Parallelization/Make.package @@ -1,5 +1,6 @@ CEXE_sources += WarpXComm.cpp CEXE_sources += WarpXRegrid.cpp CEXE_sources += GuardCellManager.cpp +CEXE_sources += WarpXCommUtil.cpp VPATH_LOCATIONS += $(WARPX_HOME)/Source/Parallelization diff --git a/Source/Parallelization/WarpXComm.cpp b/Source/Parallelization/WarpXComm.cpp index 042344bda..e75bd7f1e 100644 --- a/Source/Parallelization/WarpXComm.cpp +++ b/Source/Parallelization/WarpXComm.cpp @@ -15,6 +15,7 @@ #include "Utils/WarpXAlgorithmSelection.H" #include "Utils/WarpXProfilerWrapper.H" #include "WarpXComm_K.H" +#include "WarpXCommUtil.H" #include "WarpXSumGuardCells.H" #include <AMReX.H> @@ -188,7 +189,7 @@ WarpX::UpdateAuxilaryDataStagToNodal () const amrex::IntVect& ng_src = guard_cells.ng_FieldGather; // Copy Bfield_aux to Btmp, using up to ng_src (=ng_FieldGather) guard cells from // Bfield_aux and filling up to ng (=nGrow) guard cells in Btmp - Btmp[i]->ParallelCopy(*Bfield_aux[lev-1][i], 0, 0, 1, ng_src, ng, cperiod); + WarpXCommUtil::ParallelCopy(*Btmp[i], *Bfield_aux[lev-1][i], 0, 0, 1, ng_src, ng, cperiod); } #ifdef AMREX_USE_OMP @@ -245,7 +246,7 @@ WarpX::UpdateAuxilaryDataStagToNodal () const amrex::IntVect& ng_src = guard_cells.ng_FieldGather; // Copy Efield_aux to Etmp, using up to ng_src (=ng_FieldGather) guard cells from // Efield_aux and filling up to ng (=nGrow) guard cells in Etmp - Etmp[i]->ParallelCopy(*Efield_aux[lev-1][i], 0, 0, 1, ng_src, ng, cperiod); + WarpXCommUtil::ParallelCopy(*Etmp[i], *Efield_aux[lev-1][i], 0, 0, 1, ng_src, ng, cperiod); } #ifdef AMREX_USE_OMP @@ -296,13 +297,16 @@ WarpX::UpdateAuxilaryDataSameType () dBx.setVal(0.0); dBy.setVal(0.0); dBz.setVal(0.0); + // Guard cells may not be up to date beyond ng_FieldGather const amrex::IntVect& ng_src = guard_cells.ng_FieldGather; // Copy Bfield_aux to the dB MultiFabs, using up to ng_src (=ng_FieldGather) guard // cells from Bfield_aux and filling up to ng (=nGrow) guard cells in the dB MultiFabs - dBx.ParallelCopy(*Bfield_aux[lev-1][0], 0, 0, Bfield_aux[lev-1][0]->nComp(), ng_src, ng, crse_period); - dBy.ParallelCopy(*Bfield_aux[lev-1][1], 0, 0, Bfield_aux[lev-1][1]->nComp(), ng_src, ng, crse_period); - dBz.ParallelCopy(*Bfield_aux[lev-1][2], 0, 0, Bfield_aux[lev-1][2]->nComp(), ng_src, ng, crse_period); + + WarpXCommUtil::ParallelCopy(dBx, *Bfield_aux[lev-1][0], 0, 0, Bfield_aux[lev-1][0]->nComp(), ng_src, ng, crse_period); + WarpXCommUtil::ParallelCopy(dBy, *Bfield_aux[lev-1][1], 0, 0, Bfield_aux[lev-1][1]->nComp(), ng_src, ng, crse_period); + WarpXCommUtil::ParallelCopy(dBz, *Bfield_aux[lev-1][2], 0, 0, Bfield_aux[lev-1][2]->nComp(), ng_src, ng, crse_period); + if (Bfield_cax[lev][0]) { MultiFab::Copy(*Bfield_cax[lev][0], dBx, 0, 0, Bfield_cax[lev][0]->nComp(), ng); @@ -358,13 +362,15 @@ WarpX::UpdateAuxilaryDataSameType () dEx.setVal(0.0); dEy.setVal(0.0); dEz.setVal(0.0); + // Guard cells may not be up to date beyond ng_FieldGather const amrex::IntVect& ng_src = guard_cells.ng_FieldGather; // Copy Efield_aux to the dE MultiFabs, using up to ng_src (=ng_FieldGather) guard // cells from Efield_aux and filling up to ng (=nGrow) guard cells in the dE MultiFabs - dEx.ParallelCopy(*Efield_aux[lev-1][0], 0, 0, Efield_aux[lev-1][0]->nComp(), ng_src, ng, crse_period); - dEy.ParallelCopy(*Efield_aux[lev-1][1], 0, 0, Efield_aux[lev-1][1]->nComp(), ng_src, ng, crse_period); - dEz.ParallelCopy(*Efield_aux[lev-1][2], 0, 0, Efield_aux[lev-1][2]->nComp(), ng_src, ng, crse_period); + WarpXCommUtil::ParallelCopy(dEx, *Efield_aux[lev-1][0], 0, 0, Efield_aux[lev-1][0]->nComp(), ng_src, ng, crse_period); + WarpXCommUtil::ParallelCopy(dEy, *Efield_aux[lev-1][1], 0, 0, Efield_aux[lev-1][1]->nComp(), ng_src, ng, crse_period); + WarpXCommUtil::ParallelCopy(dEz, *Efield_aux[lev-1][2], 0, 0, Efield_aux[lev-1][2]->nComp(), ng_src, ng, crse_period); + if (Efield_cax[lev][0]) { MultiFab::Copy(*Efield_cax[lev][0], dEx, 0, 0, Efield_cax[lev][0]->nComp(), ng); @@ -555,14 +561,14 @@ WarpX::FillBoundaryE (int lev, PatchType patch_type, IntVect ng) const auto& period = Geom(lev).periodicity(); if ( safe_guard_cells ){ Vector<MultiFab*> mf{Efield_fp[lev][0].get(),Efield_fp[lev][1].get(),Efield_fp[lev][2].get()}; - amrex::FillBoundary(mf, period); + WarpXCommUtil::FillBoundary(mf, period); } else { AMREX_ALWAYS_ASSERT_WITH_MESSAGE( ng <= Efield_fp[lev][0]->nGrowVect(), "Error: in FillBoundaryE, requested more guard cells than allocated"); - Efield_fp[lev][0]->FillBoundary(ng, period); - Efield_fp[lev][1]->FillBoundary(ng, period); - Efield_fp[lev][2]->FillBoundary(ng, period); + WarpXCommUtil::FillBoundary(*Efield_fp[lev][0], ng, period); + WarpXCommUtil::FillBoundary(*Efield_fp[lev][1], ng, period); + WarpXCommUtil::FillBoundary(*Efield_fp[lev][2], ng, period); } } else if (patch_type == PatchType::coarse) @@ -579,15 +585,15 @@ WarpX::FillBoundaryE (int lev, PatchType patch_type, IntVect ng) const auto& cperiod = Geom(lev-1).periodicity(); if ( safe_guard_cells ) { Vector<MultiFab*> mf{Efield_cp[lev][0].get(),Efield_cp[lev][1].get(),Efield_cp[lev][2].get()}; - amrex::FillBoundary(mf, cperiod); + WarpXCommUtil::FillBoundary(mf, cperiod); } else { AMREX_ALWAYS_ASSERT_WITH_MESSAGE( ng <= Efield_cp[lev][0]->nGrowVect(), "Error: in FillBoundaryE, requested more guard cells than allocated"); - Efield_cp[lev][0]->FillBoundary(ng, cperiod); - Efield_cp[lev][1]->FillBoundary(ng, cperiod); - Efield_cp[lev][2]->FillBoundary(ng, cperiod); + WarpXCommUtil::FillBoundary(*Efield_cp[lev][0], ng, cperiod); + WarpXCommUtil::FillBoundary(*Efield_cp[lev][1], ng, cperiod); + WarpXCommUtil::FillBoundary(*Efield_cp[lev][2], ng, cperiod); } } } @@ -616,14 +622,15 @@ WarpX::FillBoundaryB (int lev, PatchType patch_type, IntVect ng) const auto& period = Geom(lev).periodicity(); if ( safe_guard_cells ) { Vector<MultiFab*> mf{Bfield_fp[lev][0].get(),Bfield_fp[lev][1].get(),Bfield_fp[lev][2].get()}; - amrex::FillBoundary(mf, period); + WarpXCommUtil::FillBoundary(mf, period); } else { AMREX_ALWAYS_ASSERT_WITH_MESSAGE( ng <= Bfield_fp[lev][0]->nGrowVect(), "Error: in FillBoundaryB, requested more guard cells than allocated"); - Bfield_fp[lev][0]->FillBoundary(ng, period); - Bfield_fp[lev][1]->FillBoundary(ng, period); - Bfield_fp[lev][2]->FillBoundary(ng, period); + + WarpXCommUtil::FillBoundary(*Bfield_fp[lev][0], ng, period); + WarpXCommUtil::FillBoundary(*Bfield_fp[lev][1], ng, period); + WarpXCommUtil::FillBoundary(*Bfield_fp[lev][2], ng, period); } } else if (patch_type == PatchType::coarse) @@ -640,14 +647,15 @@ WarpX::FillBoundaryB (int lev, PatchType patch_type, IntVect ng) const auto& cperiod = Geom(lev-1).periodicity(); if ( safe_guard_cells ){ Vector<MultiFab*> mf{Bfield_cp[lev][0].get(),Bfield_cp[lev][1].get(),Bfield_cp[lev][2].get()}; - amrex::FillBoundary(mf, cperiod); + WarpXCommUtil::FillBoundary(mf, cperiod); } else { AMREX_ALWAYS_ASSERT_WITH_MESSAGE( ng <= Bfield_cp[lev][0]->nGrowVect(), "Error: in FillBoundaryB, requested more guard cells than allocated"); - Bfield_cp[lev][0]->FillBoundary(ng, cperiod); - Bfield_cp[lev][1]->FillBoundary(ng, cperiod); - Bfield_cp[lev][2]->FillBoundary(ng, cperiod); + + WarpXCommUtil::FillBoundary(*Bfield_cp[lev][0], ng, cperiod); + WarpXCommUtil::FillBoundary(*Bfield_cp[lev][1], ng, cperiod); + WarpXCommUtil::FillBoundary(*Bfield_cp[lev][2], ng, cperiod); } } } @@ -672,14 +680,14 @@ WarpX::FillBoundaryE_avg (int lev, PatchType patch_type, IntVect ng) const auto& period = Geom(lev).periodicity(); if ( safe_guard_cells ){ Vector<MultiFab*> mf{Efield_avg_fp[lev][0].get(),Efield_avg_fp[lev][1].get(),Efield_avg_fp[lev][2].get()}; - amrex::FillBoundary(mf, period); + WarpXCommUtil::FillBoundary(mf, period); } else { AMREX_ALWAYS_ASSERT_WITH_MESSAGE( ng <= Efield_avg_fp[lev][0]->nGrowVect(), "Error: in FillBoundaryE_avg, requested more guard cells than allocated"); - Efield_avg_fp[lev][0]->FillBoundary(ng, period); - Efield_avg_fp[lev][1]->FillBoundary(ng, period); - Efield_avg_fp[lev][2]->FillBoundary(ng, period); + WarpXCommUtil::FillBoundary(*Efield_avg_fp[lev][0], ng, period); + WarpXCommUtil::FillBoundary(*Efield_avg_fp[lev][1], ng, period); + WarpXCommUtil::FillBoundary(*Efield_avg_fp[lev][2], ng, period); } } else if (patch_type == PatchType::coarse) @@ -692,15 +700,15 @@ WarpX::FillBoundaryE_avg (int lev, PatchType patch_type, IntVect ng) const auto& cperiod = Geom(lev-1).periodicity(); if ( safe_guard_cells ) { Vector<MultiFab*> mf{Efield_avg_cp[lev][0].get(),Efield_avg_cp[lev][1].get(),Efield_avg_cp[lev][2].get()}; - amrex::FillBoundary(mf, cperiod); + WarpXCommUtil::FillBoundary(mf, cperiod); } else { AMREX_ALWAYS_ASSERT_WITH_MESSAGE( ng <= Efield_avg_cp[lev][0]->nGrowVect(), "Error: in FillBoundaryE, requested more guard cells than allocated"); - Efield_avg_cp[lev][0]->FillBoundary(ng, cperiod); - Efield_avg_cp[lev][1]->FillBoundary(ng, cperiod); - Efield_avg_cp[lev][2]->FillBoundary(ng, cperiod); + WarpXCommUtil::FillBoundary(*Efield_avg_cp[lev][0], ng, cperiod); + WarpXCommUtil::FillBoundary(*Efield_avg_cp[lev][1], ng, cperiod); + WarpXCommUtil::FillBoundary(*Efield_avg_cp[lev][2], ng, cperiod); } } } @@ -725,14 +733,14 @@ WarpX::FillBoundaryB_avg (int lev, PatchType patch_type, IntVect ng) const auto& period = Geom(lev).periodicity(); if ( safe_guard_cells ) { Vector<MultiFab*> mf{Bfield_avg_fp[lev][0].get(),Bfield_avg_fp[lev][1].get(),Bfield_avg_fp[lev][2].get()}; - amrex::FillBoundary(mf, period); + WarpXCommUtil::FillBoundary(mf, period); } else { AMREX_ALWAYS_ASSERT_WITH_MESSAGE( ng <= Bfield_fp[lev][0]->nGrowVect(), "Error: in FillBoundaryB, requested more guard cells than allocated"); - Bfield_avg_fp[lev][0]->FillBoundary(ng, period); - Bfield_avg_fp[lev][1]->FillBoundary(ng, period); - Bfield_avg_fp[lev][2]->FillBoundary(ng, period); + WarpXCommUtil::FillBoundary(*Bfield_avg_fp[lev][0], ng, period); + WarpXCommUtil::FillBoundary(*Bfield_avg_fp[lev][1], ng, period); + WarpXCommUtil::FillBoundary(*Bfield_avg_fp[lev][2], ng, period); } } else if (patch_type == PatchType::coarse) @@ -745,14 +753,14 @@ WarpX::FillBoundaryB_avg (int lev, PatchType patch_type, IntVect ng) const auto& cperiod = Geom(lev-1).periodicity(); if ( safe_guard_cells ){ Vector<MultiFab*> mf{Bfield_avg_cp[lev][0].get(),Bfield_avg_cp[lev][1].get(),Bfield_avg_cp[lev][2].get()}; - amrex::FillBoundary(mf, cperiod); + WarpXCommUtil::FillBoundary(mf, cperiod); } else { AMREX_ALWAYS_ASSERT_WITH_MESSAGE( ng <= Bfield_avg_cp[lev][0]->nGrowVect(), "Error: in FillBoundaryB_avg, requested more guard cells than allocated"); - Bfield_avg_cp[lev][0]->FillBoundary(ng, cperiod); - Bfield_avg_cp[lev][1]->FillBoundary(ng, cperiod); - Bfield_avg_cp[lev][2]->FillBoundary(ng, cperiod); + WarpXCommUtil::FillBoundary(*Bfield_avg_cp[lev][0], ng, cperiod); + WarpXCommUtil::FillBoundary(*Bfield_avg_cp[lev][1], ng, cperiod); + WarpXCommUtil::FillBoundary(*Bfield_avg_cp[lev][2], ng, cperiod); } } } @@ -779,7 +787,7 @@ WarpX::FillBoundaryF (int lev, PatchType patch_type, IntVect ng) { const amrex::Periodicity& period = Geom(lev).periodicity(); const amrex::IntVect& nghost = (safe_guard_cells) ? F_fp[lev]->nGrowVect() : ng; - F_fp[lev]->FillBoundary(nghost, period); + WarpXCommUtil::FillBoundary(*F_fp[lev], nghost, period); } } else if (patch_type == PatchType::coarse) @@ -794,7 +802,7 @@ WarpX::FillBoundaryF (int lev, PatchType patch_type, IntVect ng) { const amrex::Periodicity& period = Geom(lev-1).periodicity(); const amrex::IntVect& nghost = (safe_guard_cells) ? F_cp[lev]->nGrowVect() : ng; - F_cp[lev]->FillBoundary(nghost, period); + WarpXCommUtil::FillBoundary(*F_cp[lev], nghost, period); } } } @@ -823,7 +831,7 @@ void WarpX::FillBoundaryG (int lev, PatchType patch_type, IntVect ng) { const amrex::Periodicity& period = Geom(lev).periodicity(); const amrex::IntVect& nghost = (safe_guard_cells) ? G_fp[lev]->nGrowVect() : ng; - G_fp[lev]->FillBoundary(nghost, period); + WarpXCommUtil::FillBoundary(*G_fp[lev], nghost, period); } } else if (patch_type == PatchType::coarse) @@ -838,7 +846,7 @@ void WarpX::FillBoundaryG (int lev, PatchType patch_type, IntVect ng) { const amrex::Periodicity& period = Geom(lev-1).periodicity(); const amrex::IntVect& nghost = (safe_guard_cells) ? G_cp[lev]->nGrowVect() : ng; - G_cp[lev]->FillBoundary(nghost, period); + WarpXCommUtil::FillBoundary(*G_cp[lev], nghost, period); } } } @@ -856,12 +864,12 @@ void WarpX::FillBoundaryAux (int lev, IntVect ng) { const auto& period = Geom(lev).periodicity(); - Efield_aux[lev][0]->FillBoundary(ng, period); - Efield_aux[lev][1]->FillBoundary(ng, period); - Efield_aux[lev][2]->FillBoundary(ng, period); - Bfield_aux[lev][0]->FillBoundary(ng, period); - Bfield_aux[lev][1]->FillBoundary(ng, period); - Bfield_aux[lev][2]->FillBoundary(ng, period); + WarpXCommUtil::FillBoundary(*Efield_aux[lev][0], ng, period); + WarpXCommUtil::FillBoundary(*Efield_aux[lev][1], ng, period); + WarpXCommUtil::FillBoundary(*Efield_aux[lev][2], ng, period); + WarpXCommUtil::FillBoundary(*Bfield_aux[lev][0], ng, period); + WarpXCommUtil::FillBoundary(*Bfield_aux[lev][1], ng, period); + WarpXCommUtil::FillBoundary(*Bfield_aux[lev][2], ng, period); } void @@ -1049,7 +1057,7 @@ WarpX::AddCurrentFromFineLevelandSumBoundary (int lev) bilinear_filter.ApplyStencil(jfb, *current_buf[lev+1][idim], lev); MultiFab::Add(jfb, jfc, 0, 0, current_buf[lev+1][idim]->nComp(), ng); - mf.ParallelAdd(jfb, 0, 0, current_buf[lev+1][idim]->nComp(), ng, IntVect::TheZeroVector(), period); + WarpXCommUtil::ParallelAdd(mf, jfb, 0, 0, current_buf[lev+1][idim]->nComp(), ng, IntVect::TheZeroVector(), period); WarpXSumGuardCells(*current_cp[lev+1][idim], jfc, period, ng_depos_J, 0, current_cp[lev+1][idim]->nComp()); } @@ -1062,7 +1070,8 @@ WarpX::AddCurrentFromFineLevelandSumBoundary (int lev) MultiFab jf(current_cp[lev+1][idim]->boxArray(), current_cp[lev+1][idim]->DistributionMap(), current_cp[lev+1][idim]->nComp(), ng); bilinear_filter.ApplyStencil(jf, *current_cp[lev+1][idim], lev); - mf.ParallelAdd(jf, 0, 0, current_cp[lev+1][idim]->nComp(), ng, IntVect::TheZeroVector(), period); + + WarpXCommUtil::ParallelAdd(mf, jf, 0, 0, current_cp[lev+1][idim]->nComp(), ng, IntVect::TheZeroVector(), period); WarpXSumGuardCells(*current_cp[lev+1][idim], jf, period, ng_depos_J, 0, current_cp[lev+1][idim]->nComp()); } else if (current_buf[lev+1][idim]) // but no filter @@ -1071,17 +1080,17 @@ WarpX::AddCurrentFromFineLevelandSumBoundary (int lev) MultiFab::Add(*current_buf[lev+1][idim], *current_cp [lev+1][idim], 0, 0, current_buf[lev+1][idim]->nComp(), current_cp[lev+1][idim]->nGrowVect()); - mf.ParallelAdd(*current_buf[lev+1][idim], 0, 0, current_buf[lev+1][idim]->nComp(), - current_buf[lev+1][idim]->nGrowVect(), IntVect::TheZeroVector(), - period); + WarpXCommUtil::ParallelAdd(mf, *current_buf[lev+1][idim], 0, 0, current_buf[lev+1][idim]->nComp(), + current_buf[lev+1][idim]->nGrowVect(), IntVect::TheZeroVector(), + period); WarpXSumGuardCells(*(current_cp[lev+1][idim]), period, ng_depos_J, 0, current_cp[lev+1][idim]->nComp()); } else // no filter, no buffer { ng_depos_J.min(ng); - mf.ParallelAdd(*current_cp[lev+1][idim], 0, 0, current_cp[lev+1][idim]->nComp(), - current_cp[lev+1][idim]->nGrowVect(), IntVect::TheZeroVector(), - period); + WarpXCommUtil::ParallelAdd(mf, *current_cp[lev+1][idim], 0, 0, current_cp[lev+1][idim]->nComp(), + current_cp[lev+1][idim]->nGrowVect(), IntVect::TheZeroVector(), + period); WarpXSumGuardCells(*(current_cp[lev+1][idim]), period, ng_depos_J, 0, current_cp[lev+1][idim]->nComp()); } MultiFab::Add(*current_fp[lev][idim], mf, 0, 0, current_fp[lev+1][idim]->nComp(), 0); @@ -1174,7 +1183,8 @@ WarpX::AddRhoFromFineLevelandSumBoundary(int lev, int icomp, int ncomp) bilinear_filter.ApplyStencil(rhofb, *charge_buf[lev+1], lev, icomp, 0, ncomp); MultiFab::Add(rhofb, rhofc, 0, 0, ncomp, ng); - mf.ParallelAdd(rhofb, 0, 0, ncomp, ng, IntVect::TheZeroVector(), period); + + WarpXCommUtil::ParallelAdd(mf, rhofb, 0, 0, ncomp, ng, IntVect::TheZeroVector(), period); WarpXSumGuardCells( *rho_cp[lev+1], rhofc, period, ng_depos_rho, icomp, ncomp ); } else if (use_filter) // but no buffer @@ -1184,7 +1194,8 @@ WarpX::AddRhoFromFineLevelandSumBoundary(int lev, int icomp, int ncomp) ng_depos_rho.min(ng); MultiFab rf(rho_cp[lev+1]->boxArray(), rho_cp[lev+1]->DistributionMap(), ncomp, ng); bilinear_filter.ApplyStencil(rf, *rho_cp[lev+1], lev, icomp, 0, ncomp); - mf.ParallelAdd(rf, 0, 0, ncomp, ng, IntVect::TheZeroVector(), period); + + WarpXCommUtil::ParallelAdd(mf, rf, 0, 0, ncomp, ng, IntVect::TheZeroVector(), period); WarpXSumGuardCells( *rho_cp[lev+1], rf, period, ng_depos_rho, icomp, ncomp ); } else if (charge_buf[lev+1]) // but no filter @@ -1193,18 +1204,19 @@ WarpX::AddRhoFromFineLevelandSumBoundary(int lev, int icomp, int ncomp) MultiFab::Add(*charge_buf[lev+1], *rho_cp[lev+1], icomp, icomp, ncomp, rho_cp[lev+1]->nGrowVect()); - mf.ParallelAdd(*charge_buf[lev+1], icomp, 0, - ncomp, - charge_buf[lev+1]->nGrowVect(), IntVect::TheZeroVector(), - period); + + WarpXCommUtil::ParallelAdd(mf, *charge_buf[lev+1], icomp, 0, + ncomp, + charge_buf[lev+1]->nGrowVect(), IntVect::TheZeroVector(), + period); WarpXSumGuardCells(*(rho_cp[lev+1]), period, ng_depos_rho, icomp, ncomp); } else // no filter, no buffer { ng_depos_rho.min(ng); - mf.ParallelAdd(*rho_cp[lev+1], icomp, 0, ncomp, - rho_cp[lev+1]->nGrowVect(), IntVect::TheZeroVector(), - period); + WarpXCommUtil::ParallelAdd(mf, *rho_cp[lev+1], icomp, 0, ncomp, + rho_cp[lev+1]->nGrowVect(), IntVect::TheZeroVector(), + period); WarpXSumGuardCells(*(rho_cp[lev+1]), period, ng_depos_rho, icomp, ncomp); } MultiFab::Add(*rho_fp[lev], mf, 0, icomp, ncomp, 0); @@ -1222,16 +1234,16 @@ WarpX::NodalSyncJ (int lev, PatchType patch_type) if (patch_type == PatchType::fine) { const auto& period = Geom(lev).periodicity(); - current_fp[lev][0]->OverrideSync(period); - current_fp[lev][1]->OverrideSync(period); - current_fp[lev][2]->OverrideSync(period); + WarpXCommUtil::OverrideSync(*current_fp[lev][0], period); + WarpXCommUtil::OverrideSync(*current_fp[lev][1], period); + WarpXCommUtil::OverrideSync(*current_fp[lev][2], period); } else if (patch_type == PatchType::coarse) { const auto& cperiod = Geom(lev-1).periodicity(); - current_cp[lev][0]->OverrideSync(cperiod); - current_cp[lev][1]->OverrideSync(cperiod); - current_cp[lev][2]->OverrideSync(cperiod); + WarpXCommUtil::OverrideSync(*current_cp[lev][0], cperiod); + WarpXCommUtil::OverrideSync(*current_cp[lev][1], cperiod); + WarpXCommUtil::OverrideSync(*current_cp[lev][2], cperiod); } } @@ -1244,13 +1256,13 @@ WarpX::NodalSyncRho (int lev, PatchType patch_type, int icomp, int ncomp) { const auto& period = Geom(lev).periodicity(); MultiFab rhof(*rho_fp[lev], amrex::make_alias, icomp, ncomp); - rhof.OverrideSync(period); + WarpXCommUtil::OverrideSync(rhof, period); } else if (patch_type == PatchType::coarse && rho_cp[lev]) { const auto& cperiod = Geom(lev-1).periodicity(); MultiFab rhoc(*rho_cp[lev], amrex::make_alias, icomp, ncomp); - rhoc.OverrideSync(cperiod); + WarpXCommUtil::OverrideSync(rhoc, cperiod); } } @@ -1278,17 +1290,17 @@ void WarpX::NodalSyncPML (int lev, PatchType patch_type) // Always synchronize nodal points const auto& period = Geom(lev).periodicity(); - pml_E[0]->OverrideSync(period); - pml_E[1]->OverrideSync(period); - pml_E[2]->OverrideSync(period); - pml_B[0]->OverrideSync(period); - pml_B[1]->OverrideSync(period); - pml_B[2]->OverrideSync(period); + WarpXCommUtil::OverrideSync(*pml_E[0], period); + WarpXCommUtil::OverrideSync(*pml_E[1], period); + WarpXCommUtil::OverrideSync(*pml_E[2], period); + WarpXCommUtil::OverrideSync(*pml_B[0], period); + WarpXCommUtil::OverrideSync(*pml_B[1], period); + WarpXCommUtil::OverrideSync(*pml_B[2], period); if (pml_F) { - pml_F->OverrideSync(period); + WarpXCommUtil::OverrideSync(*pml_F, period); } if (pml_G) { - pml_G->OverrideSync(period); + WarpXCommUtil::OverrideSync(*pml_G, period); } } } @@ -1301,16 +1313,16 @@ void WarpX::NodalSync (amrex::Vector<std::array<std::unique_ptr<amrex::MultiFab> for (int lev = 0; lev <= WarpX::finest_level; lev++) { const amrex::Periodicity& period = Geom(lev).periodicity(); - mf_fp[lev][0]->OverrideSync(period); - mf_fp[lev][1]->OverrideSync(period); - mf_fp[lev][2]->OverrideSync(period); + WarpXCommUtil::OverrideSync(*mf_fp[lev][0], period); + WarpXCommUtil::OverrideSync(*mf_fp[lev][1], period); + WarpXCommUtil::OverrideSync(*mf_fp[lev][2], period); if (lev > 0) { const amrex::Periodicity& cperiod = Geom(lev-1).periodicity(); - mf_cp[lev][0]->OverrideSync(cperiod); - mf_cp[lev][1]->OverrideSync(cperiod); - mf_cp[lev][2]->OverrideSync(cperiod); + WarpXCommUtil::OverrideSync(*mf_cp[lev][0], cperiod); + WarpXCommUtil::OverrideSync(*mf_cp[lev][1], cperiod); + WarpXCommUtil::OverrideSync(*mf_cp[lev][2], cperiod); } } } diff --git a/Source/Parallelization/WarpXCommUtil.H b/Source/Parallelization/WarpXCommUtil.H new file mode 100644 index 000000000..2a5ec5390 --- /dev/null +++ b/Source/Parallelization/WarpXCommUtil.H @@ -0,0 +1,94 @@ +/* Copyright 2019 Andrew Myers + * + * This file is part of WarpX. + * + * License: BSD-3-Clause-LBNL + */ +#ifndef WARPX_COMMUTIL_H_ +#define WARPX_COMMUTIL_H_ + +#include <AMReX_FabArray.H> +#include <AMReX_Gpu.H> +#include <AMReX_iMultiFab.H> +#include <AMReX_MultiFab.H> +#include <AMReX_Periodicity.H> +#include <AMReX_TypeTraits.H> + +#include "WarpX.H" + +namespace WarpXCommUtil +{ + +using comm_float_type = float; + +template <class FAB1, class FAB2> +void +mixedCopy (amrex::FabArray<FAB1>& dst, amrex::FabArray<FAB2> const& src, int srccomp, int dstcomp, int numcomp, const amrex::IntVect& nghost) +{ + auto const& srcma = src.const_arrays(); + auto const& dstma = dst.arrays(); + ParallelFor(dst, nghost, numcomp, + [=] AMREX_GPU_DEVICE (int box_no, int i, int j, int k, int n) noexcept + { + dstma[box_no](i,j,k,dstcomp+n) = (typename FAB1::value_type) srcma[box_no](i,j,k,srccomp+n); + }); + amrex::Gpu::synchronize(); +} + +void ParallelCopy (amrex::MultiFab& dst, + const amrex::MultiFab& src, + int src_comp, + int dst_comp, + int num_comp, + const amrex::IntVect& src_nghost, + const amrex::IntVect& dst_nghost, + const amrex::Periodicity& period = amrex::Periodicity::NonPeriodic(), + amrex::FabArrayBase::CpOp op = amrex::FabArrayBase::COPY); + +void ParallelAdd (amrex::MultiFab& dst, + const amrex::MultiFab& src, + int src_comp, + int dst_comp, + int num_comp, + const amrex::IntVect& src_nghost, + const amrex::IntVect& dst_nghost, + const amrex::Periodicity& period = amrex::Periodicity::NonPeriodic()); + +void FillBoundary (amrex::MultiFab& mf, + const amrex::Periodicity& period = amrex::Periodicity::NonPeriodic()); + +void FillBoundary (amrex::MultiFab& mf, + amrex::IntVect ng, + const amrex::Periodicity& period = amrex::Periodicity::NonPeriodic()); + +void FillBoundary (amrex::iMultiFab& mf, + const amrex::Periodicity& period = amrex::Periodicity::NonPeriodic()); + +void FillBoundary (amrex::iMultiFab& mf, + amrex::IntVect ng, + const amrex::Periodicity& period = amrex::Periodicity::NonPeriodic()); + +void +FillBoundary (amrex::Vector<amrex::MultiFab*> const& mf, const amrex::Periodicity& period); + +void SumBoundary (amrex::MultiFab& mf, + const amrex::Periodicity& period = amrex::Periodicity::NonPeriodic()); + +void SumBoundary (amrex::MultiFab& mf, + int start_comp, + int num_comps, + amrex::IntVect ng, + const amrex::Periodicity& period = amrex::Periodicity::NonPeriodic()); + +void SumBoundary (amrex::MultiFab& mf, + int start_comp, + int num_comps, + amrex::IntVect src_ng, + amrex::IntVect dst_ng, + const amrex::Periodicity& period = amrex::Periodicity::NonPeriodic()); + +void OverrideSync (amrex::MultiFab& mf, + const amrex::Periodicity& period = amrex::Periodicity::NonPeriodic()); +} + +#endif diff --git a/Source/Parallelization/WarpXCommUtil.cpp b/Source/Parallelization/WarpXCommUtil.cpp new file mode 100644 index 000000000..d395e7628 --- /dev/null +++ b/Source/Parallelization/WarpXCommUtil.cpp @@ -0,0 +1,243 @@ +/* Copyright 2021 Andrew Myers + * + * This file is part of WarpX. + * + * License: BSD-3-Clause-LBNL + */ +#include "WarpXCommUtil.H" + +#include <AMReX.H> +#include <AMReX_BaseFab.H> +#include <AMReX_IntVect.H> +#include <AMReX_FabArray.H> +#include <AMReX_MultiFab.H> +#include <AMReX_iMultiFab.H> + +namespace WarpXCommUtil { + +void ParallelCopy (amrex::MultiFab& dst, + const amrex::MultiFab& src, + int src_comp, + int dst_comp, + int num_comp, + const amrex::IntVect& src_nghost, + const amrex::IntVect& dst_nghost, + const amrex::Periodicity& period, + amrex::FabArrayBase::CpOp op) +{ + BL_PROFILE("WarpXCommUtil::ParallelCopy"); + + using WarpXCommUtil::comm_float_type; + + if (WarpX::do_single_precision_comms) + { + amrex::FabArray<amrex::BaseFab<comm_float_type> > src_tmp(src.boxArray(), + src.DistributionMap(), + num_comp, + src_nghost); + mixedCopy(src_tmp, src, src_comp, 0, num_comp, src_nghost); + + amrex::FabArray<amrex::BaseFab<comm_float_type> > dst_tmp(dst.boxArray(), + dst.DistributionMap(), + num_comp, + dst_nghost); + + mixedCopy(dst_tmp, dst, dst_comp, 0, num_comp, dst_nghost); + + dst_tmp.ParallelCopy(src_tmp, 0, 0, num_comp, + src_nghost, dst_nghost, period, op); + + mixedCopy(dst, dst_tmp, 0, dst_comp, num_comp, dst_nghost); + } + else + { + dst.ParallelCopy(src, src_comp, dst_comp, num_comp, src_nghost, dst_nghost, period, op); + } +} + +void ParallelAdd (amrex::MultiFab& dst, + const amrex::MultiFab& src, + int src_comp, + int dst_comp, + int num_comp, + const amrex::IntVect& src_nghost, + const amrex::IntVect& dst_nghost, + const amrex::Periodicity& period) +{ + WarpXCommUtil::ParallelCopy(dst, src, src_comp, dst_comp, num_comp, src_nghost, dst_nghost, period, + amrex::FabArrayBase::ADD); +} + +void FillBoundary (amrex::MultiFab& mf, const amrex::Periodicity& period) +{ + BL_PROFILE("WarpXCommUtil::FillBoundary"); + + if (WarpX::do_single_precision_comms) + { + amrex::FabArray<amrex::BaseFab<comm_float_type> > mf_tmp(mf.boxArray(), + mf.DistributionMap(), + mf.nComp(), + mf.nGrowVect()); + + mixedCopy(mf_tmp, mf, 0, 0, mf.nComp(), mf.nGrowVect()); + + mf_tmp.FillBoundary(period); + + mixedCopy(mf, mf_tmp, 0, 0, mf.nComp(), mf.nGrowVect()); + } + else + { + mf.FillBoundary(period); + } +} + +void FillBoundary (amrex::MultiFab& mf, + amrex::IntVect ng, + const amrex::Periodicity& period) +{ + BL_PROFILE("WarpXCommUtil::FillBoundary"); + + if (WarpX::do_single_precision_comms) + { + amrex::FabArray<amrex::BaseFab<comm_float_type> > mf_tmp(mf.boxArray(), + mf.DistributionMap(), + mf.nComp(), + mf.nGrowVect()); + + mixedCopy(mf_tmp, mf, 0, 0, mf.nComp(), mf.nGrowVect()); + + mf_tmp.FillBoundary(ng, period); + + mixedCopy(mf, mf_tmp, 0, 0, mf.nComp(), mf.nGrowVect()); + } + else + { + mf.FillBoundary(ng, period); + } +} + +void FillBoundary (amrex::iMultiFab& imf, const amrex::Periodicity& period) +{ + BL_PROFILE("WarpXCommUtil::FillBoundary"); + + imf.FillBoundary(period); +} + +void FillBoundary (amrex::iMultiFab& imf, + amrex::IntVect ng, + const amrex::Periodicity& period) +{ + BL_PROFILE("WarpXCommUtil::FillBoundary"); + imf.FillBoundary(ng, period); +} + +void +FillBoundary (amrex::Vector<amrex::MultiFab*> const& mf, const amrex::Periodicity& period) +{ + for (auto x : mf) { + WarpXCommUtil::FillBoundary(*x, period); + } +} + +void SumBoundary (amrex::MultiFab& mf, const amrex::Periodicity& period) +{ + BL_PROFILE("WarpXCommUtil::SumBoundary"); + + if (WarpX::do_single_precision_comms) + { + amrex::FabArray<amrex::BaseFab<comm_float_type> > mf_tmp(mf.boxArray(), + mf.DistributionMap(), + mf.nComp(), + mf.nGrowVect()); + + mixedCopy(mf_tmp, mf, 0, 0, mf.nComp(), mf.nGrowVect()); + + mf_tmp.SumBoundary(period); + + mixedCopy(mf, mf_tmp, 0, 0, mf.nComp(), mf.nGrowVect()); + } + else + { + mf.SumBoundary(period); + } +} + +void SumBoundary (amrex::MultiFab& mf, + int start_comp, + int num_comps, + amrex::IntVect ng, + const amrex::Periodicity& period) +{ + BL_PROFILE("WarpXCommUtil::SumBoundary"); + + if (WarpX::do_single_precision_comms) + { + amrex::FabArray<amrex::BaseFab<comm_float_type> > mf_tmp(mf.boxArray(), + mf.DistributionMap(), + num_comps, + ng); + mixedCopy(mf_tmp, mf, start_comp, 0, num_comps, ng); + + mf_tmp.SumBoundary(0, num_comps, ng, period); + + mixedCopy(mf, mf_tmp, 0, start_comp, num_comps, ng); + } + else + { + mf.SumBoundary(start_comp, num_comps, ng, period); + } +} + +void SumBoundary (amrex::MultiFab& mf, + int start_comp, + int num_comps, + amrex::IntVect src_ng, + amrex::IntVect dst_ng, + const amrex::Periodicity& period) +{ + BL_PROFILE("WarpXCommUtil::SumBoundary"); + + if (WarpX::do_single_precision_comms) + { + amrex::FabArray<amrex::BaseFab<comm_float_type> > mf_tmp(mf.boxArray(), + mf.DistributionMap(), + num_comps, + mf.nGrowVect()); + mixedCopy(mf_tmp, mf, start_comp, 0, num_comps, mf.nGrowVect()); + + mf_tmp.SumBoundary(0, num_comps, src_ng, dst_ng, period); + + mixedCopy(mf, mf_tmp, 0, start_comp, num_comps, dst_ng); + } + else + { + mf.SumBoundary(start_comp, num_comps, src_ng, dst_ng, period); + } +} + +void OverrideSync (amrex::MultiFab& mf, + const amrex::Periodicity& period) +{ + if (mf.ixType().cellCentered()) return; + + if (WarpX::do_single_precision_comms) + { + amrex::FabArray<amrex::BaseFab<comm_float_type> > mf_tmp(mf.boxArray(), + mf.DistributionMap(), + mf.nComp(), + mf.nGrowVect()); + + mixedCopy(mf_tmp, mf, 0, 0, mf.nComp(), mf.nGrowVect()); + + auto msk = mf.OwnerMask(period); + amrex::OverrideSync(mf_tmp, *msk, period); + + mixedCopy(mf, mf_tmp, 0, 0, mf.nComp(), mf.nGrowVect()); + } + else + { + mf.OverrideSync(period); + } +} + +} diff --git a/Source/Parallelization/WarpXSumGuardCells.H b/Source/Parallelization/WarpXSumGuardCells.H index 78738c130..c94e8a0fb 100644 --- a/Source/Parallelization/WarpXSumGuardCells.H +++ b/Source/Parallelization/WarpXSumGuardCells.H @@ -36,7 +36,7 @@ WarpXSumGuardCells(amrex::MultiFab& mf, const amrex::Periodicity& period, n_updated_guards = mf.nGrowVect(); else // Update only the valid cells n_updated_guards = amrex::IntVect::TheZeroVector(); - mf.SumBoundary(icomp, ncomp, src_ngrow, n_updated_guards, period); + WarpXCommUtil::SumBoundary(mf, icomp, ncomp, src_ngrow, n_updated_guards, period); } /** \brief Sum the values of `src` where the different boxes overlap @@ -69,6 +69,7 @@ WarpXSumGuardCells(amrex::MultiFab& dst, amrex::MultiFab& src, n_updated_guards = amrex::IntVect::TheZeroVector(); dst.setVal(0., icomp, ncomp, n_updated_guards); +// WarpXCommUtil::ParallelAdd(dst, src, 0, icomp, ncomp, src_ngrow, n_updated_guards, period); dst.ParallelAdd(src, 0, icomp, ncomp, src_ngrow, n_updated_guards, period); } diff --git a/Source/Particles/MultiParticleContainer.cpp b/Source/Particles/MultiParticleContainer.cpp index ea8e58ee7..6cf890821 100644 --- a/Source/Particles/MultiParticleContainer.cpp +++ b/Source/Particles/MultiParticleContainer.cpp @@ -10,6 +10,7 @@ * License: BSD-3-Clause-LBNL */ #include "MultiParticleContainer.H" +#include "Parallelization/WarpXCommUtil.H" #include "Particles/ElementaryProcess/Ionization.H" #ifdef WARPX_QED # include "Particles/ElementaryProcess/QEDInternals/BreitWheelerEngineWrapper.H" @@ -556,7 +557,7 @@ MultiParticleContainer::GetChargeDensity (int lev, bool local) } if (!local) { const Geometry& gm = allcontainers[0]->Geom(lev); - rho->SumBoundary(gm.periodicity()); + WarpXCommUtil::SumBoundary(*rho, gm.periodicity()); } return rho; } diff --git a/Source/Particles/WarpXParticleContainer.cpp b/Source/Particles/WarpXParticleContainer.cpp index 5a76a24df..8610238db 100644 --- a/Source/Particles/WarpXParticleContainer.cpp +++ b/Source/Particles/WarpXParticleContainer.cpp @@ -13,6 +13,7 @@ #include "Deposition/CurrentDeposition.H" #include "Pusher/GetAndSetPosition.H" #include "Pusher/UpdatePosition.H" +#include "Parallelization/WarpXCommUtil.H" #include "ParticleBoundaries_K.H" #include "Utils/CoarsenMR.H" #include "Utils/WarpXAlgorithmSelection.H" @@ -783,23 +784,29 @@ WarpXParticleContainer::DepositCharge (amrex::Vector<std::unique_ptr<amrex::Mult #endif // Exchange guard cells - if (local == false) rho[lev]->SumBoundary(m_gdb->Geom(lev).periodicity()); + if (local == false) { + WarpXCommUtil::SumBoundary(*rho[lev], m_gdb->Geom(lev).periodicity()); + } } // Now that the charge has been deposited at each level, // we average down from fine to crse if (interpolate_across_levels) { - for (int lev = finest_level - 1; lev >= 0; --lev) - { + for (int lev = finest_level - 1; lev >= 0; --lev) { const DistributionMapping& fine_dm = rho[lev+1]->DistributionMap(); BoxArray coarsened_fine_BA = rho[lev+1]->boxArray(); coarsened_fine_BA.coarsen(m_gdb->refRatio(lev)); MultiFab coarsened_fine_data(coarsened_fine_BA, fine_dm, rho[lev+1]->nComp(), 0); coarsened_fine_data.setVal(0.0); - const int refinement_ratio = 2; - CoarsenMR::Coarsen(coarsened_fine_data, *rho[lev+1], IntVect(refinement_ratio)); - rho[lev]->ParallelAdd(coarsened_fine_data, m_gdb->Geom(lev).periodicity()); + + int const refinement_ratio = 2; + + CoarsenMR::Coarsen( coarsened_fine_data, *rho[lev+1], IntVect(refinement_ratio) ); + WarpXCommUtil::ParallelAdd(*rho[lev], coarsened_fine_data, 0, 0, rho[lev]->nComp(), + amrex::IntVect::TheZeroVector(), + amrex::IntVect::TheZeroVector(), + m_gdb->Geom(lev).periodicity()); } } } @@ -860,7 +867,7 @@ WarpXParticleContainer::GetChargeDensity (int lev, bool local) WarpX::GetInstance().ApplyInverseVolumeScalingToChargeDensity(rho.get(), lev); #endif - if (local == false) rho->SumBoundary(gm.periodicity()); + if (local == false) { WarpXCommUtil::SumBoundary(*rho, gm.periodicity()); } return rho; } diff --git a/Source/Utils/WarpXMovingWindow.cpp b/Source/Utils/WarpXMovingWindow.cpp index 9b270469c..a95b58a41 100644 --- a/Source/Utils/WarpXMovingWindow.cpp +++ b/Source/Utils/WarpXMovingWindow.cpp @@ -10,6 +10,7 @@ #include "BoundaryConditions/PML.H" #include "Particles/MultiParticleContainer.H" +#include "Parallelization/WarpXCommUtil.H" #include "Utils/WarpXConst.H" #include "Utils/WarpXProfilerWrapper.H" @@ -292,7 +293,7 @@ WarpX::shiftMF (MultiFab& mf, const Geometry& geom, int num_shift, int dir, if ( WarpX::safe_guard_cells ) { // Fill guard cells. - tmpmf.FillBoundary(geom.periodicity()); + WarpXCommUtil::FillBoundary(tmpmf, geom.periodicity()); } else { IntVect ng_mw = IntVect::TheUnitVector(); // Enough guard cells in the MW direction @@ -300,7 +301,7 @@ WarpX::shiftMF (MultiFab& mf, const Geometry& geom, int num_shift, int dir, // Make sure we don't exceed number of guard cells allocated ng_mw = ng_mw.min(ng); // Fill guard cells. - tmpmf.FillBoundary(ng_mw, geom.periodicity()); + WarpXCommUtil::FillBoundary(tmpmf, ng_mw, geom.periodicity()); } // Make a box that covers the region that the window moved into diff --git a/Source/WarpX.H b/Source/WarpX.H index 8edcd42ba..588648c69 100644 --- a/Source/WarpX.H +++ b/Source/WarpX.H @@ -163,6 +163,9 @@ public: // default is false for standard PSATD and true for Galilean PSATD (set in WarpX.cpp) bool update_with_rho = false; + //! perform field communications in single precision + static int do_single_precision_comms; + // PSATD: Whether to fill the guard cells with inverse FFTs based on the boundary conditions static amrex::IntVect fill_guards; diff --git a/Source/WarpX.cpp b/Source/WarpX.cpp index c9faf6110..7f620e515 100644 --- a/Source/WarpX.cpp +++ b/Source/WarpX.cpp @@ -117,6 +117,7 @@ bool WarpX::do_dive_cleaning = 0; bool WarpX::do_divb_cleaning = 0; int WarpX::em_solver_medium; int WarpX::macroscopic_solver_algo; +int WarpX::do_single_precision_comms=0; amrex::Vector<int> WarpX::field_boundary_lo(AMREX_SPACEDIM,0); amrex::Vector<int> WarpX::field_boundary_hi(AMREX_SPACEDIM,0); amrex::Vector<ParticleBoundaryType> WarpX::particle_boundary_lo(AMREX_SPACEDIM,ParticleBoundaryType::Absorbing); @@ -637,6 +638,15 @@ WarpX::ReadParameters () getArrWithParser(pp_warpx, "mirror_z_npoints", mirror_z_npoints, 0, num_mirrors); } + pp_warpx.query("do_single_precision_comms", do_single_precision_comms); +#ifdef AMREX_USE_FLOAT + if (do_single_precision_comms) { + do_single_precision_comms = 0; + amrex::Warning("\nWARNING: Overwrote warpx.do_single_precision_comms" + " to be 0, since WarpX was built in single precision."); + } +#endif + pp_warpx.query("serialize_ics", serialize_ics); pp_warpx.query("refine_plasma", refine_plasma); pp_warpx.query("do_dive_cleaning", do_dive_cleaning); |