diff options
author | 2021-09-27 12:04:47 -0700 | |
---|---|---|
committer | 2021-09-27 12:04:47 -0700 | |
commit | 2180f5fda678313da481f211482b52810705fb07 (patch) | |
tree | 0f92dc97d7bc72339d12d1b0e5676897b15b6911 /Source/Parallelization/WarpXComm.cpp | |
parent | 002871119f1f546de03aa49f44a85fa523826fea (diff) | |
download | WarpX-2180f5fda678313da481f211482b52810705fb07.tar.gz WarpX-2180f5fda678313da481f211482b52810705fb07.tar.zst WarpX-2180f5fda678313da481f211482b52810705fb07.zip |
Allow more work for OpenMP threads in `WarpX::UpdateAuxilaryData` (#2342)
* Allow more work for OpenMP threads in AuxilaryData
* Better parallelization in nodal-to-staggered current interpolation (`WarpX::UpdateCurrentNodalToStag`)
Diffstat (limited to 'Source/Parallelization/WarpXComm.cpp')
-rw-r--r-- | Source/Parallelization/WarpXComm.cpp | 14 |
1 file changed, 7 insertions, 7 deletions
diff --git a/Source/Parallelization/WarpXComm.cpp b/Source/Parallelization/WarpXComm.cpp index 09d84f210..31f651f00 100644 --- a/Source/Parallelization/WarpXComm.cpp +++ b/Source/Parallelization/WarpXComm.cpp @@ -87,7 +87,7 @@ WarpX::UpdateAuxilaryDataStagToNodal () #ifdef AMREX_USE_OMP #pragma omp parallel if (Gpu::notInLaunchRegion()) #endif - for (MFIter mfi(*Bfield_aux[0][0]); mfi.isValid(); ++mfi) + for (MFIter mfi(*Bfield_aux[0][0], TilingIfNotGPU()); mfi.isValid(); ++mfi) { Array4<Real> const& bx_aux = Bfield_aux[0][0]->array(mfi); Array4<Real> const& by_aux = Bfield_aux[0][1]->array(mfi); @@ -103,10 +103,10 @@ WarpX::UpdateAuxilaryDataStagToNodal () Array4<Real const> const& ey_fp = Emf[0][1]->const_array(mfi); Array4<Real const> const& ez_fp = Emf[0][2]->const_array(mfi); - // Loop over full box including ghost cells + // Loop includes ghost cells (`growntilebox`) // (input arrays will be padded with zeros beyond ghost cells // for out-of-bound accesses due to large-stencil operations) - Box bx = mfi.fabbox(); + Box bx = mfi.growntilebox(); if (maxwell_solver_id == MaxwellSolverAlgo::PSATD) { @@ -194,7 +194,7 @@ WarpX::UpdateAuxilaryDataStagToNodal () #ifdef AMREX_USE_OMP #pragma omp parallel if (Gpu::notInLaunchRegion()) #endif - for (MFIter mfi(*Bfield_aux[lev][0]); mfi.isValid(); ++mfi) + for (MFIter mfi(*Bfield_aux[lev][0], TilingIfNotGPU()); mfi.isValid(); ++mfi) { Array4<Real> const& bx_aux = Bfield_aux[lev][0]->array(mfi); Array4<Real> const& by_aux = Bfield_aux[lev][1]->array(mfi); @@ -209,7 +209,7 @@ WarpX::UpdateAuxilaryDataStagToNodal () Array4<Real const> const& by_c = Btmp[1]->const_array(mfi); Array4<Real const> const& bz_c = Btmp[2]->const_array(mfi); - const Box& bx = mfi.fabbox(); + const Box& bx = mfi.growntilebox(); amrex::ParallelFor(bx, [=] AMREX_GPU_DEVICE (int j, int k, int l) noexcept { @@ -433,12 +433,12 @@ void WarpX::UpdateCurrentNodalToStag (amrex::MultiFab& dst, amrex::MultiFab cons #pragma omp parallel if 
(Gpu::notInLaunchRegion()) #endif - for (MFIter mfi(dst); mfi.isValid(); ++mfi) + for (MFIter mfi(dst, TilingIfNotGPU()); mfi.isValid(); ++mfi) { // Loop over full box including ghost cells // (input arrays will be padded with zeros beyond ghost cells // for out-of-bound accesses due to large-stencil operations) - Box bx = mfi.fabbox(); + Box bx = mfi.growntilebox(); amrex::Array4<amrex::Real const> const& src_arr = src.const_array(mfi); amrex::Array4<amrex::Real> const& dst_arr = dst.array(mfi); |