From 69b0a98b3e8495eb2eca4269533516d66f3ff6f3 Mon Sep 17 00:00:00 2001 From: Weiqun Zhang Date: Fri, 27 Sep 2019 11:41:49 -0700 Subject: nodal auxilary data --- Source/Parallelization/WarpXComm.cpp | 151 +++++++++++++++++++++++++++++++++++ 1 file changed, 151 insertions(+) (limited to 'Source/Parallelization/WarpXComm.cpp') diff --git a/Source/Parallelization/WarpXComm.cpp b/Source/Parallelization/WarpXComm.cpp index 990d0f988..e67acec22 100644 --- a/Source/Parallelization/WarpXComm.cpp +++ b/Source/Parallelization/WarpXComm.cpp @@ -51,6 +51,157 @@ WarpX::UpdateAuxilaryData () { BL_PROFILE("UpdateAuxilaryData()"); + if (Bfield_aux[0][0]->is_nodal()) { + UpdateNodalAuxilaryData(); + } else { + UpdateStagAuxilaryData(); + } +} + +void +WarpX::UpdateNodalAuxilaryData () +{ + // For level 0, we only need to do the average. +#ifdef _OPENMP +#pragma omp parallel if (Gpu::notInLaunchRegion()) +#endif + for (MFIter mfi(*Bfield_aux[0][0]); mfi.isValid(); ++mfi) + { + Array4 const& bx_aux = Bfield_aux[0][0]->array(mfi); + Array4 const& by_aux = Bfield_aux[0][1]->array(mfi); + Array4 const& bz_aux = Bfield_aux[0][2]->array(mfi); + Array4 const& bx_fp = Bfield_fp[0][0]->const_array(mfi); + Array4 const& by_fp = Bfield_fp[0][1]->const_array(mfi); + Array4 const& bz_fp = Bfield_fp[0][2]->const_array(mfi); + + Array4 const& ex_aux = Efield_aux[0][0]->array(mfi); + Array4 const& ey_aux = Efield_aux[0][1]->array(mfi); + Array4 const& ez_aux = Efield_aux[0][2]->array(mfi); + Array4 const& ex_fp = Efield_fp[0][0]->const_array(mfi); + Array4 const& ey_fp = Efield_fp[0][1]->const_array(mfi); + Array4 const& ez_fp = Efield_fp[0][2]->const_array(mfi); + + const Box& bx = mfi.fabbox(); + amrex::ParallelFor(bx, + [=] AMREX_GPU_DEVICE (int j, int k, int l) noexcept + { + warpx_interp_nd_bfield_x(j,k,l, bx_aux, bx_fp); + warpx_interp_nd_bfield_y(j,k,l, by_aux, by_fp); + warpx_interp_nd_bfield_z(j,k,l, bz_aux, bz_fp); + warpx_interp_nd_efield_x(j,k,l, ex_aux, ex_fp); + warpx_interp_nd_efield_y(j,k,l, ey_aux, ey_fp); + warpx_interp_nd_efield_z(j,k,l, ez_aux, ez_fp); + }); + } + + for (int lev = 1; lev <= finest_level; ++lev) + { + BoxArray const& nba = Bfield_aux[lev][0]->boxArray(); + BoxArray const& cnba = amrex::coarsen(nba,2); + DistributionMapping const& dm = Bfield_aux[lev][0]->DistributionMap(); + auto const& cperiod = Geom(lev-1).periodicity(); + + // Bfield + { + Array,3> Btmp; + if (Bfield_cax[lev][0]) { + for (int i = 0; i < 3; ++i) { + Btmp[i].reset(new MultiFab(*Bfield_cax[lev][i], amrex::make_alias, 0, 1)); + } + } else { + IntVect ngtmp = Bfield_aux[lev-1][0]->nGrowVect(); + for (int i = 0; i < 3; ++i) { + Btmp[i].reset(new MultiFab(cnba, dm, 1, ngtmp)); + } + } + // ParallelCopy from coarse level + for (int i = 0; i < 3; ++i) { + IntVect ng = Btmp[i]->nGrowVect(); + Btmp[i]->ParallelCopy(*Bfield_aux[lev-1][i], 0, 0, 1, ng, ng, cperiod); + } + +#ifdef _OPENMP +#pragma omp parallel if (Gpu::notInLaunchRegion()) +#endif + for (MFIter mfi(*Bfield_aux[lev][0]); mfi.isValid(); ++mfi) + { + Array4 const& bx_aux = Bfield_aux[lev][0]->array(mfi); + Array4 const& by_aux = Bfield_aux[lev][1]->array(mfi); + Array4 const& bz_aux = Bfield_aux[lev][2]->array(mfi); + Array4 const& bx_fp = Bfield_fp[lev][0]->const_array(mfi); + Array4 const& by_fp = Bfield_fp[lev][1]->const_array(mfi); + Array4 const& bz_fp = Bfield_fp[lev][2]->const_array(mfi); + Array4 const& bx_cp = Bfield_cp[lev][0]->const_array(mfi); + Array4 const& by_cp = Bfield_cp[lev][1]->const_array(mfi); + Array4 const& bz_cp = Bfield_cp[lev][2]->const_array(mfi); + Array4 const& bx_c = Btmp[0]->const_array(mfi); + Array4 const& by_c = Btmp[1]->const_array(mfi); + Array4 const& bz_c = Btmp[2]->const_array(mfi); + + const Box& bx = mfi.fabbox(); + amrex::ParallelFor(bx, + [=] AMREX_GPU_DEVICE (int j, int k, int l) noexcept + { + warpx_interp_nd_bfield_x(j,k,l, bx_aux, bx_fp, bx_cp, bx_c); + warpx_interp_nd_bfield_y(j,k,l, by_aux, by_fp, by_cp, by_c); + warpx_interp_nd_bfield_z(j,k,l, bz_aux, bz_fp, bz_cp, bz_c); + }); + } + } + + // Efield + { + Array,3> Etmp; + if (Efield_cax[lev][0]) { + for (int i = 0; i < 3; ++i) { + Etmp[i].reset(new MultiFab(*Efield_cax[lev][i], amrex::make_alias, 0, 1)); + } + } else { + IntVect ngtmp = Efield_aux[lev-1][0]->nGrowVect(); + for (int i = 0; i < 3; ++i) { + Etmp[i].reset(new MultiFab(cnba, dm, 1, ngtmp)); + } + } + // ParallelCopy from coarse level + for (int i = 0; i < 3; ++i) { + IntVect ng = Etmp[i]->nGrowVect(); + Etmp[i]->ParallelCopy(*Efield_aux[lev-1][i], 0, 0, 1, ng, ng, cperiod); + } + +#ifdef _OPENMP +#pragma omp parallel if (Gpu::notInLaunchRegion()) +#endif + for (MFIter mfi(*Efield_aux[lev][0]); mfi.isValid(); ++mfi) + { + Array4 const& ex_aux = Efield_aux[lev][0]->array(mfi); + Array4 const& ey_aux = Efield_aux[lev][1]->array(mfi); + Array4 const& ez_aux = Efield_aux[lev][2]->array(mfi); + Array4 const& ex_fp = Efield_fp[lev][0]->const_array(mfi); + Array4 const& ey_fp = Efield_fp[lev][1]->const_array(mfi); + Array4 const& ez_fp = Efield_fp[lev][2]->const_array(mfi); + Array4 const& ex_cp = Efield_cp[lev][0]->const_array(mfi); + Array4 const& ey_cp = Efield_cp[lev][1]->const_array(mfi); + Array4 const& ez_cp = Efield_cp[lev][2]->const_array(mfi); + Array4 const& ex_c = Etmp[0]->const_array(mfi); + Array4 const& ey_c = Etmp[1]->const_array(mfi); + Array4 const& ez_c = Etmp[2]->const_array(mfi); + + const Box& bx = mfi.fabbox(); + amrex::ParallelFor(bx, + [=] AMREX_GPU_DEVICE (int j, int k, int l) noexcept + { + warpx_interp_nd_efield_x(j,k,l, ex_aux, ex_fp, ex_cp, ex_c); + warpx_interp_nd_efield_y(j,k,l, ey_aux, ey_fp, ey_cp, ey_c); + warpx_interp_nd_efield_z(j,k,l, ez_aux, ez_fp, ez_cp, ez_c); + }); + } + } + } +} + +void +WarpX::UpdateStagAuxilaryData () +{ for (int lev = 1; lev <= finest_level; ++lev) { const auto& crse_period = Geom(lev-1).periodicity(); -- cgit v1.2.3 From 45b6e243030bb72816f79f55aa60f88f7aa7f7bc Mon Sep 17 00:00:00 2001 From: Axel Huebl Date: Thu, 26 Sep 2019 18:24:45 -0700 Subject: Rho Synchronize: Port to GPU Port the charge density synchronize functions to GPU. --- Source/FortranInterface/WarpX_f.F90 | 59 ---------- Source/FortranInterface/WarpX_f.H | 9 -- .../InterpolateDensityFineToCoarse.H | 120 +++++++++++++++++++++ Source/Parallelization/WarpXComm.cpp | 35 +++--- Source/WarpX.H | 8 +- 5 files changed, 143 insertions(+), 88 deletions(-) create mode 100644 Source/Parallelization/InterpolateDensityFineToCoarse.H (limited to 'Source/Parallelization/WarpXComm.cpp') diff --git a/Source/FortranInterface/WarpX_f.F90 b/Source/FortranInterface/WarpX_f.F90 index 2ddf9409d..762ed2cd8 100644 --- a/Source/FortranInterface/WarpX_f.F90 +++ b/Source/FortranInterface/WarpX_f.F90 @@ -51,65 +51,6 @@ contains end subroutine warpx_compute_E - subroutine warpx_sync_rho_2d (lo, hi, crse, clo, chi, fine, flo, fhi, nc) & - bind(c, name='warpx_sync_rho_2d') - integer, intent(in) :: lo(2), hi(2), flo(2), fhi(2), clo(2), chi(2), nc - real(amrex_real), intent(in ) :: fine(flo(1):fhi(1),flo(2):fhi(2),nc) - real(amrex_real), intent(inout) :: crse(clo(1):chi(1),clo(2):chi(2),nc) - - integer :: i,j,ii,jj,m - - do m = 1, nc - do j = lo(2), hi(2) - jj = j*2 - do i = lo(1), hi(1) - ii = i*2 - crse(i,j,m) = 0.25d0 * & - ( fine(ii,jj,m) + 0.5d0 *(fine(ii-1,jj ,m)+fine(ii+1,jj ,m) & - & + fine(ii ,jj-1,m)+fine(ii ,jj+1,m)) & - & + 0.25d0*(fine(ii-1,jj-1,m)+fine(ii+1,jj-1,m) & - & + fine(ii-1,jj+1,m)+fine(ii+1,jj+1,m)) ) - end do - end do - end do - end subroutine warpx_sync_rho_2d - - subroutine warpx_sync_rho_3d (lo, hi, crse, clo, chi, fine, flo, fhi, nc) & - bind(c, name='warpx_sync_rho_3d') - integer, intent(in) :: lo(3), hi(3), flo(3), fhi(3), clo(3), chi(3), nc - real(amrex_real), intent(in ) :: fine(flo(1):fhi(1),flo(2):fhi(2),flo(3):fhi(3),nc) - real(amrex_real), intent(inout) :: crse(clo(1):chi(1),clo(2):chi(2),clo(3):chi(3),nc) - - integer :: i,j,k,ii,jj,kk,m - - do m = 1, nc - do k = lo(3), hi(3) - kk = k*2 - do j = lo(2), hi(2) - jj = j*2 - do i = lo(1), hi(1) - ii = i*2 - crse(i,j,k,m) = 0.125d0 * & - (fine(ii,jj,kk,m) + 0.5d0 *(fine(ii-1,jj ,kk ,m)+fine(ii+1,jj ,kk ,m) & - & + fine(ii ,jj-1,kk ,m)+fine(ii ,jj+1,kk ,m) & - & + fine(ii ,jj ,kk-1,m)+fine(ii ,jj ,kk+1,m)) & - & + 0.25d0 *(fine(ii-1,jj-1,kk ,m)+fine(ii+1,jj-1,kk ,m) & - & + fine(ii-1,jj+1,kk ,m)+fine(ii+1,jj+1,kk ,m) & - & + fine(ii-1,jj ,kk-1,m)+fine(ii+1,jj ,kk-1,m) & - & + fine(ii-1,jj ,kk+1,m)+fine(ii+1,jj ,kk+1,m) & - & + fine(ii ,jj-1,kk-1,m)+fine(ii ,jj+1,kk-1,m) & - & + fine(ii ,jj-1,kk+1,m)+fine(ii ,jj+1,kk+1,m)) & - & + 0.125d0*(fine(ii-1,jj-1,kk-1,m)+fine(ii-1,jj-1,kk+1,m) & - & + fine(ii-1,jj+1,kk-1,m)+fine(ii-1,jj+1,kk+1,m) & - & + fine(ii+1,jj-1,kk-1,m)+fine(ii+1,jj-1,kk+1,m) & - & + fine(ii+1,jj+1,kk-1,m)+fine(ii+1,jj+1,kk+1,m))) - end do - end do - end do - end do - - end subroutine warpx_sync_rho_3d - subroutine warpx_build_buffer_masks (lo, hi, msk, mlo, mhi, gmsk, glo, ghi, ng) & bind(c, name='warpx_build_buffer_masks') integer, dimension(3), intent(in) :: lo, hi, mlo, mhi, glo, ghi diff --git a/Source/FortranInterface/WarpX_f.H b/Source/FortranInterface/WarpX_f.H index 2d5d346e2..b51a83387 100644 --- a/Source/FortranInterface/WarpX_f.H +++ b/Source/FortranInterface/WarpX_f.H @@ -16,8 +16,6 @@ #if (AMREX_SPACEDIM == 3) -#define WRPX_SYNC_RHO warpx_sync_rho_3d - #define WRPX_SUM_FINE_TO_CRSE_NODAL warpx_sum_fine_to_crse_nodal_3d #define WRPX_ZERO_OUT_BNDRY warpx_zero_out_bndry_3d #define WRPX_BUILD_MASK warpx_build_mask_3d @@ -30,8 +28,6 @@ #elif (AMREX_SPACEDIM == 2) -#define WRPX_SYNC_RHO warpx_sync_rho_2d - #define WRPX_SUM_FINE_TO_CRSE_NODAL warpx_sum_fine_to_crse_nodal_2d #define WRPX_ZERO_OUT_BNDRY warpx_zero_out_bndry_2d #define WRPX_BUILD_MASK warpx_build_mask_2d @@ -229,11 +225,6 @@ extern "C" #endif const amrex::Real* sigbz, int sigbz_lo, int sigbz_hi); - void WRPX_SYNC_RHO (const int* lo, const int* hi, - BL_FORT_FAB_ARG_ANYD(crse), - const BL_FORT_FAB_ARG_ANYD(fine), - const int* ncomp); - #ifdef WARPX_USE_PSATD void warpx_fft_mpi_init (int fcomm); void warpx_fft_domain_decomp (int* warpx_local_nz, int* warpx_local_z0, diff --git a/Source/Parallelization/InterpolateDensityFineToCoarse.H b/Source/Parallelization/InterpolateDensityFineToCoarse.H new file mode 100644 index 000000000..947db2aac --- /dev/null +++ b/Source/Parallelization/InterpolateDensityFineToCoarse.H @@ -0,0 +1,120 @@ +/* Copyright 2019 Axel Huebl, Weiqun Zhang + * + * This file is part of WarpX. + * + * License: BSD-3-Clause-LBNL + */ + +#ifndef INTERPOLATE_DENSITY_FINE_TO_COARSE_H +#define INTERPOLATE_DENSITY_FINE_TO_COARSE_H + +#include +#include +#include +#include +#include + +#include // std::move + + +/** Fill a charge density (rho) coarse patch with averaged values from a fine patch + * + * Fills the values of the charge density on the coarse patch + * by averaging the values of the charge density of the fine patch. + */ +class InterpolateDensityFineToCoarse +{ +public: + /** Construct with fine and coarse patch and their refinement ratio + * + * @param[in] fine read-only fine patch + * @param[in,out] coarse overwritten coarse patch + * @param[in] refinement_ratio ratio between coarse and fine patch granularity + * (currently, only a value of is implemented) + * @param[in] number_of_components the number of components + */ + AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE + InterpolateDensityFineToCoarse( + amrex::Array4 const fine, + amrex::Array4 const coarse, + int const refinement_ratio, + int const number_of_components + ) : m_fine(std::move(fine)), + m_coarse(std::move(coarse)), + m_refinement_ratio(std::move(refinement_ratio)), + m_number_of_components(std::move(number_of_components)) + { + //! @note constants and stencils in operator() implementation assume 2x refinement + BL_ASSERT(refinement_ratio == 2); + } + + AMREX_GPU_DEVICE AMREX_FORCE_INLINE + void + operator()( + int const i, + int const j, + int const k + ) const noexcept + { + using namespace amrex; + + auto const & fine_unsafe = m_fine; // out-of-bounds access not secured with zero-values yet + auto const & coarse = m_coarse; // out-of-bounds access not secured but will also not occur + + // return zero for out-of-bounds accesses during interpolation + // this is efficiently used as a method to add neutral elements beyond guards in the average below + auto const fine = [fine_unsafe] AMREX_GPU_DEVICE (int const j, int const k, int const l, int const m) noexcept + { + return fine_unsafe.contains(j, k, l) ? fine_unsafe(j, k, l, m) : amrex::Real{0.}; + }; + + int const ii = i * m_refinement_ratio; + int const jj = j * m_refinement_ratio; + int const kk = k * m_refinement_ratio; + for( int m = 0; m < m_number_of_components; ++m ) { +#if AMREX_SPACEDIM == 2 + coarse(i,j,kk,m) = 0.25_rt * ( + fine(ii,jj,kk,m) + + 0.5_rt * ( + fine(ii-1,jj ,kk,m) + fine(ii+1,jj ,kk,m) + + fine(ii ,jj-1,kk,m) + fine(ii ,jj+1,kk,m) + ) + + 0.25_rt * ( + fine(ii-1,jj-1,kk,m) + fine(ii+1,jj-1,kk,m) + + fine(ii-1,jj+1,kk,m) + fine(ii+1,jj+1,kk,m) + ) + ); +#elif AMREX_SPACEDIM == 3 + coarse(i,j,k,m) = 0.125_rt * ( + fine(ii,jj,kk,m) + + 0.5_rt * ( + fine(ii-1,jj ,kk ,m) + fine(ii+1,jj ,kk ,m) + + fine(ii ,jj-1,kk ,m) + fine(ii ,jj+1,kk ,m) + + fine(ii ,jj ,kk-1,m) + fine(ii ,jj ,kk+1,m) + ) + + 0.25_rt * ( + fine(ii-1,jj-1,kk ,m) + fine(ii+1,jj-1,kk ,m) + + fine(ii-1,jj+1,kk ,m) + fine(ii+1,jj+1,kk ,m) + + fine(ii-1,jj ,kk-1,m) + fine(ii+1,jj ,kk-1,m) + + fine(ii-1,jj ,kk+1,m) + fine(ii+1,jj ,kk+1,m) + + fine(ii ,jj-1,kk-1,m) + fine(ii ,jj+1,kk-1,m) + + fine(ii ,jj-1,kk+1,m) + fine(ii ,jj+1,kk+1,m) + ) + + 0.125_rt * ( + fine(ii-1,jj-1,kk-1,m) + fine(ii-1,jj-1,kk+1,m) + + fine(ii-1,jj+1,kk-1,m) + fine(ii-1,jj+1,kk+1,m) + + fine(ii+1,jj-1,kk-1,m) + fine(ii+1,jj-1,kk+1,m) + + fine(ii+1,jj+1,kk-1,m) + fine(ii+1,jj+1,kk+1,m) + ) + ); +#endif + } + } +private: + amrex::Array4< amrex::Real const > m_fine; + amrex::Array4< amrex::Real > m_coarse; + int m_refinement_ratio; + int m_number_of_components; +}; + +#endif // INTERPOLATE_DENSITY_FINE_TO_COARSE_H diff --git a/Source/Parallelization/WarpXComm.cpp b/Source/Parallelization/WarpXComm.cpp index 4f870e79c..92f0b4f09 100644 --- a/Source/Parallelization/WarpXComm.cpp +++ b/Source/Parallelization/WarpXComm.cpp @@ -3,10 +3,12 @@ #include #include #include +#include #include #include + using namespace amrex; void @@ -354,7 +356,7 @@ WarpX::interpolateCurrentFineToCoarse ( std::array< amrex::MultiFab const *, 3 > std::array< amrex::MultiFab *, 3 > const & coarse, int const refinement_ratio) { - BL_PROFILE("InterpolateCurrentFineToCoarse()"); + BL_PROFILE("interpolateCurrentFineToCoarse()"); BL_ASSERT(refinement_ratio == 2); const IntVect& ng = (fine[0]->nGrowVect() + 1) / refinement_ratio; // add equivalent no. of guards to coarse patch @@ -386,6 +388,8 @@ WarpX::interpolateCurrentFineToCoarse ( std::array< amrex::MultiFab const *, 3 > void WarpX::SyncRho () { + BL_PROFILE("SyncRho()"); + if (!rho_fp[0]) return; const int ncomp = rho_fp[0]->nComp(); @@ -395,7 +399,7 @@ WarpX::SyncRho () { rho_cp[lev]->setVal(0.0); const IntVect& refinement_ratio = refRatio(lev-1); - SyncRho(*rho_fp[lev], *rho_cp[lev], refinement_ratio[0]); + interpolateDensityFineToCoarse(*rho_fp[lev], *rho_cp[lev], refinement_ratio[0]); } // For each level @@ -408,29 +412,26 @@ WarpX::SyncRho () } void -WarpX::SyncRho (const MultiFab& fine, MultiFab& crse, int refinement_ratio) +WarpX::interpolateDensityFineToCoarse (const MultiFab& fine, MultiFab& coarse, int const refinement_ratio) { + BL_PROFILE("interpolateDensityFineToCoarse()"); BL_ASSERT(refinement_ratio == 2); - const IntVect& ng = (fine.nGrowVect()+1)/refinement_ratio; + const IntVect& ng = (fine.nGrowVect() + 1) / refinement_ratio; // add equivalent no. of guards to coarse patch const int nc = fine.nComp(); #ifdef _OPEMP #pragma omp parallel if (Gpu::notInLaunchRegion()) #endif { - FArrayBox ffab; - for (MFIter mfi(crse,true); mfi.isValid(); ++mfi) + // OMP in-box decomposition of coarse into tilebox + for (MFIter mfi(coarse, TilingIfNotGPU()); mfi.isValid(); ++mfi) { - const Box& bx = mfi.growntilebox(ng); - Box fbx = amrex::grow(amrex::refine(bx,refinement_ratio),1); - ffab.resize(fbx, nc); - fbx &= fine[mfi].box(); - ffab.setVal(0.0); - ffab.copy(fine[mfi], fbx, 0, fbx, 0, nc); - WRPX_SYNC_RHO(bx.loVect(), bx.hiVect(), - BL_TO_FORTRAN_ANYD(crse[mfi]), - BL_TO_FORTRAN_ANYD(ffab), - &nc); + const Box& bx = mfi.growntilebox(ng); // only grow to outer directions of tileboxes for filling guards + + amrex::ParallelFor( + bx, + InterpolateDensityFineToCoarse(fine.const_array(mfi), coarse.array(mfi), refinement_ratio, nc) + ); } } } @@ -562,7 +563,7 @@ WarpX::RestrictRhoFromFineToCoarsePatch (int lev) if (rho_fp[lev]) { rho_cp[lev]->setVal(0.0); const IntVect& refinement_ratio = refRatio(lev-1); - SyncRho(*rho_fp[lev], *rho_cp[lev], refinement_ratio[0]); + interpolateDensityFineToCoarse(*rho_fp[lev], *rho_cp[lev], refinement_ratio[0]); } } diff --git a/Source/WarpX.H b/Source/WarpX.H index c59802427..0da1cf350 100644 --- a/Source/WarpX.H +++ b/Source/WarpX.H @@ -436,10 +436,12 @@ private: * averaging the values of the charge density of the fine patch (on the same level). * * \param[in] fine fine patches to interpolate from - * \param[out] crse coarse patches to interpolate to - * \param[in] ref_ratio integer ratio between the two + * \param[out] coarse coarse patches to interpolate to + * \param[in] refinement_ratio integer ratio between the two */ - void SyncRho (const amrex::MultiFab& fine, amrex::MultiFab& crse, int ref_ratio); + void interpolateDensityFineToCoarse (const amrex::MultiFab& fine, + amrex::MultiFab& coarse, + int const refinement_ratio); void ExchangeWithPmlB (int lev); void ExchangeWithPmlE (int lev); -- cgit v1.2.3 From 6b70e4abd40d70b48bf4a40cd1a7675a77c8a7ff Mon Sep 17 00:00:00 2001 From: Weiqun Zhang Date: Fri, 4 Oct 2019 16:38:41 -0700 Subject: fix for cases with warpx.do_nodal=1 --- Source/Parallelization/WarpXComm.cpp | 10 +++++----- Source/WarpX.H | 4 ++-- 2 files changed, 7 insertions(+), 7 deletions(-) (limited to 'Source/Parallelization/WarpXComm.cpp') diff --git a/Source/Parallelization/WarpXComm.cpp b/Source/Parallelization/WarpXComm.cpp index 6128b27a4..c5a8877d3 100644 --- a/Source/Parallelization/WarpXComm.cpp +++ b/Source/Parallelization/WarpXComm.cpp @@ -52,15 +52,15 @@ WarpX::UpdateAuxilaryData () { BL_PROFILE("UpdateAuxilaryData()"); - if (Bfield_aux[0][0]->is_nodal()) { - UpdateNodalAuxilaryData(); + if (Bfield_aux[0][0]->ixType() == Bfield_fp[0][0]->ixType()) { + UpdateAuxilaryDataSameType(); } else { - UpdateStagAuxilaryData(); + UpdateAuxilaryDataStagToNodal(); } } void -WarpX::UpdateNodalAuxilaryData () +WarpX::UpdateAuxilaryDataStagToNodal () { // For level 0, we only need to do the average. #ifdef _OPENMP @@ -201,7 +201,7 @@ WarpX::UpdateNodalAuxilaryData () } void -WarpX::UpdateStagAuxilaryData () +WarpX::UpdateAuxilaryDataSameType () { for (int lev = 1; lev <= finest_level; ++lev) { diff --git a/Source/WarpX.H b/Source/WarpX.H index 561e0533d..dd86dfc14 100644 --- a/Source/WarpX.H +++ b/Source/WarpX.H @@ -203,8 +203,8 @@ public: // This function does aux(lev) = fp(lev) + I(aux(lev-1)-cp(lev)). // Caller must make sure fp and cp have ghost cells filled. void UpdateAuxilaryData (); - void UpdateNodalAuxilaryData (); - void UpdateStagAuxilaryData (); + void UpdateAuxilaryDataStagToNodal (); + void UpdateAuxilaryDataSameType (); // Fill boundary cells including coarse/fine boundaries void FillBoundaryB (); -- cgit v1.2.3 From aea192d6ff5aabfedf09c9f139e831fb5d7bbdd6 Mon Sep 17 00:00:00 2001 From: Remi Lehe Date: Mon, 11 Nov 2019 20:43:52 -0800 Subject: Update DepositCharge --- Source/Parallelization/Make.package | 1 + Source/Parallelization/WarpXComm.cpp | 14 +++++++------- Source/Particles/WarpXParticleContainer.cpp | 9 +++++---- Source/WarpX.H | 23 ----------------------- 4 files changed, 13 insertions(+), 34 deletions(-) (limited to 'Source/Parallelization/WarpXComm.cpp') diff --git a/Source/Parallelization/Make.package b/Source/Parallelization/Make.package index c74583522..7c3c38627 100644 --- a/Source/Parallelization/Make.package +++ b/Source/Parallelization/Make.package @@ -2,6 +2,7 @@ CEXE_sources += WarpXComm.cpp CEXE_sources += WarpXRegrid.cpp CEXE_headers += WarpXSumGuardCells.H CEXE_headers += WarpXComm_K.H +CEXE_headers += WarpXComm.H INCLUDE_LOCATIONS += $(WARPX_HOME)/Source/Parallelization VPATH_LOCATIONS += $(WARPX_HOME)/Source/Parallelization diff --git a/Source/Parallelization/WarpXComm.cpp b/Source/Parallelization/WarpXComm.cpp index 52df3dc25..851b78748 100644 --- a/Source/Parallelization/WarpXComm.cpp +++ b/Source/Parallelization/WarpXComm.cpp @@ -1,14 +1,14 @@ +#include "WarpXComm.H" #include #include #include #include -#include -#include +#include +#include #include #include - using namespace amrex; void @@ -503,9 +503,9 @@ WarpX::SyncCurrent () } void -WarpX::interpolateCurrentFineToCoarse ( std::array< amrex::MultiFab const *, 3 > const & fine, - std::array< amrex::MultiFab *, 3 > const & coarse, - int const refinement_ratio) +interpolateCurrentFineToCoarse ( std::array< amrex::MultiFab const *, 3 > const & fine, + std::array< amrex::MultiFab *, 3 > const & coarse, + int const refinement_ratio) { BL_PROFILE("interpolateCurrentFineToCoarse()"); BL_ASSERT(refinement_ratio == 2); @@ -563,7 +563,7 @@ WarpX::SyncRho () } void -WarpX::interpolateDensityFineToCoarse (const MultiFab& fine, MultiFab& coarse, int const refinement_ratio) +interpolateDensityFineToCoarse (const MultiFab& fine, MultiFab& coarse, int const refinement_ratio) { BL_PROFILE("interpolateDensityFineToCoarse()"); BL_ASSERT(refinement_ratio == 2); diff --git a/Source/Particles/WarpXParticleContainer.cpp b/Source/Particles/WarpXParticleContainer.cpp index 6f195fa87..4be339707 100644 --- a/Source/Particles/WarpXParticleContainer.cpp +++ b/Source/Particles/WarpXParticleContainer.cpp @@ -7,7 +7,7 @@ #include #include #include - +#include // Import low-level single-particle kernels #include #include @@ -505,7 +505,8 @@ WarpXParticleContainer::DepositCharge (Vector >& rho, bool local, bool reset) { // Loop over the refinement levels - for (int lev = 0; lev < max_level; ++lev) { + int const finest_level = rho.size() - 1; + for (int lev = 0; lev < finest_level; ++lev) { // Reset the `rho` array if `reset` is True if (reset) rho[lev]->setVal(0.0, rho[lev]->nGrow()); @@ -557,8 +558,8 @@ WarpXParticleContainer::DepositCharge (Vector >& rho, int const refinement_ratio = 2; - interpolateDensityFineToCoarse( rho[lev+1], coarsened_fine_data, refinement_ratio ); - rho[lev].ParallelAdd( coarsened_fine_data, m_gdb->Geom(lev).periodicity() ); + interpolateDensityFineToCoarse( *rho[lev+1], coarsened_fine_data, refinement_ratio ); + rho[lev]->ParallelAdd( coarsened_fine_data, m_gdb->Geom(lev).periodicity() ); } } diff --git a/Source/WarpX.H b/Source/WarpX.H index 4275df3c5..d17787c57 100644 --- a/Source/WarpX.H +++ b/Source/WarpX.H @@ -419,29 +419,6 @@ private: std::array, 3> getInterpolatedB(int lev) const; - /** \brief Fills the values of the current on the coarse patch by - * averaging the values of the current of the fine patch (on the same level). - * Also fills the guards of the coarse patch. - * - * \param[in] fine fine patches to interpolate from - * \param[out] coarse coarse patches to interpolate to - * \param[in] refinement_ratio integer ratio between the two - */ - void interpolateCurrentFineToCoarse (std::array< amrex::MultiFab const *, 3 > const & fine, - std::array< amrex::MultiFab *, 3 > const & coarse, - int const refinement_ratio); - - /** \brief Fills the values of the charge density on the coarse patch by - * averaging the values of the charge density of the fine patch (on the same level). - * - * \param[in] fine fine patches to interpolate from - * \param[out] coarse coarse patches to interpolate to - * \param[in] refinement_ratio integer ratio between the two - */ - void interpolateDensityFineToCoarse (const amrex::MultiFab& fine, - amrex::MultiFab& coarse, - int const refinement_ratio); - void ExchangeWithPmlB (int lev); void ExchangeWithPmlE (int lev); void ExchangeWithPmlF (int lev); -- cgit v1.2.3 From 38ffda500337187c6f51422ccdd0ac208b483691 Mon Sep 17 00:00:00 2001 From: Remi Lehe Date: Tue, 12 Nov 2019 09:35:13 -0800 Subject: Remove Fortran files --- Source/Parallelization/WarpXComm.cpp | 9 +- Source/Particles/Make.package | 1 - Source/Particles/WarpXParticleContainer.cpp | 1 + Source/Particles/deposit_cic.F90 | 134 ---------------------------- 4 files changed, 6 insertions(+), 139 deletions(-) delete mode 100644 Source/Particles/deposit_cic.F90 (limited to 'Source/Parallelization/WarpXComm.cpp') diff --git a/Source/Parallelization/WarpXComm.cpp b/Source/Parallelization/WarpXComm.cpp index 52df3dc25..529d52604 100644 --- a/Source/Parallelization/WarpXComm.cpp +++ b/Source/Parallelization/WarpXComm.cpp @@ -1,3 +1,4 @@ +#include #include #include #include @@ -503,9 +504,9 @@ WarpX::SyncCurrent () } void -WarpX::interpolateCurrentFineToCoarse ( std::array< amrex::MultiFab const *, 3 > const & fine, - std::array< amrex::MultiFab *, 3 > const & coarse, - int const refinement_ratio) +interpolateCurrentFineToCoarse ( std::array< amrex::MultiFab const *, 3 > const & fine, + std::array< amrex::MultiFab *, 3 > const & coarse, + int const refinement_ratio) { BL_PROFILE("interpolateCurrentFineToCoarse()"); BL_ASSERT(refinement_ratio == 2); @@ -563,7 +564,7 @@ WarpX::SyncRho () } void -WarpX::interpolateDensityFineToCoarse (const MultiFab& fine, MultiFab& coarse, int const refinement_ratio) +interpolateDensityFineToCoarse (const MultiFab& fine, MultiFab& coarse, int const refinement_ratio) { BL_PROFILE("interpolateDensityFineToCoarse()"); BL_ASSERT(refinement_ratio == 2); diff --git a/Source/Particles/Make.package b/Source/Particles/Make.package index a6c124ddd..6d34fa0ef 100644 --- a/Source/Particles/Make.package +++ b/Source/Particles/Make.package @@ -1,4 +1,3 @@ -F90EXE_sources += deposit_cic.F90 F90EXE_sources += interpolate_cic.F90 F90EXE_sources += push_particles_ES.F90 diff --git a/Source/Particles/WarpXParticleContainer.cpp b/Source/Particles/WarpXParticleContainer.cpp index 51ed84ea7..2a5be6f5d 100644 --- a/Source/Particles/WarpXParticleContainer.cpp +++ b/Source/Particles/WarpXParticleContainer.cpp @@ -4,6 +4,7 @@ #include #include #include +#include #include #include #include diff --git a/Source/Particles/deposit_cic.F90 b/Source/Particles/deposit_cic.F90 deleted file mode 100644 index 2831ce96b..000000000 --- a/Source/Particles/deposit_cic.F90 +++ /dev/null @@ -1,134 +0,0 @@ -module warpx_ES_deposit_cic - - use iso_c_binding - use amrex_fort_module, only : amrex_real, amrex_particle_real - - implicit none - -contains - -! This routine computes the charge density due to the particles using cloud-in-cell -! deposition. The Fab rho is assumed to be node-centered. -! -! Arguments: -! particles : a pointer to the particle array-of-structs -! ns : the stride length of particle struct (the size of the struct in number of reals) -! np : the number of particles -! weights : the particle weights -! charge : the charge of this particle species -! rho : a Fab that will contain the charge density on exit -! lo : a pointer to the lo corner of this valid box for rho, in index space -! hi : a pointer to the hi corner of this valid box for rho, in index space -! plo : the real position of the left-hand corner of the problem domain -! dx : the mesh spacing -! ng : the number of ghost cells in rho -! - subroutine warpx_deposit_cic_3d(particles, ns, np, & - weights, charge, rho, lo, hi, plo, dx, & - ng) & - bind(c,name='warpx_deposit_cic_3d') - integer, value, intent(in) :: ns, np - real(amrex_particle_real), intent(in) :: particles(ns,np) - real(amrex_particle_real), intent(in) :: weights(np) - real(amrex_real), intent(in) :: charge - integer, intent(in) :: lo(3) - integer, intent(in) :: hi(3) - integer, intent(in) :: ng - real(amrex_real), intent(inout) :: rho(lo(1)-ng:hi(1)+ng, lo(2)-ng:hi(2)+ng, lo(3)-ng:hi(3)+ng) - real(amrex_real), intent(in) :: plo(3) - real(amrex_real), intent(in) :: dx(3) - - integer i, j, k, n - real(amrex_real) wx_lo, wy_lo, wz_lo, wx_hi, wy_hi, wz_hi - real(amrex_real) lx, ly, lz - real(amrex_real) inv_dx(3) - real(amrex_real) qp, inv_vol - - inv_dx = 1.0d0/dx - - inv_vol = inv_dx(1) * inv_dx(2) * inv_dx(3) - - do n = 1, np - - qp = weights(n) * charge * inv_vol - - lx = (particles(1, n) - plo(1))*inv_dx(1) - ly = (particles(2, n) - plo(2))*inv_dx(2) - lz = (particles(3, n) - plo(3))*inv_dx(3) - - i = floor(lx) - j = floor(ly) - k = floor(lz) - - wx_hi = lx - i - wy_hi = ly - j - wz_hi = lz - k - - wx_lo = 1.0d0 - wx_hi - wy_lo = 1.0d0 - wy_hi - wz_lo = 1.0d0 - wz_hi - - rho(i, j, k ) = rho(i, j, k ) + wx_lo*wy_lo*wz_lo*qp - rho(i, j, k+1) = rho(i, j, k+1) + wx_lo*wy_lo*wz_hi*qp - rho(i, j+1, k ) = rho(i, j+1, k ) + wx_lo*wy_hi*wz_lo*qp - rho(i, j+1, k+1) = rho(i, j+1, k+1) + wx_lo*wy_hi*wz_hi*qp - rho(i+1, j, k ) = rho(i+1, j, k ) + wx_hi*wy_lo*wz_lo*qp - rho(i+1, j, k+1) = rho(i+1, j, k+1) + wx_hi*wy_lo*wz_hi*qp - rho(i+1, j+1, k ) = rho(i+1, j+1, k ) + wx_hi*wy_hi*wz_lo*qp - rho(i+1, j+1, k+1) = rho(i+1, j+1, k+1) + wx_hi*wy_hi*wz_hi*qp - - end do - - end subroutine warpx_deposit_cic_3d - - subroutine warpx_deposit_cic_2d(particles, ns, np, & - weights, charge, rho, lo, hi, plo, dx, & - ng) & - bind(c,name='warpx_deposit_cic_2d') - integer, value, intent(in) :: ns, np - real(amrex_particle_real), intent(in) :: particles(ns,np) - real(amrex_particle_real), intent(in) :: weights(np) - real(amrex_real), intent(in) :: charge - integer, intent(in) :: lo(2) - integer, intent(in) :: hi(2) - integer, intent(in) :: ng - real(amrex_real), intent(inout) :: rho(lo(1)-ng:hi(1)+ng, lo(2)-ng:hi(2)+ng) - real(amrex_real), intent(in) :: plo(2) - real(amrex_real), intent(in) :: dx(2) - - integer i, j, n - real(amrex_real) wx_lo, wy_lo, wx_hi, wy_hi - real(amrex_real) lx, ly - real(amrex_real) inv_dx(2) - real(amrex_real) qp, inv_vol - - inv_dx = 1.0d0/dx - - inv_vol = inv_dx(1) * inv_dx(2) - - do n = 1, np - - qp = weights(n) * charge * inv_vol - - lx = (particles(1, n) - plo(1))*inv_dx(1) - ly = (particles(2, n) - plo(2))*inv_dx(2) - - i = floor(lx) - j = floor(ly) - - wx_hi = lx - i - wy_hi = ly - j - - wx_lo = 1.0d0 - wx_hi - wy_lo = 1.0d0 - wy_hi - - rho(i, j ) = rho(i, j ) + wx_lo*wy_lo*qp - rho(i, j+1) = rho(i, j+1) + wx_lo*wy_hi*qp - rho(i+1, j ) = rho(i+1, j ) + wx_hi*wy_lo*qp - rho(i+1, j+1) = rho(i+1, j+1) + wx_hi*wy_hi*qp - - end do - - end subroutine warpx_deposit_cic_2d - -end module warpx_ES_deposit_cic -- cgit v1.2.3