From 2fe4cae1b0da80e04f2cc0a7865ff86ac64e8a39 Mon Sep 17 00:00:00 2001 From: Remi Lehe Date: Sat, 14 Sep 2019 10:52:08 -0700 Subject: Fix charge deposition with MR --- Source/Parallelization/WarpXComm.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'Source/Parallelization/WarpXComm.cpp') diff --git a/Source/Parallelization/WarpXComm.cpp b/Source/Parallelization/WarpXComm.cpp index 1c8c37cad..e24dd772c 100644 --- a/Source/Parallelization/WarpXComm.cpp +++ b/Source/Parallelization/WarpXComm.cpp @@ -688,7 +688,7 @@ WarpX::AddRhoFromFineLevelandSumBoundary(int lev, int icomp, int ncomp) } else if (charge_buf[lev+1]) // but no filter { - MultiFab::Copy(*charge_buf[lev+1], + MultiFab::Add(*charge_buf[lev+1], *rho_cp[lev+1], icomp, icomp, ncomp, rho_cp[lev+1]->nGrow()); mf.ParallelAdd(*charge_buf[lev+1], icomp, 0, -- cgit v1.2.3 From 22e42395271564fbca74a5a4da0b13b265cede61 Mon Sep 17 00:00:00 2001 From: Weiqun Zhang Date: Mon, 16 Sep 2019 19:06:03 -0700 Subject: implement the energy conserving way of interploating from coarse to fine for auxilary data and remove the high-order divB-preserving option. --- Source/Parallelization/Make.package | 1 + Source/Parallelization/WarpXComm.cpp | 147 +++++++++++--------------------- Source/Parallelization/WarpXComm_K.H | 161 +++++++++++++++++++++++++++++++++++ 3 files changed, 213 insertions(+), 96 deletions(-) create mode 100644 Source/Parallelization/WarpXComm_K.H (limited to 'Source/Parallelization/WarpXComm.cpp') diff --git a/Source/Parallelization/Make.package b/Source/Parallelization/Make.package index 3d1fcf1da..c74583522 100644 --- a/Source/Parallelization/Make.package +++ b/Source/Parallelization/Make.package @@ -1,6 +1,7 @@ CEXE_sources += WarpXComm.cpp CEXE_sources += WarpXRegrid.cpp CEXE_headers += WarpXSumGuardCells.H +CEXE_headers += WarpXComm_K.H INCLUDE_LOCATIONS += $(WARPX_HOME)/Source/Parallelization VPATH_LOCATIONS += $(WARPX_HOME)/Source/Parallelization diff --git a/Source/Parallelization/WarpXComm.cpp b/Source/Parallelization/WarpXComm.cpp index e24dd772c..990d0f988 100644 --- a/Source/Parallelization/WarpXComm.cpp +++ b/Source/Parallelization/WarpXComm.cpp @@ -1,9 +1,8 @@ +#include #include #include #include -#include - #include #include @@ -52,8 +51,6 @@ WarpX::UpdateAuxilaryData () { BL_PROFILE("UpdateAuxilaryData()"); - const int use_limiter = 0; - for (int lev = 1; lev <= finest_level; ++lev) { const auto& crse_period = Geom(lev-1).periodicity(); @@ -81,57 +78,37 @@ WarpX::UpdateAuxilaryData () MultiFab::Subtract(dBy, *Bfield_cp[lev][1], 0, 0, Bfield_cp[lev][1]->nComp(), ng); MultiFab::Subtract(dBz, *Bfield_cp[lev][2], 0, 0, Bfield_cp[lev][2]->nComp(), ng); - const Real* dx = Geom(lev-1).CellSize(); const int refinement_ratio = refRatio(lev-1)[0]; + AMREX_ALWAYS_ASSERT(refinement_ratio == 2); + #ifdef _OPENMP -#pragma omp parallel +#pragma omp parallel if (Gpu::notInLaunchRegion()) #endif + for (MFIter mfi(*Bfield_aux[lev][0]); mfi.isValid(); ++mfi) { - std::array bfab; - for (MFIter mfi(*Bfield_aux[lev][0]); mfi.isValid(); ++mfi) + Array4 const& bx_aux = Bfield_aux[lev][0]->array(mfi); + Array4 const& by_aux = Bfield_aux[lev][1]->array(mfi); + Array4 const& bz_aux = Bfield_aux[lev][2]->array(mfi); + Array4 const& bx_fp = Bfield_fp[lev][0]->const_array(mfi); + Array4 const& by_fp = Bfield_fp[lev][1]->const_array(mfi); + Array4 const& bz_fp = Bfield_fp[lev][2]->const_array(mfi); + Array4 const& bx_c = dBx.const_array(mfi); + Array4 const& by_c = dBy.const_array(mfi); + Array4 const& bz_c = dBz.const_array(mfi); + + amrex::ParallelFor(Box(bx_aux), Box(by_aux), Box(bz_aux), + [=] AMREX_GPU_DEVICE (int j, int k, int l) noexcept { - Box ccbx = mfi.fabbox(); - ccbx.enclosedCells(); - ccbx.coarsen(refinement_ratio).refine(refinement_ratio); // so that ccbx is coarsenable - - const FArrayBox& cxfab = dBx[mfi]; - const FArrayBox& cyfab = dBy[mfi]; - const FArrayBox& czfab = dBz[mfi]; - bfab[0].resize(amrex::convert(ccbx,Bx_nodal_flag)); - bfab[1].resize(amrex::convert(ccbx,By_nodal_flag)); - bfab[2].resize(amrex::convert(ccbx,Bz_nodal_flag)); - -#if (AMREX_SPACEDIM == 3) - amrex_interp_div_free_bfield(ccbx.loVect(), ccbx.hiVect(), - BL_TO_FORTRAN_ANYD(bfab[0]), - BL_TO_FORTRAN_ANYD(bfab[1]), - BL_TO_FORTRAN_ANYD(bfab[2]), - BL_TO_FORTRAN_ANYD(cxfab), - BL_TO_FORTRAN_ANYD(cyfab), - BL_TO_FORTRAN_ANYD(czfab), - dx, &refinement_ratio,&use_limiter); -#else - amrex_interp_div_free_bfield(ccbx.loVect(), ccbx.hiVect(), - BL_TO_FORTRAN_ANYD(bfab[0]), - BL_TO_FORTRAN_ANYD(bfab[2]), - BL_TO_FORTRAN_ANYD(cxfab), - BL_TO_FORTRAN_ANYD(czfab), - dx, &refinement_ratio,&use_limiter); - amrex_interp_cc_bfield(ccbx.loVect(), ccbx.hiVect(), - BL_TO_FORTRAN_ANYD(bfab[1]), - BL_TO_FORTRAN_ANYD(cyfab), - &refinement_ratio,&use_limiter); -#endif - - for (int idim = 0; idim < 3; ++idim) - { - FArrayBox& aux = (*Bfield_aux[lev][idim])[mfi]; - FArrayBox& fp = (*Bfield_fp[lev][idim])[mfi]; - const Box& bx = aux.box(); - aux.copy(fp, bx, 0, bx, 0, 1); - aux.plus(bfab[idim], bx, bx, 0, 0, 1); - } - } + warpx_interp_bfield_x(j,k,l, bx_aux, bx_fp, bx_c); + }, + [=] AMREX_GPU_DEVICE (int j, int k, int l) noexcept + { + warpx_interp_bfield_y(j,k,l, by_aux, by_fp, by_c); + }, + [=] AMREX_GPU_DEVICE (int j, int k, int l) noexcept + { + warpx_interp_bfield_z(j,k,l, bz_aux, bz_fp, bz_c); + }); } } @@ -156,56 +133,34 @@ WarpX::UpdateAuxilaryData () MultiFab::Subtract(dEy, *Efield_cp[lev][1], 0, 0, Efield_cp[lev][1]->nComp(), ng); MultiFab::Subtract(dEz, *Efield_cp[lev][2], 0, 0, Efield_cp[lev][2]->nComp(), ng); - const int refinement_ratio = refRatio(lev-1)[0]; #ifdef _OPEMP -#pragma omp parallel +#pragma omp parallel if (Gpu::notInLaunchRegion()) #endif + for (MFIter mfi(*Efield_aux[lev][0]); mfi.isValid(); ++mfi) { - std::array efab; - for (MFIter mfi(*Efield_aux[lev][0]); mfi.isValid(); ++mfi) + Array4 const& ex_aux = Efield_aux[lev][0]->array(mfi); + Array4 const& ey_aux = Efield_aux[lev][1]->array(mfi); + Array4 const& ez_aux = Efield_aux[lev][2]->array(mfi); + Array4 const& ex_fp = Efield_fp[lev][0]->const_array(mfi); + Array4 const& ey_fp = Efield_fp[lev][1]->const_array(mfi); + Array4 const& ez_fp = Efield_fp[lev][2]->const_array(mfi); + Array4 const& ex_c = dEx.const_array(mfi); + Array4 const& ey_c = dEy.const_array(mfi); + Array4 const& ez_c = dEz.const_array(mfi); + + amrex::ParallelFor(Box(ex_aux), Box(ey_aux), Box(ez_aux), + [=] AMREX_GPU_DEVICE (int j, int k, int l) noexcept { - Box ccbx = mfi.fabbox(); - ccbx.enclosedCells(); - ccbx.coarsen(refinement_ratio).refine(refinement_ratio); // so that ccbx is coarsenable - - const FArrayBox& cxfab = dEx[mfi]; - const FArrayBox& cyfab = dEy[mfi]; - const FArrayBox& czfab = dEz[mfi]; - efab[0].resize(amrex::convert(ccbx,Ex_nodal_flag)); - efab[1].resize(amrex::convert(ccbx,Ey_nodal_flag)); - efab[2].resize(amrex::convert(ccbx,Ez_nodal_flag)); - -#if (AMREX_SPACEDIM == 3) - amrex_interp_efield(ccbx.loVect(), ccbx.hiVect(), - BL_TO_FORTRAN_ANYD(efab[0]), - BL_TO_FORTRAN_ANYD(efab[1]), - BL_TO_FORTRAN_ANYD(efab[2]), - BL_TO_FORTRAN_ANYD(cxfab), - BL_TO_FORTRAN_ANYD(cyfab), - BL_TO_FORTRAN_ANYD(czfab), - &refinement_ratio,&use_limiter); -#else - amrex_interp_efield(ccbx.loVect(), ccbx.hiVect(), - BL_TO_FORTRAN_ANYD(efab[0]), - BL_TO_FORTRAN_ANYD(efab[2]), - BL_TO_FORTRAN_ANYD(cxfab), - BL_TO_FORTRAN_ANYD(czfab), - &refinement_ratio,&use_limiter); - amrex_interp_nd_efield(ccbx.loVect(), ccbx.hiVect(), - BL_TO_FORTRAN_ANYD(efab[1]), - BL_TO_FORTRAN_ANYD(cyfab), - &refinement_ratio); -#endif - - for (int idim = 0; idim < 3; ++idim) - { - FArrayBox& aux = (*Efield_aux[lev][idim])[mfi]; - FArrayBox& fp = (*Efield_fp[lev][idim])[mfi]; - const Box& bx = aux.box(); - aux.copy(fp, bx, 0, bx, 0, Efield_fp[lev][idim]->nComp()); - aux.plus(efab[idim], bx, bx, 0, 0, Efield_fp[lev][idim]->nComp()); - } - } + warpx_interp_efield_x(j,k,l, ex_aux, ex_fp, ex_c); + }, + [=] AMREX_GPU_DEVICE (int j, int k, int l) noexcept + { + warpx_interp_efield_y(j,k,l, ey_aux, ey_fp, ey_c); + }, + [=] AMREX_GPU_DEVICE (int j, int k, int l) noexcept + { + warpx_interp_efield_z(j,k,l, ez_aux, ez_fp, ez_c); + }); } } } diff --git a/Source/Parallelization/WarpXComm_K.H b/Source/Parallelization/WarpXComm_K.H new file mode 100644 index 000000000..093323ec3 --- /dev/null +++ b/Source/Parallelization/WarpXComm_K.H @@ -0,0 +1,161 @@ +#ifndef WARPX_COMM_K_H_ +#define WARPX_COMM_K_H_ + +#include + +AMREX_GPU_DEVICE AMREX_FORCE_INLINE +void warpx_interp_bfield_x (int j, int k, int l, + amrex::Array4 const& Bxa, + amrex::Array4 const& Bxf, + amrex::Array4 const& Bxc) +{ + using namespace amrex; + + int lg = amrex::coarsen(l,2); + int kg = amrex::coarsen(k,2); + int jg = amrex::coarsen(j,2); + + Real wx = (j == jg*2) ? 0.0 : 0.5; + Real owx = 1.0-wx; + Bxa(j,k,l) = owx * Bxc(jg,kg,lg) + wx * Bxc(jg+1,kg,lg) + Bxf(j,k,l); +} + +AMREX_GPU_DEVICE AMREX_FORCE_INLINE +void warpx_interp_bfield_y (int j, int k, int l, + amrex::Array4 const& Bya, + amrex::Array4 const& Byf, + amrex::Array4 const& Byc) +{ + using namespace amrex; + + int lg = amrex::coarsen(l,2); + int kg = amrex::coarsen(k,2); + int jg = amrex::coarsen(j,2); + + // Note that for 2d, l=0, because the amrex convention is used here. + +#if (AMREX_SPACEDIM == 3) + Real wy = (k == kg*2) ? 0.0 : 0.5; + Real owy = 1.0-wy; + Bya(j,k,l) = owy * Byc(jg,kg,lg) + wy * Byc(jg,kg+1,lg) + Byf(j,k,l); +#else + Bya(j,k,l) = Byc(jg,kg,lg) + Byf(j,k,l); +#endif +} + +AMREX_GPU_DEVICE AMREX_FORCE_INLINE +void warpx_interp_bfield_z (int j, int k, int l, + amrex::Array4 const& Bza, + amrex::Array4 const& Bzf, + amrex::Array4 const& Bzc) +{ + using namespace amrex; + + int lg = amrex::coarsen(l,2); + int kg = amrex::coarsen(k,2); + int jg = amrex::coarsen(j,2); + + // Note that for 2d, l=0, because the amrex convention is used here. + +#if (AMREX_SPACEDIM == 3) + Real wz = (l == lg*2) ? 0.0 : 0.5; + Real owz = 1.0-wz; + Bza(j,k,l) = owz * Bzc(jg,kg,lg) + owz * Bzc(jg,kg,lg+1) + Bzf(j,k,l); +#else + Real wy = (k == kg*2) ? 0.0 : 0.5; + Real owy = 1.0-wy; + Bza(j,k,l) = owy * Bzc(jg,kg,lg) + owy * Bzc(jg,kg+1,lg) + Bzf(j,k,l); +#endif +} + +AMREX_GPU_DEVICE AMREX_FORCE_INLINE +void warpx_interp_efield_x (int j, int k, int l, + amrex::Array4 const& Exa, + amrex::Array4 const& Exf, + amrex::Array4 const& Exc) +{ + using namespace amrex; + + int lg = amrex::coarsen(l,2); + int kg = amrex::coarsen(k,2); + int jg = amrex::coarsen(j,2); + + Real wy = (k == kg*2) ? 0.0 : 0.5; + Real owy = 1.0-wy; + +#if (AMREX_SPACEDIM == 3) + Real wz = (l == lg*2) ? 0.0 : 0.5; + Real owz = 1.0-wz; + Exa(j,k,l) = owy * owz * Exc(jg ,kg ,lg ) + + wy * owz * Exc(jg ,kg+1,lg ) + + owy * wz * Exc(jg ,kg ,lg+1) + + wy * wz * Exc(jg ,kg+1,lg+1) + + Exf(j,k,l); +#else + Exa(j,k,l) = owy * Exc(jg,kg,lg) + wy * Exc(jg,kg+1,lg) + Exf(j,k,l); +#endif +} + +AMREX_GPU_DEVICE AMREX_FORCE_INLINE +void warpx_interp_efield_y (int j, int k, int l, + amrex::Array4 const& Eya, + amrex::Array4 const& Eyf, + amrex::Array4 const& Eyc) +{ + using namespace amrex; + + int lg = amrex::coarsen(l,2); + int kg = amrex::coarsen(k,2); + int jg = amrex::coarsen(j,2); + + Real wx = (j == jg*2) ? 0.0 : 0.5; + Real owx = 1.0-wx; + +#if (AMREX_SPACEDIM == 3) + Real wz = (l == lg*2) ? 0.0 : 0.5; + Real owz = 1.0-wz; + Eya(j,k,l) = owx * owz * Eyc(jg ,kg ,lg ) + + wx * owz * Eyc(jg+1,kg ,lg ) + + owx * wz * Eyc(jg ,kg ,lg+1) + + wx * wz * Eyc(jg+1,kg ,lg+1) + + Eyf(j,k,l); +#else + Real wy = (k == kg*2) ? 0.0 : 0.5; + Real owy = 1.0-wy; + Eya(j,k,l) = owx * owy * Eyc(jg ,kg ,lg) + + wx * owy * Eyc(jg+1,kg ,lg) + + owx * wy * Eyc(jg ,kg+1,lg) + + wx * wy * Eyc(jg+1,kg+1,lg) + + Eyf(j,k,l); +#endif +} + +AMREX_GPU_DEVICE AMREX_FORCE_INLINE +void warpx_interp_efield_z (int j, int k, int l, + amrex::Array4 const& Eza, + amrex::Array4 const& Ezf, + amrex::Array4 const& Ezc) +{ + using namespace amrex; + + int lg = amrex::coarsen(l,2); + int kg = amrex::coarsen(k,2); + int jg = amrex::coarsen(j,2); + + Real wx = (j == jg*2) ? 0.0 : 0.5; + Real owx = 1.0-wx; + +#if (AMREX_SPACEDIM == 3) + Real wy = (k == kg*2) ? 0.0 : 0.5; + Real owy = 1.0-wy; + Eza(j,k,l) = owx * owy * Ezc(jg ,kg ,lg ) + + wx * owy * Ezc(jg+1,kg ,lg ) + + owx * wy * Ezc(jg ,kg+1,lg ) + + wx * wy * Ezc(jg+1,kg+1,lg ) + + Ezf(j,k,l); +#else + Eza(j,k,l) = owx * Ezc(jg,kg,lg) + wx * Ezc(jg+1,kg,lg) + Ezf(j,k,l); +#endif +} + +#endif -- cgit v1.2.3 From 15658267709e5a53a11ef6f945d1bfb283f994a6 Mon Sep 17 00:00:00 2001 From: Axel Huebl Date: Tue, 24 Sep 2019 16:25:49 -0700 Subject: Current Synchronize: Port to GPU Port the current synchronize functions to GPU. --- Source/FortranInterface/WarpX_f.F90 | 110 ------------------ Source/FortranInterface/WarpX_f.H | 2 - Source/Parallelization/CurrentSynchronize.H | 171 ++++++++++++++++++++++++++++ Source/Parallelization/WarpXComm.cpp | 44 ++++--- Source/WarpX.H | 21 +++- 5 files changed, 209 insertions(+), 139 deletions(-) create mode 100644 Source/Parallelization/CurrentSynchronize.H (limited to 'Source/Parallelization/WarpXComm.cpp') diff --git a/Source/FortranInterface/WarpX_f.F90 b/Source/FortranInterface/WarpX_f.F90 index 542cc95a1..2ddf9409d 100644 --- a/Source/FortranInterface/WarpX_f.F90 +++ b/Source/FortranInterface/WarpX_f.F90 @@ -51,116 +51,6 @@ contains end subroutine warpx_compute_E - subroutine warpx_sync_current_2d (lo, hi, crse, clo, chi, fine, flo, fhi, dir) & - bind(c, name='warpx_sync_current_2d') - integer, intent(in) :: lo(2), hi(2), flo(2), fhi(2), clo(2), chi(2), dir - real(amrex_real), intent(in ) :: fine(flo(1):fhi(1),flo(2):fhi(2)) - real(amrex_real), intent(inout) :: crse(clo(1):chi(1),clo(2):chi(2)) - - integer :: i,j,ii,jj - - if (dir == 0) then - do j = lo(2), hi(2) - jj = j*2 - do i = lo(1), hi(1) - ii = i*2 - crse(i,j) = 0.25d0 * (fine(ii,jj) + fine(ii+1,jj) & - + 0.5d0*(fine(ii,jj-1) + fine(ii+1,jj-1) + fine(ii,jj+1) + fine(ii+1,jj+1)) ) - end do - end do - else if (dir == 2) then - do j = lo(2), hi(2) - jj = j*2 - do i = lo(1), hi(1) - ii = i*2 - crse(i,j) = 0.25d0 * (fine(ii,jj) + fine(ii,jj+1) & - + 0.5d0*(fine(ii-1,jj) + fine(ii-1,jj+1) + fine(ii+1,jj) + fine(ii+1,jj+1)) ) - end do - end do - else - do j = lo(2), hi(2) - jj = j*2 - do i = lo(1), hi(1) - ii = i*2 - crse(i,j) = 0.25d0 * & - ( fine(ii,jj) + 0.5d0 *(fine(ii-1,jj )+fine(ii+1,jj ) & - & + fine(ii ,jj-1)+fine(ii ,jj+1)) & - & + 0.25d0*(fine(ii-1,jj-1)+fine(ii+1,jj-1) & - & + fine(ii-1,jj+1)+fine(ii+1,jj+1)) ) - end do - end do - end if - end subroutine warpx_sync_current_2d - - subroutine warpx_sync_current_3d (lo, hi, crse, clo, chi, fine, flo, fhi, dir) & - bind(c, name='warpx_sync_current_3d') - integer, intent(in) :: lo(3), hi(3), flo(3), fhi(3), clo(3), chi(3), dir - real(amrex_real), intent(in ) :: fine(flo(1):fhi(1),flo(2):fhi(2),flo(3):fhi(3)) - real(amrex_real), intent(inout) :: crse(clo(1):chi(1),clo(2):chi(2),clo(3):chi(3)) - - integer :: i,j,k, ii,jj,kk - - if (dir == 0) then - do k = lo(3), hi(3) - kk = k*2 - do j = lo(2), hi(2) - jj = j*2 - do i = lo(1), hi(1) - ii = i*2 - crse(i,j,k) = 0.125d0* & - ( fine(ii ,jj,kk) + 0.5d0 *(fine(ii ,jj-1,kk )+fine(ii ,jj+1,kk ) & - & + fine(ii ,jj ,kk-1)+fine(ii ,jj ,kk+1)) & - & + 0.25d0*(fine(ii ,jj-1,kk-1)+fine(ii ,jj+1,kk-1) & - & + fine(ii ,jj-1,kk+1)+fine(ii ,jj+1,kk+1)) & - + fine(ii+1,jj,kk) + 0.5d0 *(fine(ii+1,jj-1,kk )+fine(ii+1,jj+1,kk ) & - & + fine(ii+1,jj ,kk-1)+fine(ii+1,jj ,kk+1)) & - & + 0.25d0*(fine(ii+1,jj-1,kk-1)+fine(ii+1,jj+1,kk-1) & - & + fine(ii+1,jj-1,kk+1)+fine(ii+1,jj+1,kk+1)) ) - end do - end do - end do - else if (dir == 1) then - do k = lo(3), hi(3) - kk = k*2 - do j = lo(2), hi(2) - jj = j*2 - do i = lo(1), hi(1) - ii = i*2 - crse(i,j,k) = 0.125d0* & - ( fine(ii,jj ,kk) + 0.5d0 *(fine(ii-1,jj ,kk )+fine(ii+1,jj ,kk ) & - & + fine(ii ,jj ,kk-1)+fine(ii ,jj ,kk+1)) & - & + 0.25d0*(fine(ii-1,jj ,kk-1)+fine(ii+1,jj ,kk-1) & - & + fine(ii-1,jj ,kk+1)+fine(ii+1,jj ,kk+1)) & - + fine(ii,jj+1,kk) + 0.5d0 *(fine(ii-1,jj+1,kk )+fine(ii+1,jj+1,kk ) & - & + fine(ii ,jj+1,kk-1)+fine(ii ,jj+1,kk+1)) & - & + 0.25d0*(fine(ii-1,jj+1,kk-1)+fine(ii+1,jj+1,kk-1) & - & + fine(ii-1,jj+1,kk+1)+fine(ii+1,jj+1,kk+1)) ) - end do - end do - end do - else - do k = lo(3), hi(3) - kk = k*2 - do j = lo(2), hi(2) - jj = j*2 - do i = lo(1), hi(1) - ii = i*2 - crse(i,j,k) = 0.125d0* & - ( fine(ii,jj,kk ) + 0.5d0 *(fine(ii-1,jj ,kk )+fine(ii+1,jj ,kk ) & - & + fine(ii ,jj-1,kk )+fine(ii ,jj+1,kk )) & - & + 0.25d0*(fine(ii-1,jj-1,kk )+fine(ii+1,jj-1,kk ) & - & + fine(ii-1,jj+1,kk )+fine(ii+1,jj+1,kk )) & - + fine(ii,jj,kk+1) + 0.5d0 *(fine(ii-1,jj ,kk+1)+fine(ii+1,jj ,kk+1) & - & + fine(ii ,jj-1,kk+1)+fine(ii ,jj+1,kk+1)) & - & + 0.25d0*(fine(ii-1,jj-1,kk+1)+fine(ii+1,jj-1,kk+1) & - & + fine(ii-1,jj+1,kk+1)+fine(ii+1,jj+1,kk+1)) ) - end do - end do - end do - end if - end subroutine warpx_sync_current_3d - - subroutine warpx_sync_rho_2d (lo, hi, crse, clo, chi, fine, flo, fhi, nc) & bind(c, name='warpx_sync_rho_2d') integer, intent(in) :: lo(2), hi(2), flo(2), fhi(2), clo(2), chi(2), nc diff --git a/Source/FortranInterface/WarpX_f.H b/Source/FortranInterface/WarpX_f.H index 00212aec9..168fd6415 100644 --- a/Source/FortranInterface/WarpX_f.H +++ b/Source/FortranInterface/WarpX_f.H @@ -16,7 +16,6 @@ #if (AMREX_SPACEDIM == 3) -#define WRPX_SYNC_CURRENT warpx_sync_current_3d #define WRPX_SYNC_RHO warpx_sync_rho_3d #define WRPX_SUM_FINE_TO_CRSE_NODAL warpx_sum_fine_to_crse_nodal_3d @@ -31,7 +30,6 @@ #elif (AMREX_SPACEDIM == 2) -#define WRPX_SYNC_CURRENT warpx_sync_current_2d #define WRPX_SYNC_RHO warpx_sync_rho_2d #define WRPX_SUM_FINE_TO_CRSE_NODAL warpx_sum_fine_to_crse_nodal_2d diff --git a/Source/Parallelization/CurrentSynchronize.H b/Source/Parallelization/CurrentSynchronize.H new file mode 100644 index 000000000..ffc9d3479 --- /dev/null +++ b/Source/Parallelization/CurrentSynchronize.H @@ -0,0 +1,171 @@ +/* Copyright 2019 Axel Huebl + * + * This file is part of WarpX. + * + * License: BSD-3-Clause-LBNL + */ + +#ifndef WARPX_CURRENTSYNCHRONIZE_H +#define WARPX_CURRENTSYNCHRONIZE_H + +#include +#include +#include +#include +#include + + +/** Fill a current coarse patch with averaged values from a fine patch + * + * Fills the values of the current for a selected component on the coarse patch + * by averaging the values of the current of the fine patch. + * + * @tparam IDim j-field component on which the averaging is performed + */ +template< int IDim > +class WarpxSyncCurrent +{ +public: + /** Construct with fine and coarse patch and their refinement ratio + * + * @param[in] fine read-only fine patch + * @param[in,out] coarse overwritten coarse patch + * @param[in] refinement_ratio ratio between coarse and fine patch granularity + * (currently, only a value of is implemented) + */ + AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE + WarpxSyncCurrent( + amrex::Array4 const fine, + amrex::Array4 const coarse, + int const refinement_ratio + ) : m_fine(fine), m_coarse(coarse), m_refinement_ratio(refinement_ratio) + { + //! @note constants and stencils in operator() implementation assume 2x refinement + BL_ASSERT(refinement_ratio == 2); + } + + AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE + void + operator()( + int const i, + int const j, + int const k + ) noexcept // TODO rename to jkl + { + auto const & fine_unsafe = m_fine; // out-of-bounds access not secured with zero-values yet + auto const & coarse = m_coarse; // out-of-bounds access not secured but will also not occur + + // return zero for out-of-bounds accesses during interpolation + // this is efficiently used as a method to add neutral elements beyond guards in the average below + auto const fine = [fine_unsafe] AMREX_GPU_DEVICE (int const j, int const k, int const l) noexcept + { + return fine_unsafe.contains(j, k, l) ? fine_unsafe(j, k, l) : amrex::Real{0.}; + }; + + int const ii = i * m_refinement_ratio; + int const jj = j * m_refinement_ratio; + int const kk = k * m_refinement_ratio; +#if AMREX_SPACEDIM == 2 + if (IDim == 0) { + coarse(i, j, k) = 0.25 * ( + fine(ii, jj, kk) + fine(ii + 1, jj, kk) + + 0.5 * ( + fine(ii, jj - 1, kk) + fine(ii + 1, jj - 1, kk) + + fine(ii, jj + 1, kk) + fine(ii + 1, jj + 1, kk) + ) + ); + } else if (IDim == 2) { + coarse(i, j, k) = 0.25 * ( + fine(ii, jj, kk) + fine(ii, jj + 1, kk) + + 0.5 * ( + fine(ii - 1, jj, kk) + fine(ii - 1, jj + 1, kk) + + fine(ii + 1, jj, kk) + fine(ii + 1, jj + 1, kk) + ) + ); + } else { + coarse(i, j, k) = 0.25 * ( + fine(ii, jj, kk) + + 0.5 * ( + fine(ii - 1, jj , kk) + fine(ii + 1, jj , kk) + + fine(ii , jj - 1, kk) + fine(ii , jj + 1, kk) + ) + + 0.25 * ( + fine(ii - 1, jj - 1, kk) + fine(ii + 1, jj - 1, kk) + + fine(ii - 1, jj + 1, kk) + fine(ii + 1, jj + 1, kk) + ) + ); + } +#elif AMREX_SPACEDIM == 3 + if (IDim == 0) { + coarse(i,j,k) = 0.125 * ( + fine(ii , jj, kk) + + 0.5 * ( + fine(ii , jj-1, kk ) + fine(ii , jj+1, kk ) + + fine(ii , jj , kk-1) + fine(ii , jj , kk+1) + ) + + 0.25 * ( + fine(ii , jj-1, kk-1) + fine(ii , jj+1, kk-1) + + fine(ii , jj-1, kk+1) + fine(ii , jj+1, kk+1) + ) + + fine(ii+1, jj, kk) + + 0.5 * ( + fine(ii+1, jj-1, kk ) + fine(ii+1, jj+1, kk ) + + fine(ii+1, jj , kk-1) + fine(ii+1, jj , kk+1) + ) + + 0.25 * ( + fine(ii+1, jj-1, kk-1)+fine(ii+1, jj+1, kk-1) + + fine(ii+1, jj-1, kk+1)+fine(ii+1, jj+1, kk+1) + ) + ); + } else if (IDim == 1) { + coarse(i, j, k) = 0.125 * ( + fine(ii, jj , kk) + + 0.5 * ( + fine(ii-1, jj , kk ) + fine(ii+1, jj , kk ) + + fine(ii , jj , kk-1) + fine(ii , jj , kk+1) + ) + + 0.25 * ( + fine(ii-1, jj , kk-1) + fine(ii+1, jj , kk-1) + + fine(ii-1, jj , kk+1) + fine(ii+1, jj , kk+1) + ) + + fine(ii, jj+1, kk) + + 0.5 * ( + fine(ii-1, jj+1, kk ) + fine(ii+1, jj+1, kk ) + + fine(ii , jj+1, kk-1) + fine(ii , jj+1, kk+1) + ) + + 0.25 * ( + fine(ii-1, jj+1, kk-1) + fine(ii+1, jj+1, kk-1) + + fine(ii-1, jj+1, kk+1) + fine(ii+1, jj+1, kk+1) + ) + ); + } else { + coarse(i, j, k) = 0.125 * ( + fine(ii, jj, kk ) + + 0.5 * ( + fine(ii-1, jj , kk ) + fine(ii+1, jj , kk ) + + fine(ii , jj-1, kk ) + fine(ii , jj+1, kk ) + ) + + 0.25 * ( + fine(ii-1, jj-1, kk ) + fine(ii+1, jj-1, kk ) + + fine(ii-1, jj+1, kk ) + fine(ii+1, jj+1, kk ) + ) + + fine(ii, jj, kk+1) + + 0.5 * ( + fine(ii-1, jj , kk+1) + fine(ii+1, jj , kk+1) + + fine(ii , jj-1, kk+1) + fine(ii , jj+1, kk+1) + ) + + 0.25 * ( + fine(ii-1, jj-1, kk+1) + fine(ii+1, jj-1, kk+1) + + fine(ii-1, jj+1, kk+1) + fine(ii+1, jj+1, kk+1) + ) + ); + } +#endif + } +private: + amrex::Array4< amrex::Real const > m_fine; + amrex::Array4< amrex::Real > m_coarse; + int m_refinement_ratio; +}; + +#endif //WARPX_CURRENTSYNCHRONIZE_H diff --git a/Source/Parallelization/WarpXComm.cpp b/Source/Parallelization/WarpXComm.cpp index 990d0f988..22fa4820e 100644 --- a/Source/Parallelization/WarpXComm.cpp +++ b/Source/Parallelization/WarpXComm.cpp @@ -2,6 +2,7 @@ #include #include #include +#include #include #include @@ -348,36 +349,34 @@ WarpX::SyncCurrent () } } -/** \brief Fills the values of the current on the coarse patch by - * averaging the values of the current of the fine patch (on the same level). - */ void WarpX::SyncCurrent (const std::array& fine, - const std::array< amrex::MultiFab*,3>& crse, - int refinement_ratio) + const std::array< amrex::MultiFab*,3>& coarse, + int const refinement_ratio) { BL_ASSERT(refinement_ratio == 2); - const IntVect& ng = (fine[0]->nGrowVect() + 1) /refinement_ratio; + const IntVect& ng = (fine[0]->nGrowVect() + 1) / refinement_ratio; // add equivalent no. of guards to coarse patch #ifdef _OPEMP -#pragma omp parallel +#pragma omp parallel if (Gpu::notInLaunchRegion()) #endif { - FArrayBox ffab; - for (int idim = 0; idim < 3; ++idim) + FArrayBox ffab; // contiguous, temporary, copy of the tiled fine patch to read from + for (int idim = 0; idim < fine.size(); ++idim) // j-field components { - for (MFIter mfi(*crse[idim],true); mfi.isValid(); ++mfi) + for (MFIter mfi(*coarse[idim],true); mfi.isValid(); ++mfi) // OMP in-box decomposition of coarse into tilebox { - const Box& bx = mfi.growntilebox(ng); - Box fbx = amrex::grow(amrex::refine(bx,refinement_ratio),1); - ffab.resize(fbx); - fbx &= (*fine[idim])[mfi].box(); - ffab.setVal(0.0); - ffab.copy((*fine[idim])[mfi], fbx, 0, fbx, 0, fine[idim]->nComp()); - WRPX_SYNC_CURRENT(bx.loVect(), bx.hiVect(), - BL_TO_FORTRAN_ANYD((*crse[idim])[mfi]), - BL_TO_FORTRAN_ANYD(ffab), - &idim); + const Box& bx = mfi.growntilebox(ng); // only grow to outer directions of tileboxes for filling guards + + auto const & arrFine = fine[idim]->const_array(mfi); + auto const & arrCoarse = coarse[idim]->array(mfi); + + if( idim == 0 ) + amrex::ParallelFor( bx, WarpxSyncCurrent<0>(arrFine, arrCoarse, refinement_ratio) ); + else if( idim == 1 ) + amrex::ParallelFor( bx, WarpxSyncCurrent<1>(arrFine, arrCoarse, refinement_ratio) ); + else if( idim == 2 ) + amrex::ParallelFor( bx, WarpxSyncCurrent<2>(arrFine, arrCoarse, refinement_ratio) ); } } } @@ -407,9 +406,6 @@ WarpX::SyncRho () } } -/** \brief Fills the values of the charge density on the coarse patch by - * averaging the values of the charge density of the fine patch (on the same level). - */ void WarpX::SyncRho (const MultiFab& fine, MultiFab& crse, int refinement_ratio) { @@ -418,7 +414,7 @@ WarpX::SyncRho (const MultiFab& fine, MultiFab& crse, int refinement_ratio) const int nc = fine.nComp(); #ifdef _OPEMP -#pragma omp parallel +#pragma omp parallel if (Gpu::notInLaunchRegion()) #endif { FArrayBox ffab; diff --git a/Source/WarpX.H b/Source/WarpX.H index 7a5344f4f..9b9fb045c 100644 --- a/Source/WarpX.H +++ b/Source/WarpX.H @@ -420,10 +420,25 @@ private: std::array, 3> getInterpolatedB(int lev) const; + /** \brief Fills the values of the current on the coarse patch by + * averaging the values of the current of the fine patch (on the same level). + * Also fills the guards of the coarse patch. + * + * \param[in] fine fine patches to interpolate from + * \param[out] coarse coarse patches to interpolate to + * \param[in] refinement_ratio integer ratio between the two + */ void SyncCurrent (const std::array& fine, - const std::array< amrex::MultiFab*,3>& crse, - int ref_ratio); - + const std::array< amrex::MultiFab*,3>& coarse, + int const refinement_ratio); + + /** \brief Fills the values of the charge density on the coarse patch by + * averaging the values of the charge density of the fine patch (on the same level). + * + * \param[in] fine fine patches to interpolate from + * \param[out] crse coarse patches to interpolate to + * \param[in] ref_ratio integer ratio between the two + */ void SyncRho (const amrex::MultiFab& fine, amrex::MultiFab& crse, int ref_ratio); void ExchangeWithPmlB (int lev); -- cgit v1.2.3 From d67030eb709d965672a9900132d0085922335cc1 Mon Sep 17 00:00:00 2001 From: Axel Huebl Date: Wed, 25 Sep 2019 09:54:52 -0700 Subject: SyncCurrent: Disable Tiling for GPU Co-authored-by: Weiqun Zhang --- Source/Parallelization/WarpXComm.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'Source/Parallelization/WarpXComm.cpp') diff --git a/Source/Parallelization/WarpXComm.cpp b/Source/Parallelization/WarpXComm.cpp index 22fa4820e..40f8203a9 100644 --- a/Source/Parallelization/WarpXComm.cpp +++ b/Source/Parallelization/WarpXComm.cpp @@ -361,10 +361,10 @@ WarpX::SyncCurrent (const std::array& fine, #pragma omp parallel if (Gpu::notInLaunchRegion()) #endif { - FArrayBox ffab; // contiguous, temporary, copy of the tiled fine patch to read from for (int idim = 0; idim < fine.size(); ++idim) // j-field components { - for (MFIter mfi(*coarse[idim],true); mfi.isValid(); ++mfi) // OMP in-box decomposition of coarse into tilebox + // OMP in-box decomposition of coarse into tilebox + for (MFIter mfi(*coarse[idim], TilingIfNotGPU()); mfi.isValid(); ++mfi) { const Box& bx = mfi.growntilebox(ng); // only grow to outer directions of tileboxes for filling guards -- cgit v1.2.3 From c9577f8d200d99c40d15d5ff0d2fdacbddc2026f Mon Sep 17 00:00:00 2001 From: Axel Huebl Date: Fri, 27 Sep 2019 11:14:00 -0700 Subject: Rename, Profile & Remove TODO --- Source/Parallelization/CurrentSynchronize.H | 175 --------------------- .../InterpolateCurrentFineToCoarse.H | 175 +++++++++++++++++++++ Source/Parallelization/WarpXComm.cpp | 19 +-- Source/WarpX.H | 6 +- 4 files changed, 188 insertions(+), 187 deletions(-) delete mode 100644 Source/Parallelization/CurrentSynchronize.H create mode 100644 Source/Parallelization/InterpolateCurrentFineToCoarse.H (limited to 'Source/Parallelization/WarpXComm.cpp') diff --git a/Source/Parallelization/CurrentSynchronize.H b/Source/Parallelization/CurrentSynchronize.H deleted file mode 100644 index 5329ca242..000000000 --- a/Source/Parallelization/CurrentSynchronize.H +++ /dev/null @@ -1,175 +0,0 @@ -/* Copyright 2019 Axel Huebl, Weiqun Zhang - * - * This file is part of WarpX. - * - * License: BSD-3-Clause-LBNL - */ - -#ifndef WARPX_CURRENTSYNCHRONIZE_H -#define WARPX_CURRENTSYNCHRONIZE_H - -#include -#include -#include -#include -#include - -#include // std::move - - -/** Fill a current coarse patch with averaged values from a fine patch - * - * Fills the values of the current for a selected component on the coarse patch - * by averaging the values of the current of the fine patch. - * - * @tparam IDim j-field component on which the averaging is performed - */ -template< int IDim > -class WarpxSyncCurrent -{ -public: - /** Construct with fine and coarse patch and their refinement ratio - * - * @param[in] fine read-only fine patch - * @param[in,out] coarse overwritten coarse patch - * @param[in] refinement_ratio ratio between coarse and fine patch granularity - * (currently, only a value of is implemented) - */ - AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE - WarpxSyncCurrent( - amrex::Array4 const fine, - amrex::Array4 const coarse, - int const refinement_ratio - ) : m_fine(std::move(fine)), - m_coarse(std::move(coarse)), - m_refinement_ratio(std::move(refinement_ratio)) - { - //! @note constants and stencils in operator() implementation assume 2x refinement - BL_ASSERT(refinement_ratio == 2); - } - - AMREX_GPU_DEVICE AMREX_FORCE_INLINE - void - operator()( - int const i, - int const j, - int const k - ) const noexcept // TODO rename to jkl - { - auto const & fine_unsafe = m_fine; // out-of-bounds access not secured with zero-values yet - auto const & coarse = m_coarse; // out-of-bounds access not secured but will also not occur - - // return zero for out-of-bounds accesses during interpolation - // this is efficiently used as a method to add neutral elements beyond guards in the average below - auto const fine = [fine_unsafe] AMREX_GPU_DEVICE (int const j, int const k, int const l) noexcept - { - return fine_unsafe.contains(j, k, l) ? fine_unsafe(j, k, l) : amrex::Real{0.}; - }; - - int const ii = i * m_refinement_ratio; - int const jj = j * m_refinement_ratio; - int const kk = k * m_refinement_ratio; -#if AMREX_SPACEDIM == 2 - if (IDim == 0) { - coarse(i, j, k) = 0.25 * ( - fine(ii, jj, kk) + fine(ii + 1, jj, kk) + - 0.5 * ( - fine(ii, jj - 1, kk) + fine(ii + 1, jj - 1, kk) + - fine(ii, jj + 1, kk) + fine(ii + 1, jj + 1, kk) - ) - ); - } else if (IDim == 2) { - coarse(i, j, k) = 0.25 * ( - fine(ii, jj, kk) + fine(ii, jj + 1, kk) + - 0.5 * ( - fine(ii - 1, jj, kk) + fine(ii - 1, jj + 1, kk) + - fine(ii + 1, jj, kk) + fine(ii + 1, jj + 1, kk) - ) - ); - } else { - coarse(i, j, k) = 0.25 * ( - fine(ii, jj, kk) + - 0.5 * ( - fine(ii - 1, jj , kk) + fine(ii + 1, jj , kk) + - fine(ii , jj - 1, kk) + fine(ii , jj + 1, kk) - ) + - 0.25 * ( - fine(ii - 1, jj - 1, kk) + fine(ii + 1, jj - 1, kk) + - fine(ii - 1, jj + 1, kk) + fine(ii + 1, jj + 1, kk) - ) - ); - } -#elif AMREX_SPACEDIM == 3 - if (IDim == 0) { - coarse(i,j,k) = 0.125 * ( - fine(ii , jj, kk) + - 0.5 * ( - fine(ii , jj-1, kk ) + fine(ii , jj+1, kk ) + - fine(ii , jj , kk-1) + fine(ii , jj , kk+1) - ) + - 0.25 * ( - fine(ii , jj-1, kk-1) + fine(ii , jj+1, kk-1) + - fine(ii , jj-1, kk+1) + fine(ii , jj+1, kk+1) - ) + - fine(ii+1, jj, kk) + - 0.5 * ( - fine(ii+1, jj-1, kk ) + fine(ii+1, jj+1, kk ) + - fine(ii+1, jj , kk-1) + fine(ii+1, jj , kk+1) - ) + - 0.25 * ( - fine(ii+1, jj-1, kk-1) + fine(ii+1, jj+1, kk-1) + - fine(ii+1, jj-1, kk+1) + fine(ii+1, jj+1, kk+1) - ) - ); - } else if (IDim == 1) { - coarse(i, j, k) = 0.125 * ( - fine(ii, jj , kk) + - 0.5 * ( - fine(ii-1, jj , kk ) + fine(ii+1, jj , kk ) + - fine(ii , jj , kk-1) + fine(ii , jj , kk+1) - ) + - 0.25 * ( - fine(ii-1, jj , kk-1) + fine(ii+1, jj , kk-1) + - fine(ii-1, jj , kk+1) + fine(ii+1, jj , kk+1) - ) + - fine(ii, jj+1, kk) + - 0.5 * ( - fine(ii-1, jj+1, kk ) + fine(ii+1, jj+1, kk ) + - fine(ii , jj+1, kk-1) + fine(ii , jj+1, kk+1) - ) + - 0.25 * ( - fine(ii-1, jj+1, kk-1) + fine(ii+1, jj+1, kk-1) + - fine(ii-1, jj+1, kk+1) + fine(ii+1, jj+1, kk+1) - ) - ); - } else { - coarse(i, j, k) = 0.125 * ( - fine(ii, jj, kk ) + - 0.5 * ( - fine(ii-1, jj , kk ) + fine(ii+1, jj , kk ) + - fine(ii , jj-1, kk ) + fine(ii , jj+1, kk ) - ) + - 0.25 * ( - fine(ii-1, jj-1, kk ) + fine(ii+1, jj-1, kk ) + - fine(ii-1, jj+1, kk ) + fine(ii+1, jj+1, kk ) - ) + - fine(ii, jj, kk+1) + - 0.5 * ( - fine(ii-1, jj , kk+1) + fine(ii+1, jj , kk+1) + - fine(ii , jj-1, kk+1) + fine(ii , jj+1, kk+1) - ) + - 0.25 * ( - fine(ii-1, jj-1, kk+1) + fine(ii+1, jj-1, kk+1) + - fine(ii-1, jj+1, kk+1) + fine(ii+1, jj+1, kk+1) - ) - ); - } -#endif - } -private: - amrex::Array4< amrex::Real const > m_fine; - amrex::Array4< amrex::Real > m_coarse; - int m_refinement_ratio; -}; - -#endif //WARPX_CURRENTSYNCHRONIZE_H diff --git a/Source/Parallelization/InterpolateCurrentFineToCoarse.H b/Source/Parallelization/InterpolateCurrentFineToCoarse.H new file mode 100644 index 000000000..148b725d0 --- /dev/null +++ b/Source/Parallelization/InterpolateCurrentFineToCoarse.H @@ -0,0 +1,175 @@ +/* Copyright 2019 Axel Huebl, Weiqun Zhang + * + * This file is part of WarpX. + * + * License: BSD-3-Clause-LBNL + */ + +#ifndef INTERPOLATECURRENTFINETOCOARSE_H +#define INTERPOLATECURRENTFINETOCOARSE_H + +#include +#include +#include +#include +#include + +#include // std::move + + +/** Fill a current coarse patch with averaged values from a fine patch + * + * Fills the values of the current for a selected component on the coarse patch + * by averaging the values of the current of the fine patch. + * + * @tparam IDim j-field component on which the averaging is performed + */ +template< int IDim > +class InterpolateCurrentFineToCoarse +{ +public: + /** Construct with fine and coarse patch and their refinement ratio + * + * @param[in] fine read-only fine patch + * @param[in,out] coarse overwritten coarse patch + * @param[in] refinement_ratio ratio between coarse and fine patch granularity + * (currently, only a value of is implemented) + */ + AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE + InterpolateCurrentFineToCoarse( + amrex::Array4< amrex::Real const > const fine, + amrex::Array4< amrex::Real > const coarse, + int const refinement_ratio + ) : m_fine(std::move(fine)), + m_coarse(std::move(coarse)), + m_refinement_ratio(std::move(refinement_ratio)) + { + //! @note constants and stencils in operator() implementation assume 2x refinement + BL_ASSERT(refinement_ratio == 2); + } + + AMREX_GPU_DEVICE AMREX_FORCE_INLINE + void + operator()( + int const i, + int const j, + int const k + ) const noexcept + { + auto const & fine_unsafe = m_fine; // out-of-bounds access not secured with zero-values yet + auto const & coarse = m_coarse; // out-of-bounds access not secured but will also not occur + + // return zero for out-of-bounds accesses during interpolation + // this is efficiently used as a method to add neutral elements beyond guards in the average below + auto const fine = [fine_unsafe] AMREX_GPU_DEVICE (int const j, int const k, int const l) noexcept + { + return fine_unsafe.contains(j, k, l) ? fine_unsafe(j, k, l) : amrex::Real{0.}; + }; + + int const ii = i * m_refinement_ratio; + int const jj = j * m_refinement_ratio; + int const kk = k * m_refinement_ratio; +#if AMREX_SPACEDIM == 2 + if (IDim == 0) { + coarse(i, j, k) = 0.25 * ( + fine(ii, jj, kk) + fine(ii + 1, jj, kk) + + 0.5 * ( + fine(ii, jj - 1, kk) + fine(ii + 1, jj - 1, kk) + + fine(ii, jj + 1, kk) + fine(ii + 1, jj + 1, kk) + ) + ); + } else if (IDim == 2) { + coarse(i, j, k) = 0.25 * ( + fine(ii, jj, kk) + fine(ii, jj + 1, kk) + + 0.5 * ( + fine(ii - 1, jj, kk) + fine(ii - 1, jj + 1, kk) + + fine(ii + 1, jj, kk) + fine(ii + 1, jj + 1, kk) + ) + ); + } else { + coarse(i, j, k) = 0.25 * ( + fine(ii, jj, kk) + + 0.5 * ( + fine(ii - 1, jj , kk) + fine(ii + 1, jj , kk) + + fine(ii , jj - 1, kk) + fine(ii , jj + 1, kk) + ) + + 0.25 * ( + fine(ii - 1, jj - 1, kk) + fine(ii + 1, jj - 1, kk) + + fine(ii - 1, jj + 1, kk) + fine(ii + 1, jj + 1, kk) + ) + ); + } +#elif AMREX_SPACEDIM == 3 + if (IDim == 0) { + coarse(i,j,k) = 0.125 * ( + fine(ii , jj, kk) + + 0.5 * ( + fine(ii , jj-1, kk ) + fine(ii , jj+1, kk ) + + fine(ii , jj , kk-1) + fine(ii , jj , kk+1) + ) + + 0.25 * ( + fine(ii , jj-1, kk-1) + fine(ii , jj+1, kk-1) + + fine(ii , jj-1, kk+1) + fine(ii , jj+1, kk+1) + ) + + fine(ii+1, jj, kk) + + 0.5 * ( + fine(ii+1, jj-1, kk ) + fine(ii+1, jj+1, kk ) + + fine(ii+1, jj , kk-1) + fine(ii+1, jj , kk+1) + ) + + 0.25 * ( + fine(ii+1, jj-1, kk-1) + fine(ii+1, jj+1, kk-1) + + fine(ii+1, jj-1, kk+1) + fine(ii+1, jj+1, kk+1) + ) + ); + } else if (IDim == 1) { + coarse(i, j, k) = 0.125 * ( + fine(ii, jj , kk) + + 0.5 * ( + fine(ii-1, jj , kk ) + fine(ii+1, jj , kk ) + + fine(ii , jj , kk-1) + fine(ii , jj , kk+1) + ) + + 0.25 * ( + fine(ii-1, jj , kk-1) + fine(ii+1, jj , kk-1) + + fine(ii-1, jj , kk+1) + fine(ii+1, jj , kk+1) + ) + + fine(ii, jj+1, kk) + + 0.5 * ( + fine(ii-1, jj+1, kk ) + fine(ii+1, jj+1, kk ) + + fine(ii , jj+1, kk-1) + fine(ii , jj+1, kk+1) + ) + + 0.25 * ( + fine(ii-1, jj+1, kk-1) + fine(ii+1, jj+1, kk-1) + + fine(ii-1, jj+1, kk+1) + fine(ii+1, jj+1, kk+1) + ) + ); + } else { + coarse(i, j, k) = 0.125 * ( + fine(ii, jj, kk ) + + 0.5 * ( + fine(ii-1, jj , kk ) + fine(ii+1, jj , kk ) + + fine(ii , jj-1, kk ) + fine(ii , jj+1, kk ) + ) + + 0.25 * ( + fine(ii-1, jj-1, kk ) + fine(ii+1, jj-1, kk ) + + fine(ii-1, jj+1, kk ) + fine(ii+1, jj+1, kk ) + ) + + fine(ii, jj, kk+1) + + 0.5 * ( + fine(ii-1, jj , kk+1) + fine(ii+1, jj , kk+1) + + fine(ii , jj-1, kk+1) + fine(ii , jj+1, kk+1) + ) + + 0.25 * ( + fine(ii-1, jj-1, kk+1) + fine(ii+1, jj-1, kk+1) + + fine(ii-1, jj+1, kk+1) + fine(ii+1, jj+1, kk+1) + ) + ); + } +#endif + } +private: + amrex::Array4< amrex::Real const > m_fine; + amrex::Array4< amrex::Real > m_coarse; + int m_refinement_ratio; +}; + +#endif // INTERPOLATECURRENTFINETOCOARSE_H diff --git a/Source/Parallelization/WarpXComm.cpp b/Source/Parallelization/WarpXComm.cpp index 40f8203a9..4f870e79c 100644 --- a/Source/Parallelization/WarpXComm.cpp +++ b/Source/Parallelization/WarpXComm.cpp @@ -2,7 +2,7 @@ #include #include #include -#include +#include #include #include @@ -337,7 +337,7 @@ WarpX::SyncCurrent () std::array< MultiFab*,3> crse { current_cp[lev][0].get(), current_cp[lev][1].get(), current_cp[lev][2].get() }; - SyncCurrent(fine, crse, refinement_ratio[0]); + interpolateCurrentFineToCoarse(fine, crse, refinement_ratio[0]); } // For each level @@ -350,10 +350,11 @@ WarpX::SyncCurrent () } void -WarpX::SyncCurrent (const std::array& fine, - const std::array< amrex::MultiFab*,3>& coarse, - int const refinement_ratio) +WarpX::interpolateCurrentFineToCoarse ( std::array< amrex::MultiFab const *, 3 > const & fine, + std::array< amrex::MultiFab *, 3 > const & coarse, + int const refinement_ratio) { + BL_PROFILE("InterpolateCurrentFineToCoarse()"); BL_ASSERT(refinement_ratio == 2); const IntVect& ng = (fine[0]->nGrowVect() + 1) / refinement_ratio; // add equivalent no. of guards to coarse patch @@ -372,11 +373,11 @@ WarpX::SyncCurrent (const std::array& fine, auto const & arrCoarse = coarse[idim]->array(mfi); if( idim == 0 ) - amrex::ParallelFor( bx, WarpxSyncCurrent<0>(arrFine, arrCoarse, refinement_ratio) ); + amrex::ParallelFor( bx, InterpolateCurrentFineToCoarse<0>(arrFine, arrCoarse, refinement_ratio) ); else if( idim == 1 ) - amrex::ParallelFor( bx, WarpxSyncCurrent<1>(arrFine, arrCoarse, refinement_ratio) ); + amrex::ParallelFor( bx, InterpolateCurrentFineToCoarse<1>(arrFine, arrCoarse, refinement_ratio) ); else if( idim == 2 ) - amrex::ParallelFor( bx, WarpxSyncCurrent<2>(arrFine, arrCoarse, refinement_ratio) ); + amrex::ParallelFor( bx, InterpolateCurrentFineToCoarse<2>(arrFine, arrCoarse, refinement_ratio) ); } } } @@ -452,7 +453,7 @@ WarpX::RestrictCurrentFromFineToCoarsePatch (int lev) std::array< MultiFab*,3> crse { current_cp[lev][0].get(), current_cp[lev][1].get(), current_cp[lev][2].get() }; - SyncCurrent(fine, crse, refinement_ratio[0]); + interpolateCurrentFineToCoarse(fine, crse, refinement_ratio[0]); } void diff --git a/Source/WarpX.H b/Source/WarpX.H index 9b9fb045c..c59802427 100644 --- a/Source/WarpX.H +++ b/Source/WarpX.H @@ -428,9 +428,9 @@ private: * \param[out] coarse coarse patches to interpolate to * \param[in] refinement_ratio integer ratio between the two */ - void SyncCurrent (const std::array& fine, - const std::array< amrex::MultiFab*,3>& coarse, - int const refinement_ratio); + void interpolateCurrentFineToCoarse (std::array< amrex::MultiFab const *, 3 > const & fine, + std::array< amrex::MultiFab *, 3 > const & coarse, + int const refinement_ratio); /** \brief Fills the values of the charge density on the coarse patch by * averaging the values of the charge density of the fine patch (on the same level). -- cgit v1.2.3