Diffstat (limited to 'Source')
 Source/FieldSolver/Make.package                |   1
 Source/FieldSolver/WarpX_FDTD.H                |  68
 Source/FortranInterface/WarpX_f.F90            | 137
 Source/Laser/LaserParticleContainer.cpp        |  13
 Source/Particles/PhysicalParticleContainer.cpp |  12
 Source/Particles/WarpXParticleContainer.H      |  50
 Source/Particles/WarpXParticleContainer.cpp    | 285
 Source/WarpX.cpp                               | 127
 8 files changed, 318 insertions(+), 375 deletions(-)
diff --git a/Source/FieldSolver/Make.package b/Source/FieldSolver/Make.package
index 12dc44e5b..7eea52ee7 100644
--- a/Source/FieldSolver/Make.package
+++ b/Source/FieldSolver/Make.package
@@ -1,4 +1,5 @@
 CEXE_headers += WarpX_K.H
+CEXE_headers += WarpX_FDTD.H
 CEXE_sources += WarpXPushFieldsEM.cpp
 ifeq ($(USE_PSATD),TRUE)
    CEXE_sources += WarpXFFT.cpp
diff --git a/Source/FieldSolver/WarpX_FDTD.H b/Source/FieldSolver/WarpX_FDTD.H
new file mode 100644
index 000000000..865277446
--- /dev/null
+++ b/Source/FieldSolver/WarpX_FDTD.H
@@ -0,0 +1,68 @@
+#ifndef WARPX_FDTD_H_
+#define WARPX_FDTD_H_
+
+#include <AMReX_FArrayBox.H>
+
+using namespace amrex;
+
+AMREX_GPU_HOST_DEVICE AMREX_INLINE
+void warpx_computedivb(int i, int j, int k, int dcomp, Array4<Real> const& divB,
+                       Array4<Real const> const& Bx,
+                       Array4<Real const> const& By,
+                       Array4<Real const> const& Bz,
+                       Real dxinv, Real dyinv, Real dzinv
+#ifdef WARPX_RZ
+                       ,const Real rmin
+#endif
+                       )
+{
+#if (AMREX_SPACEDIM == 3)
+    divB(i,j,k,dcomp) = (Bx(i+1,j  ,k  ) - Bx(i,j,k))*dxinv
+                      + (By(i  ,j+1,k  ) - By(i,j,k))*dyinv
+                      + (Bz(i  ,j  ,k+1) - Bz(i,j,k))*dzinv;
+#else
+#ifndef WARPX_RZ
+    divB(i,j,0,dcomp) = (Bx(i+1,j  ,0) - Bx(i,j,0))*dxinv
+                      + (Bz(i  ,j+1,0) - Bz(i,j,0))*dzinv;
+#else
+    Real ru = 1. + 0.5/(rmin*dxinv + i + 0.5);
+    Real rd = 1. - 0.5/(rmin*dxinv + i + 0.5);
+    divB(i,j,0,dcomp) = (ru*Bx(i+1,j,0) - rd*Bx(i,j,0))*dxinv
+                      + (Bz(i,j+1,0) - Bz(i,j,0))*dzinv;
+#endif
+#endif
+}
+
+AMREX_GPU_HOST_DEVICE AMREX_INLINE
+void warpx_computedive(int i, int j, int k, int dcomp, Array4<Real> const& divE,
+                       Array4<Real const> const& Ex,
+                       Array4<Real const> const& Ey,
+                       Array4<Real const> const& Ez,
+                       Real dxinv, Real dyinv, Real dzinv
+#ifdef WARPX_RZ
+                       ,const Real rmin
+#endif
+                       )
+{
+#if (AMREX_SPACEDIM == 3)
+    divE(i,j,k,dcomp) = (Ex(i,j,k) - Ex(i-1,j,k))*dxinv
+                      + (Ey(i,j,k) - Ey(i,j-1,k))*dyinv
+                      + (Ez(i,j,k) - Ez(i,j,k-1))*dzinv;
+#else
+#ifndef WARPX_RZ
+    divE(i,j,0,dcomp) = (Ex(i,j,0) - Ex(i-1,j,0))*dxinv
+                      + (Ez(i,j,0) - Ez(i,j-1,0))*dzinv;
+#else
+    if (i == 0 && rmin == 0.) {
+        // the bulk equation diverges on axis
+        // (due to the 1/r terms). The following expressions regularize
+        // these divergences.
+        divE(i,j,0,dcomp) = 4.*Ex(i,j,0)*dxinv
+                          + (Ez(i,j,0) - Ez(i,j-1,0))*dzinv;
+    } else {
+        Real ru = 1. + 0.5/(rmin*dxinv + i);
+        Real rd = 1. - 0.5/(rmin*dxinv + i);
+        divE(i,j,0,dcomp) = (ru*Ex(i,j,0) - rd*Ex(i-1,j,0))*dxinv
+                          + (Ez(i,j,0) - Ez(i,j-1,0))*dzinv;
+    }
+#endif
+#endif
+}
+
+#endif
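Editor's note on the i == 0 branch above (a sketch of the derivation, not part of the commit): in r-z geometry the divergence is

    div E = (1/r) d(r E_r)/dr + d(E_z)/dz,

which is singular on axis. Applying Gauss's theorem to a thin cylinder of radius dr/2 centered on the axis (side flux 2*pi*(dr/2)*h*E_r(dr/2), volume pi*(dr/2)^2*h) gives

    div E ~= 2 E_r(dr/2) / (dr/2) + d(E_z)/dz = 4 E_r(dr/2)/dr + d(E_z)/dz,

which is exactly the 4.*Ex(i,j,0)*dxinv term: in RZ builds Ex holds the radial component, staggered half a cell off the axis.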
diff --git a/Source/FortranInterface/WarpX_f.F90 b/Source/FortranInterface/WarpX_f.F90
index d7f7a8053..0560a9981 100644
--- a/Source/FortranInterface/WarpX_f.F90
+++ b/Source/FortranInterface/WarpX_f.F90
@@ -84,143 +84,6 @@ contains
 
   end subroutine warpx_compute_E
 
-  subroutine warpx_compute_divb_3d (lo, hi, divB, dlo, dhi, &
-       Bx, xlo, xhi, By, ylo, yhi, Bz, zlo, zhi, dx) &
-       bind(c, name='warpx_compute_divb_3d')
-    integer, intent(in) :: lo(3),hi(3),dlo(3),dhi(3),xlo(3),xhi(3),ylo(3),yhi(3),zlo(3),zhi(3)
-    real(amrex_real), intent(in) :: dx(3)
-    real(amrex_real), intent(in   ) :: Bx  (xlo(1):xhi(1),xlo(2):xhi(2),xlo(3):xhi(3))
-    real(amrex_real), intent(in   ) :: By  (ylo(1):yhi(1),ylo(2):yhi(2),ylo(3):yhi(3))
-    real(amrex_real), intent(in   ) :: Bz  (zlo(1):zhi(1),zlo(2):zhi(2),zlo(3):zhi(3))
-    real(amrex_real), intent(inout) :: divB(dlo(1):dhi(1),dlo(2):dhi(2),dlo(3):dhi(3))
-
-    integer :: i,j,k
-    real(amrex_real) :: dxinv(3)
-
-    dxinv = 1.d0/dx
-
-    do k = lo(3), hi(3)
-       do j = lo(2), hi(2)
-          do i = lo(1), hi(1)
-             divB(i,j,k) = dxinv(1) * (Bx(i+1,j  ,k  ) - Bx(i,j,k)) &
-                  +        dxinv(2) * (By(i  ,j+1,k  ) - By(i,j,k)) &
-                  +        dxinv(3) * (Bz(i  ,j  ,k+1) - Bz(i,j,k))
-          end do
-       end do
-    end do
-  end subroutine warpx_compute_divb_3d
-
-
-  subroutine warpx_compute_divb_2d (lo, hi, divB, dlo, dhi, &
-       Bx, xlo, xhi, By, ylo, yhi, Bz, zlo, zhi, dx) &
-       bind(c, name='warpx_compute_divb_2d')
-    integer, intent(in) :: lo(2),hi(2),dlo(2),dhi(2),xlo(2),xhi(2),ylo(2),yhi(2),zlo(2),zhi(2)
-    real(amrex_real), intent(in) :: dx(3)
-    real(amrex_real), intent(in   ) :: Bx  (xlo(1):xhi(1),xlo(2):xhi(2))
-    real(amrex_real), intent(in   ) :: By  (ylo(1):yhi(1),ylo(2):yhi(2))
-    real(amrex_real), intent(in   ) :: Bz  (zlo(1):zhi(1),zlo(2):zhi(2))
-    real(amrex_real), intent(inout) :: divB(dlo(1):dhi(1),dlo(2):dhi(2))
-
-    integer :: i,k
-    real(amrex_real) :: dxinv(3)
-
-    dxinv = 1.d0/dx
-
-    do k = lo(2), hi(2)
-       do i = lo(1), hi(1)
-          divB(i,k) = dxinv(1) * (Bx(i+1,k  ) - Bx(i,k)) &
-               +      dxinv(3) * (Bz(i  ,k+1) - Bz(i,k))
-       end do
-    end do
-  end subroutine warpx_compute_divb_2d
-
-
-  subroutine warpx_compute_dive_3d (lo, hi, dive, dlo, dhi, &
-       Ex, xlo, xhi, Ey, ylo, yhi, Ez, zlo, zhi, dx) &
-       bind(c, name='warpx_compute_dive_3d')
-    integer, intent(in) :: lo(3),hi(3),dlo(3),dhi(3),xlo(3),xhi(3),ylo(3),yhi(3),zlo(3),zhi(3)
-    real(amrex_real), intent(in) :: dx(3)
-    real(amrex_real), intent(in   ) :: Ex  (xlo(1):xhi(1),xlo(2):xhi(2),xlo(3):xhi(3))
-    real(amrex_real), intent(in   ) :: Ey  (ylo(1):yhi(1),ylo(2):yhi(2),ylo(3):yhi(3))
-    real(amrex_real), intent(in   ) :: Ez  (zlo(1):zhi(1),zlo(2):zhi(2),zlo(3):zhi(3))
-    real(amrex_real), intent(inout) :: dive(dlo(1):dhi(1),dlo(2):dhi(2),dlo(3):dhi(3))
-
-    integer :: i,j,k
-    real(amrex_real) :: dxinv(3)
-
-    dxinv = 1.d0/dx
-
-    do k = lo(3), hi(3)
-       do j = lo(2), hi(2)
-          do i = lo(1), hi(1)
-             dive(i,j,k) = dxinv(1) * (Ex(i,j,k) - Ex(i-1,j,k)) &
-                  +        dxinv(2) * (Ey(i,j,k) - Ey(i,j-1,k)) &
-                  +        dxinv(3) * (Ez(i,j,k) - Ez(i,j,k-1))
-          end do
-       end do
-    end do
-  end subroutine warpx_compute_dive_3d
-
-
-  subroutine warpx_compute_dive_2d (lo, hi, dive, dlo, dhi, &
-       Ex, xlo, xhi, Ey, ylo, yhi, Ez, zlo, zhi, dx) &
-       bind(c, name='warpx_compute_dive_2d')
-    integer, intent(in) :: lo(2),hi(2),dlo(2),dhi(2),xlo(2),xhi(2),ylo(2),yhi(2),zlo(2),zhi(2)
-    real(amrex_real), intent(in) :: dx(3)
-    real(amrex_real), intent(in   ) :: Ex  (xlo(1):xhi(1),xlo(2):xhi(2))
-    real(amrex_real), intent(in   ) :: Ey  (ylo(1):yhi(1),ylo(2):yhi(2))
-    real(amrex_real), intent(in   ) :: Ez  (zlo(1):zhi(1),zlo(2):zhi(2))
-    real(amrex_real), intent(inout) :: dive(dlo(1):dhi(1),dlo(2):dhi(2))
-
-    integer :: i,k
-    real(amrex_real) :: dxinv(3)
-
-    dxinv = 1.d0/dx
-
-    do k = lo(2), hi(2)
-       do i = lo(1), hi(1)
-          dive(i,k) = dxinv(1) * (Ex(i,k) - Ex(i-1,k)) &
-               +      dxinv(3) * (Ez(i,k) - Ez(i,k-1))
-       end do
-    end do
-  end subroutine warpx_compute_dive_2d
-
-
-  subroutine warpx_compute_dive_rz (lo, hi, dive, dlo, dhi, &
-       Ex, xlo, xhi, Ey, ylo, yhi, Ez, zlo, zhi, dx, rmin) &
-       bind(c, name='warpx_compute_dive_rz')
-    integer, intent(in) :: lo(2),hi(2),dlo(2),dhi(2),xlo(2),xhi(2),ylo(2),yhi(2),zlo(2),zhi(2)
-    real(amrex_real), intent(in) :: dx(3), rmin
-    real(amrex_real), intent(in   ) :: Ex  (xlo(1):xhi(1),xlo(2):xhi(2))
-    real(amrex_real), intent(in   ) :: Ey  (ylo(1):yhi(1),ylo(2):yhi(2))
-    real(amrex_real), intent(in   ) :: Ez  (zlo(1):zhi(1),zlo(2):zhi(2))
-    real(amrex_real), intent(inout) :: dive(dlo(1):dhi(1),dlo(2):dhi(2))
-
-    integer :: i,k
-    real(amrex_real) :: dxinv(3)
-    real(amrex_real) :: ru, rd
-
-    dxinv = 1.d0/dx
-
-    do k = lo(2), hi(2)
-       do i = lo(1), hi(1)
-          if (i == 0 .and. rmin == 0.) then
-             ! the bulk equation diverges on axis
-             ! (due to the 1/r terms). The following expressions regularize
-             ! these divergences.
-             dive(i,k) = 4.*dxinv(1) * Ex(i,k) &
-                  +      dxinv(3) * (Ez(i,k) - Ez(i,k-1))
-          else
-             ru = 1.d0 + 0.5d0/(rmin*dxinv(1) + i)
-             rd = 1.d0 - 0.5d0/(rmin*dxinv(1) + i)
-             dive(i,k) = dxinv(1) * (ru*Ex(i,k) - rd*Ex(i-1,k)) &
-                  +      dxinv(3) * (Ez(i,k) - Ez(i,k-1))
-          end if
-       end do
-    end do
-  end subroutine warpx_compute_dive_rz
-
-
   subroutine warpx_sync_current_2d (lo, hi, crse, clo, chi, fine, flo, fhi, dir) &
        bind(c, name='warpx_sync_current_2d')
     integer, intent(in) :: lo(2), hi(2), flo(2), fhi(2), clo(2), chi(2), dir
diff --git a/Source/Laser/LaserParticleContainer.cpp b/Source/Laser/LaserParticleContainer.cpp
index de410b31f..515aa1f5d 100644
--- a/Source/Laser/LaserParticleContainer.cpp
+++ b/Source/Laser/LaserParticleContainer.cpp
@@ -503,8 +503,17 @@ LaserParticleContainer::Evolve (int lev,
             //
             // Current Deposition
             //
-            DepositCurrent(pti, wp, uxp, uyp, uzp, jx, jy, jz,
-                           cjx, cjy, cjz, np_current, np, thread_num, lev, dt);
+            // Deposit inside domains
+            DepositCurrent(pti, wp, uxp, uyp, uzp, &jx, &jy, &jz,
+                           0, np_current, thread_num,
+                           lev, lev, dt);
+            bool has_buffer = cjx;
+            if (has_buffer){
+                // Deposit in buffers
+                DepositCurrent(pti, wp, uxp, uyp, uzp, cjx, cjy, cjz,
+                               np_current, np-np_current, thread_num,
+                               lev, lev-1, dt);
+            }
 
             //
             // copy particle data back
diff --git a/Source/Particles/PhysicalParticleContainer.cpp b/Source/Particles/PhysicalParticleContainer.cpp
index 7e7c9534e..43b46ec49 100644
--- a/Source/Particles/PhysicalParticleContainer.cpp
+++ b/Source/Particles/PhysicalParticleContainer.cpp
@@ -1529,8 +1529,16 @@ PhysicalParticleContainer::Evolve (int lev,
             //
             // Current Deposition
             //
-            DepositCurrent(pti, wp, uxp, uyp, uzp, jx, jy, jz,
-                           cjx, cjy, cjz, np_current, np, thread_num, lev, dt);
+            // Deposit inside domains
+            DepositCurrent(pti, wp, uxp, uyp, uzp, &jx, &jy, &jz,
+                           0, np_current, thread_num,
+                           lev, lev, dt);
+            if (has_buffer){
+                // Deposit in buffers
+                DepositCurrent(pti, wp, uxp, uyp, uzp, cjx, cjy, cjz,
+                               np_current, np-np_current, thread_num,
+                               lev, lev-1, dt);
+            }
 
             //
             // copy particle data back
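Both Evolve hunks above replace the single eleven-argument call with the same two-call pattern. A condensed sketch of the new calling convention (editor's illustration; names are taken from the hunks, and `has_buffer` stands for the `cjx != nullptr` test):

    // Fine-level deposition: particles [0, np_current) deposit on lev.
    DepositCurrent(pti, wp, uxp, uyp, uzp, &jx, &jy, &jz,
                   0, np_current, thread_num, lev, lev, dt);
    // Buffer deposition: the remaining particles [np_current, np)
    // deposit into the coarse-patch buffers (depos_lev = lev-1).
    if (has_buffer) {
        DepositCurrent(pti, wp, uxp, uyp, uzp, cjx, cjy, cjz,
                       np_current, np - np_current, thread_num,
                       lev, lev - 1, dt);
    }

The old np_current/np pair is thus generalized to an (offset, np_to_depose) window plus an explicit deposition level.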
diff --git a/Source/Particles/WarpXParticleContainer.H b/Source/Particles/WarpXParticleContainer.H
index 1e3dfb2ae..0e800bf1d 100644
--- a/Source/Particles/WarpXParticleContainer.H
+++ b/Source/Particles/WarpXParticleContainer.H
@@ -164,32 +164,30 @@ public:
                                bool local = false);
     std::unique_ptr<amrex::MultiFab> GetChargeDensity(int lev, bool local = false);
 
-    virtual void DepositCharge(WarpXParIter& pti,
-                               RealVector& wp,
-                               amrex::MultiFab* rhomf,
-                               amrex::MultiFab* crhomf,
-                               int icomp,
-                               const long np_current,
-                               const long np,
-                               int thread_num,
-                               int lev );
-
-    virtual void DepositCurrent(WarpXParIter& pti,
-                                RealVector& wp,
-                                RealVector& uxp,
-                                RealVector& uyp,
-                                RealVector& uzp,
-                                amrex::MultiFab& jx,
-                                amrex::MultiFab& jy,
-                                amrex::MultiFab& jz,
-                                amrex::MultiFab* cjx,
-                                amrex::MultiFab* cjy,
-                                amrex::MultiFab* cjz,
-                                const long np_current,
-                                const long np,
-                                int thread_num,
-                                int lev,
-                                amrex::Real dt );
+    virtual void DepositCharge(WarpXParIter& pti,
+                               RealVector& wp,
+                               amrex::MultiFab* rhomf,
+                               amrex::MultiFab* crhomf,
+                               int icomp,
+                               const long np_current,
+                               const long np,
+                               int thread_num,
+                               int lev );
+
+    virtual void DepositCurrent(WarpXParIter& pti,
+                                RealVector& wp,
+                                RealVector& uxp,
+                                RealVector& uyp,
+                                RealVector& uzp,
+                                amrex::MultiFab* jx,
+                                amrex::MultiFab* jy,
+                                amrex::MultiFab* jz,
+                                const long offset,
+                                const long np_to_depose,
+                                int thread_num,
+                                int lev,
+                                int depos_lev,
+                                amrex::Real dt);
 
     // If particles start outside of the domain, ContinuousInjection
     // makes sure that they are initialized when they enter the domain, and
diff --git a/Source/Particles/WarpXParticleContainer.cpp b/Source/Particles/WarpXParticleContainer.cpp
index ac532912d..5cdcdd1e3 100644
--- a/Source/Particles/WarpXParticleContainer.cpp
+++ b/Source/Particles/WarpXParticleContainer.cpp
@@ -279,187 +279,140 @@ WarpXParticleContainer::AddNParticles (int lev,
 
     Redistribute();
 }
 
-
+/* \brief Current Deposition for thread thread_num
+ * \param pti          : Particle iterator
+ * \param wp           : Array of particle weights
+ * \param uxp uyp uzp  : Arrays of particle momenta
+ * \param jx jy jz     : Full array of current density
+ * \param offset       : Index of first particle for which current is deposited
+ * \param np_to_depose : Number of particles for which current is deposited.
+                         Particles [offset, offset+np_to_depose) deposit their
+                         current
+ * \param thread_num   : Thread number (if tiling)
+ * \param lev          : Level of box that contains particles
+ * \param depos_lev    : Level on which particles deposit (if buffers are used)
+ * \param dt           : Time step for particle level
+ */
 void
 WarpXParticleContainer::DepositCurrent(WarpXParIter& pti,
                                        RealVector& wp, RealVector& uxp,
                                        RealVector& uyp, RealVector& uzp,
-                                       MultiFab& jx, MultiFab& jy, MultiFab& jz,
-                                       MultiFab* cjx, MultiFab* cjy, MultiFab* cjz,
-                                       const long np_current, const long np,
-                                       int thread_num, int lev, Real dt )
+                                       MultiFab* jx, MultiFab* jy, MultiFab* jz,
+                                       const long offset, const long np_to_depose,
+                                       int thread_num, int lev, int depos_lev,
+                                       Real dt)
 {
-    Real *jx_ptr, *jy_ptr, *jz_ptr;
-    const std::array<Real,3>& xyzmin_tile = WarpX::LowerCorner(pti.tilebox(), lev);
-    const std::array<Real,3>& dx = WarpX::CellSize(lev);
-    const std::array<Real,3>& cdx = WarpX::CellSize(std::max(lev-1,0));
-    const std::array<Real, 3>& xyzmin = xyzmin_tile;
-    const long lvect = 8;
-
-    BL_PROFILE_VAR_NS("PICSAR::CurrentDeposition", blp_pxr_cd);
-    BL_PROFILE_VAR_NS("PPC::Evolve::Accumulate", blp_accumulate);
-
-    Box tbx = convert(pti.tilebox(), WarpX::jx_nodal_flag);
-    Box tby = convert(pti.tilebox(), WarpX::jy_nodal_flag);
-    Box tbz = convert(pti.tilebox(), WarpX::jz_nodal_flag);
-
-    // WarpX assumes the same number of guard cells for Jx, Jy, Jz
-    long ngJ = jx.nGrow();
-
-    int j_is_nodal = jx.is_nodal() and jy.is_nodal() and jz.is_nodal();
-
-    // Deposit charge for particles that are not in the current buffers
-    if (np_current > 0)
-    {
-#ifdef AMREX_USE_GPU
-        jx_ptr = jx[pti].dataPtr();
-        jy_ptr = jy[pti].dataPtr();
-        jz_ptr = jz[pti].dataPtr();
-
-        auto jxntot = jx[pti].length();
-        auto jyntot = jy[pti].length();
-        auto jzntot = jz[pti].length();
-#else
-        tbx.grow(ngJ);
-        tby.grow(ngJ);
-        tbz.grow(ngJ);
-
-        local_jx[thread_num].resize(tbx);
-        local_jy[thread_num].resize(tby);
-        local_jz[thread_num].resize(tbz);
-
-        jx_ptr = local_jx[thread_num].dataPtr();
-        jy_ptr = local_jy[thread_num].dataPtr();
-        jz_ptr = local_jz[thread_num].dataPtr();
-
-        local_jx[thread_num].setVal(0.0);
-        local_jy[thread_num].setVal(0.0);
-        local_jz[thread_num].setVal(0.0);
-
-        auto jxntot = local_jx[thread_num].length();
-        auto jyntot = local_jy[thread_num].length();
-        auto jzntot = local_jz[thread_num].length();
-#endif
-
-        BL_PROFILE_VAR_START(blp_pxr_cd);
-        warpx_current_deposition(
-            jx_ptr, &ngJ, jxntot.getVect(),
-            jy_ptr, &ngJ, jyntot.getVect(),
-            jz_ptr, &ngJ, jzntot.getVect(),
-            &np_current,
-            m_xp[thread_num].dataPtr(),
-            m_yp[thread_num].dataPtr(),
-            m_zp[thread_num].dataPtr(),
-            uxp.dataPtr(), uyp.dataPtr(), uzp.dataPtr(),
-            m_giv[thread_num].dataPtr(),
-            wp.dataPtr(), &this->charge,
-            &xyzmin[0], &xyzmin[1], &xyzmin[2],
-            &dt, &dx[0], &dx[1], &dx[2],
-            &WarpX::nox,&WarpX::noy,&WarpX::noz, &j_is_nodal,
-            &lvect,&WarpX::current_deposition_algo);
-
-#ifdef WARPX_RZ
-        warpx_current_deposition_rz_volume_scaling(
-            jx_ptr, &ngJ, jxntot.getVect(),
-            jy_ptr, &ngJ, jyntot.getVect(),
-            jz_ptr, &ngJ, jzntot.getVect(),
-            &xyzmin[0], &dx[0]);
-#endif
-        BL_PROFILE_VAR_STOP(blp_pxr_cd);
-
-#ifndef AMREX_USE_GPU
-        BL_PROFILE_VAR_START(blp_accumulate);
-
-        jx[pti].atomicAdd(local_jx[thread_num], tbx, tbx, 0, 0, 1);
-        jy[pti].atomicAdd(local_jy[thread_num], tby, tby, 0, 0, 1);
-        jz[pti].atomicAdd(local_jz[thread_num], tbz, tbz, 0, 0, 1);
-
-        BL_PROFILE_VAR_STOP(blp_accumulate);
-#endif
-    }
+    AMREX_ALWAYS_ASSERT_WITH_MESSAGE((depos_lev==(lev-1)) ||
+                                     (depos_lev==(lev  )),
+                                     "Deposition buffers only work for lev-1");
+    // If no particles, do not do anything
+    if (np_to_depose == 0) return;
+
+    const long ngJ = jx->nGrow();
+    const std::array<Real,3>& dx = WarpX::CellSize(std::max(depos_lev,0));
+    const long lvect = 8;
+    int j_is_nodal = jx->is_nodal() and jy->is_nodal() and jz->is_nodal();
+
+    BL_PROFILE_VAR_NS("PICSAR::CurrentDeposition", blp_pxr_cd);
+    BL_PROFILE_VAR_NS("PPC::Evolve::Accumulate", blp_accumulate);
+
+    // Get tile box where current is deposited.
+    // The tile box is different when depositing in the buffers (depos_lev<lev)
+    // or when depositing inside the level (depos_lev=lev)
+    Box tilebox;
+    if (lev == depos_lev) {
+        tilebox = pti.tilebox();
+    } else {
+        const IntVect& ref_ratio = WarpX::RefRatio(depos_lev);
+        tilebox = amrex::coarsen(pti.tilebox(),ref_ratio);
+    }
+
+    // Staggered tile boxes (different in each direction)
+    Box tbx = convert(tilebox, WarpX::jx_nodal_flag);
+    Box tby = convert(tilebox, WarpX::jy_nodal_flag);
+    Box tbz = convert(tilebox, WarpX::jz_nodal_flag);
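+    // [Editor's note, illustrative example, not in the commit: with
+    // ref_ratio = 2, a fine tile [32:63] x [0:31] coarsens to
+    // [16:31] x [0:15]; convert() then applies each current component's
+    // nodal flags to that box, giving one staggered box per direction.]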
-    // Deposit charge for particles that are in the current buffers
-    if (np_current < np)
-    {
-        const IntVect& ref_ratio = WarpX::RefRatio(lev-1);
-        const Box& ctilebox = amrex::coarsen(pti.tilebox(),ref_ratio);
-        const std::array<Real,3>& cxyzmin_tile = WarpX::LowerCorner(ctilebox, lev-1);
+    // Lower corner of tile box physical domain
+    const std::array<Real,3>& xyzmin_tile = WarpX::LowerCorner(tilebox, depos_lev);
+    const std::array<Real, 3>& xyzmin = xyzmin_tile;
 
 #ifdef AMREX_USE_GPU
-        jx_ptr = (*cjx)[pti].dataPtr();
-        jy_ptr = (*cjy)[pti].dataPtr();
-        jz_ptr = (*cjz)[pti].dataPtr();
-
-        auto jxntot = jx[pti].length();
-        auto jyntot = jy[pti].length();
-        auto jzntot = jz[pti].length();
+    // No tiling on GPU: jx_ptr points to the full
+    // jx array (same for jy_ptr and jz_ptr).
+    Real* jx_ptr = (*jx)[pti].dataPtr();
+    Real* jy_ptr = (*jy)[pti].dataPtr();
+    Real* jz_ptr = (*jz)[pti].dataPtr();
+
+    auto jxntot = (*jx)[pti].length();
+    auto jyntot = (*jy)[pti].length();
+    auto jzntot = (*jz)[pti].length();
 #else
+    // Tiling is on: jx_ptr points to local_jx[thread_num]
+    // (same for jy_ptr and jz_ptr)
+    tbx.grow(ngJ);
+    tby.grow(ngJ);
+    tbz.grow(ngJ);
+
+    local_jx[thread_num].resize(tbx);
+    local_jy[thread_num].resize(tby);
+    local_jz[thread_num].resize(tbz);
+
+    Real* jx_ptr = local_jx[thread_num].dataPtr();
+    Real* jy_ptr = local_jy[thread_num].dataPtr();
+    Real* jz_ptr = local_jz[thread_num].dataPtr();
+
+    // local_jx[thread_num] is set to zero
+    local_jx[thread_num].setVal(0.0);
+    local_jy[thread_num].setVal(0.0);
+    local_jz[thread_num].setVal(0.0);
+
+    auto jxntot = local_jx[thread_num].length();
+    auto jyntot = local_jy[thread_num].length();
+    auto jzntot = local_jz[thread_num].length();
+#endif
+    // GPU, no tiling: deposit directly in jx
+    // CPU, tiling: deposit into local_jx
+    // (same for jy and jz)
+    BL_PROFILE_VAR_START(blp_pxr_cd);
+    warpx_current_deposition(
+        jx_ptr, &ngJ, jxntot.getVect(),
+        jy_ptr, &ngJ, jyntot.getVect(),
+        jz_ptr, &ngJ, jzntot.getVect(),
+        &np_to_depose,
+        m_xp[thread_num].dataPtr() + offset,
+        m_yp[thread_num].dataPtr() + offset,
+        m_zp[thread_num].dataPtr() + offset,
+        uxp.dataPtr() + offset,
+        uyp.dataPtr() + offset,
+        uzp.dataPtr() + offset,
+        m_giv[thread_num].dataPtr() + offset,
+        wp.dataPtr() + offset, &this->charge,
+        &xyzmin[0], &xyzmin[1], &xyzmin[2],
+        &dt, &dx[0], &dx[1], &dx[2],
+        &WarpX::nox,&WarpX::noy,&WarpX::noz, &j_is_nodal,
+        &lvect,&WarpX::current_deposition_algo);
 
-        tbx = amrex::convert(ctilebox, WarpX::jx_nodal_flag);
-        tby = amrex::convert(ctilebox, WarpX::jy_nodal_flag);
-        tbz = amrex::convert(ctilebox, WarpX::jz_nodal_flag);
-        tbx.grow(ngJ);
-        tby.grow(ngJ);
-        tbz.grow(ngJ);
-
-        local_jx[thread_num].resize(tbx);
-        local_jy[thread_num].resize(tby);
-        local_jz[thread_num].resize(tbz);
-
-        jx_ptr = local_jx[thread_num].dataPtr();
-        jy_ptr = local_jy[thread_num].dataPtr();
-        jz_ptr = local_jz[thread_num].dataPtr();
-
-        local_jx[thread_num].setVal(0.0);
-        local_jy[thread_num].setVal(0.0);
-        local_jz[thread_num].setVal(0.0);
-
-        auto jxntot = local_jx[thread_num].length();
-        auto jyntot = local_jy[thread_num].length();
-        auto jzntot = local_jz[thread_num].length();
-#endif
-
-        long ncrse = np - np_current;
-        BL_PROFILE_VAR_START(blp_pxr_cd);
-        warpx_current_deposition(
-            jx_ptr, &ngJ, jxntot.getVect(),
-            jy_ptr, &ngJ, jyntot.getVect(),
-            jz_ptr, &ngJ, jzntot.getVect(),
-            &ncrse,
-            m_xp[thread_num].dataPtr() +np_current,
-            m_yp[thread_num].dataPtr() +np_current,
-            m_zp[thread_num].dataPtr() +np_current,
-            uxp.dataPtr()+np_current,
-            uyp.dataPtr()+np_current,
-            uzp.dataPtr()+np_current,
-            m_giv[thread_num].dataPtr()+np_current,
-            wp.dataPtr()+np_current, &this->charge,
-            &cxyzmin_tile[0], &cxyzmin_tile[1], &cxyzmin_tile[2],
-            &dt, &cdx[0], &cdx[1], &cdx[2],
-            &WarpX::nox,&WarpX::noy,&WarpX::noz, &j_is_nodal,
-            &lvect,&WarpX::current_deposition_algo);
 
 #ifdef WARPX_RZ
-        warpx_current_deposition_rz_volume_scaling(
-            jx_ptr, &ngJ, jxntot.getVect(),
-            jy_ptr, &ngJ, jyntot.getVect(),
-            jz_ptr, &ngJ, jzntot.getVect(),
-            &xyzmin[0], &dx[0]);
+    // Rescale current in r-z mode
+    warpx_current_deposition_rz_volume_scaling(
+        jx_ptr, &ngJ, jxntot.getVect(),
+        jy_ptr, &ngJ, jyntot.getVect(),
+        jz_ptr, &ngJ, jzntot.getVect(),
+        &xyzmin[0], &dx[0]);
 #endif
-
-        BL_PROFILE_VAR_STOP(blp_pxr_cd);
+    BL_PROFILE_VAR_STOP(blp_pxr_cd);
 
 #ifndef AMREX_USE_GPU
-        BL_PROFILE_VAR_START(blp_accumulate);
-
-        (*cjx)[pti].atomicAdd(local_jx[thread_num], tbx, tbx, 0, 0, 1);
-        (*cjy)[pti].atomicAdd(local_jy[thread_num], tby, tby, 0, 0, 1);
-        (*cjz)[pti].atomicAdd(local_jz[thread_num], tbz, tbz, 0, 0, 1);
-
-        BL_PROFILE_VAR_STOP(blp_accumulate);
+    BL_PROFILE_VAR_START(blp_accumulate);
+    // CPU, tiling: atomicAdd local_jx into jx
+    // (same for jy and jz)
+    (*jx)[pti].atomicAdd(local_jx[thread_num], tbx, tbx, 0, 0, 1);
+    (*jy)[pti].atomicAdd(local_jy[thread_num], tby, tby, 0, 0, 1);
+    (*jz)[pti].atomicAdd(local_jz[thread_num], tbz, tbz, 0, 0, 1);
+    BL_PROFILE_VAR_STOP(blp_accumulate);
 #endif
-    }
-};
-
+}
 
 void
 WarpXParticleContainer::DepositCharge (WarpXParIter& pti, RealVector& wp,
diff --git a/Source/WarpX.cpp b/Source/WarpX.cpp
index c2cf97f30..877882037 100644
--- a/Source/WarpX.cpp
+++ b/Source/WarpX.cpp
@@ -18,6 +18,7 @@
 #include <WarpXWrappers.h>
 #include <WarpXUtil.H>
 #include <WarpXAlgorithmSelection.H>
+#include <WarpX_FDTD.H>
 
 #ifdef BL_USE_SENSEI_INSITU
 #include <AMReX_AmrMeshInSituBridge.H>
@@ -924,18 +925,32 @@ WarpX::ComputeDivB (MultiFab& divB, int dcomp,
                     const std::array<const MultiFab*, 3>& B,
                     const std::array<Real,3>& dx)
 {
+    Real dxinv = 1./dx[0], dyinv = 1./dx[1], dzinv = 1./dx[2];
+
+#ifdef WARPX_RZ
+    const Real rmin = GetInstance().Geom(0).ProbLo(0);
+#endif
+
 #ifdef _OPENMP
-#pragma omp parallel
+#pragma omp parallel if (Gpu::notInLaunchRegion())
 #endif
-    for (MFIter mfi(divB, true); mfi.isValid(); ++mfi)
+    for (MFIter mfi(divB, TilingIfNotGPU()); mfi.isValid(); ++mfi)
     {
         const Box& bx = mfi.tilebox();
-        WRPX_COMPUTE_DIVB(bx.loVect(), bx.hiVect(),
-                          BL_TO_FORTRAN_N_ANYD(divB[mfi],dcomp),
-                          BL_TO_FORTRAN_ANYD((*B[0])[mfi]),
-                          BL_TO_FORTRAN_ANYD((*B[1])[mfi]),
-                          BL_TO_FORTRAN_ANYD((*B[2])[mfi]),
-                          dx.data());
+        auto const& Bxfab = B[0]->array(mfi);
+        auto const& Byfab = B[1]->array(mfi);
+        auto const& Bzfab = B[2]->array(mfi);
+        auto const& divBfab = divB.array(mfi);
+
+        ParallelFor(bx,
+        [=] AMREX_GPU_DEVICE(int i, int j, int k) noexcept
+        {
+            warpx_computedivb(i, j, k, dcomp, divBfab, Bxfab, Byfab, Bzfab, dxinv, dyinv, dzinv
+#ifdef WARPX_RZ
+                              ,rmin
+#endif
+                              );
+        });
     }
 }
 
@@ -944,18 +959,32 @@ WarpX::ComputeDivB (MultiFab& divB, int dcomp,
                     const std::array<const MultiFab*, 3>& B,
                     const std::array<Real,3>& dx, int ngrow)
 {
+    Real dxinv = 1./dx[0], dyinv = 1./dx[1], dzinv = 1./dx[2];
+
+#ifdef WARPX_RZ
+    const Real rmin = GetInstance().Geom(0).ProbLo(0);
+#endif
+
 #ifdef _OPENMP
-#pragma omp parallel
+#pragma omp parallel if (Gpu::notInLaunchRegion())
 #endif
-    for (MFIter mfi(divB, true); mfi.isValid(); ++mfi)
+    for (MFIter mfi(divB, TilingIfNotGPU()); mfi.isValid(); ++mfi)
     {
         Box bx = mfi.growntilebox(ngrow);
-        WRPX_COMPUTE_DIVB(bx.loVect(), bx.hiVect(),
-                          BL_TO_FORTRAN_N_ANYD(divB[mfi],dcomp),
-                          BL_TO_FORTRAN_ANYD((*B[0])[mfi]),
-                          BL_TO_FORTRAN_ANYD((*B[1])[mfi]),
-                          BL_TO_FORTRAN_ANYD((*B[2])[mfi]),
-                          dx.data());
+        auto const& Bxfab = B[0]->array(mfi);
+        auto const& Byfab = B[1]->array(mfi);
+        auto const& Bzfab = B[2]->array(mfi);
+        auto const& divBfab = divB.array(mfi);
+
+        ParallelFor(bx,
+        [=] AMREX_GPU_DEVICE(int i, int j, int k) noexcept
+        {
+            warpx_computedivb(i, j, k, dcomp, divBfab, Bxfab, Byfab, Bzfab, dxinv, dyinv, dzinv
+#ifdef WARPX_RZ
+                              ,rmin
+#endif
+                              );
+        });
     }
 }
 
@@ -964,25 +993,32 @@ WarpX::ComputeDivE (MultiFab& divE, int dcomp,
                     const std::array<const MultiFab*, 3>& E,
                     const std::array<Real,3>& dx)
 {
+    Real dxinv = 1./dx[0], dyinv = 1./dx[1], dzinv = 1./dx[2];
+
+#ifdef WARPX_RZ
+    const Real rmin = GetInstance().Geom(0).ProbLo(0);
+#endif
+
 #ifdef _OPENMP
-#pragma omp parallel
+#pragma omp parallel if (Gpu::notInLaunchRegion())
 #endif
-    for (MFIter mfi(divE, true); mfi.isValid(); ++mfi)
+    for (MFIter mfi(divE, TilingIfNotGPU()); mfi.isValid(); ++mfi)
     {
         const Box& bx = mfi.tilebox();
+        auto const& Exfab = E[0]->array(mfi);
+        auto const& Eyfab = E[1]->array(mfi);
+        auto const& Ezfab = E[2]->array(mfi);
+        auto const& divEfab = divE.array(mfi);
+
+        ParallelFor(bx,
+        [=] AMREX_GPU_DEVICE(int i, int j, int k) noexcept
+        {
+            warpx_computedive(i, j, k, dcomp, divEfab, Exfab, Eyfab, Ezfab, dxinv, dyinv, dzinv
 #ifdef WARPX_RZ
-        const Real xmin = GetInstance().Geom(0).ProbLo(0);
-#endif
-        WRPX_COMPUTE_DIVE(bx.loVect(), bx.hiVect(),
-                          BL_TO_FORTRAN_N_ANYD(divE[mfi],dcomp),
-                          BL_TO_FORTRAN_ANYD((*E[0])[mfi]),
-                          BL_TO_FORTRAN_ANYD((*E[1])[mfi]),
-                          BL_TO_FORTRAN_ANYD((*E[2])[mfi]),
-                          dx.data()
-#ifdef WARPX_RZ
-                          ,&xmin
+                              ,rmin
 #endif
-                          );
+                              );
+        });
     }
 }
 
@@ -991,25 +1027,32 @@ WarpX::ComputeDivE (MultiFab& divE, int dcomp,
                     const std::array<const MultiFab*, 3>& E,
                     const std::array<Real,3>& dx, int ngrow)
 {
+    Real dxinv = 1./dx[0], dyinv = 1./dx[1], dzinv = 1./dx[2];
+
+#ifdef WARPX_RZ
+    const Real rmin = GetInstance().Geom(0).ProbLo(0);
+#endif
+
 #ifdef _OPENMP
-#pragma omp parallel
+#pragma omp parallel if (Gpu::notInLaunchRegion())
 #endif
-    for (MFIter mfi(divE, true); mfi.isValid(); ++mfi)
+    for (MFIter mfi(divE, TilingIfNotGPU()); mfi.isValid(); ++mfi)
    {
         Box bx = mfi.growntilebox(ngrow);
+        auto const& Exfab = E[0]->array(mfi);
+        auto const& Eyfab = E[1]->array(mfi);
+        auto const& Ezfab = E[2]->array(mfi);
+        auto const& divEfab = divE.array(mfi);
+
+        ParallelFor(bx,
+        [=] AMREX_GPU_DEVICE(int i, int j, int k) noexcept
+        {
+            warpx_computedive(i, j, k, dcomp, divEfab, Exfab, Eyfab, Ezfab, dxinv, dyinv, dzinv
 #ifdef WARPX_RZ
-        const Real xmin = GetInstance().Geom(0).ProbLo(0);
-#endif
-        WRPX_COMPUTE_DIVE(bx.loVect(), bx.hiVect(),
-                          BL_TO_FORTRAN_N_ANYD(divE[mfi],dcomp),
-                          BL_TO_FORTRAN_ANYD((*E[0])[mfi]),
-                          BL_TO_FORTRAN_ANYD((*E[1])[mfi]),
-                          BL_TO_FORTRAN_ANYD((*E[2])[mfi]),
-                          dx.data()
-#ifdef WARPX_RZ
-                          ,&xmin
+                              ,rmin
 #endif
-                          );
+                              );
+        });
     }
 }
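A closing note on the WarpX.cpp hunks (editor's sketch, not part of the commit): dxinv/dyinv/dzinv and rmin are computed once before the MFIter loop because the device lambda captures by value, and host-only calls such as GetInstance().Geom() cannot run inside GPU kernels. The skeleton shared by all four Compute* routines, with hypothetical names F, divF, and stencil standing in for the real fields and operators:

    Real dxinv = 1./dx[0];                       // plain value, captured by copy
    for (MFIter mfi(divF, TilingIfNotGPU()); mfi.isValid(); ++mfi)
    {
        const Box& bx = mfi.tilebox();           // or mfi.growntilebox(ngrow)
        auto const& Ffab = F->array(mfi);        // Array4 view, usable on device
        auto const& divFfab = divF.array(mfi);
        ParallelFor(bx,
        [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept
        {
            divFfab(i,j,k) = stencil(i, j, k, Ffab, dxinv);  // per-cell stencil
        });
    }

On CPU builds ParallelFor degenerates to a tiled triple loop, so the same source serves both backends; that is what lets this commit delete the Fortran kernels outright.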