diff options
author | 2019-07-11 14:07:54 -0700 | |
---|---|---|
committer | 2019-07-11 14:07:54 -0700 | |
commit | a944bdd6c5a4612e32aaefc894c232b958129bea (patch) | |
tree | 7214d83425252264f7cab110305d4ef948ed70e8 /Source/Particles/WarpXParticleContainer.cpp | |
parent | 69ad6bab082470ce2f8c5199c07ef370e8b55f34 (diff) | |
download | WarpX-a944bdd6c5a4612e32aaefc894c232b958129bea.tar.gz WarpX-a944bdd6c5a4612e32aaefc894c232b958129bea.tar.zst WarpX-a944bdd6c5a4612e32aaefc894c232b958129bea.zip |
keep option to use PICSAR current deposition
Diffstat (limited to 'Source/Particles/WarpXParticleContainer.cpp')
-rw-r--r-- | Source/Particles/WarpXParticleContainer.cpp | 171 |
1 files changed, 144 insertions, 27 deletions
diff --git a/Source/Particles/WarpXParticleContainer.cpp b/Source/Particles/WarpXParticleContainer.cpp index 6ea36c841..42b99efcb 100644 --- a/Source/Particles/WarpXParticleContainer.cpp +++ b/Source/Particles/WarpXParticleContainer.cpp @@ -280,23 +280,7 @@ WarpXParticleContainer::AddNParticles (int lev, Redistribute(); } -//doDepositionShapeN<1>(wp, uxp, uyp, uzp, jx_arr, jy_arr, jz_arr, -// offset, np_to_depose, dt, dx, -// tilebox, stagger_shift, q); - -// template <int depos_order> -/* -void WarpXParticleContainer::doDepositionShapeN(RealVector& wp, RealVector& uxp, - RealVector& uyp, RealVector& uzp, - Array4<Real>& jx_arr, - Array4<Real>& jy_arr, - Array4<Real>& jz_arr, - const long offset, const long np_to_depose, - const Real dt, const std::array<Real,3>& dx, - const Box tilebox, const Real stagger_shift, - const Real q) -*/ -/* \brief Current Deposition for thread thread_num +/* \brief Current Deposition for thread thread_num using PICSAR * \param pti : Particle iterator * \param wp : Array of particle weights * \param uxp uyp uzp : Array of particle @@ -311,6 +295,139 @@ void WarpXParticleContainer::doDepositionShapeN(RealVector& wp, RealVector& uxp, * \param dt : Time step for particle level */ void +WarpXParticleContainer::DepositCurrentFortran(WarpXParIter& pti, + RealVector& wp, RealVector& uxp, + RealVector& uyp, RealVector& uzp, + MultiFab* jx, MultiFab* jy, MultiFab* jz, + const long offset, const long np_to_depose, + int thread_num, int lev, int depos_lev, + Real dt) +{ + AMREX_ALWAYS_ASSERT_WITH_MESSAGE((depos_lev==(lev-1)) || + (depos_lev==(lev )), + "Deposition buffers only work for lev-1"); + // If no particles, do not do anything + if (np_to_depose == 0) return; + + const long ngJ = jx->nGrow(); + const std::array<Real,3>& dx = WarpX::CellSize(std::max(depos_lev,0)); + const long lvect = 8; + int j_is_nodal = jx->is_nodal() and jy->is_nodal() and jz->is_nodal(); + + BL_PROFILE_VAR_NS("PICSAR::CurrentDeposition", blp_pxr_cd); + BL_PROFILE_VAR_NS("PPC::Evolve::Accumulate", blp_accumulate); + + // Get tile box where current is deposited. + // The tile box is different when depositing in the buffers (depos_lev<lev) + // or when depositing inside the level (depos_lev=lev) + Box tilebox; + if (lev == depos_lev) { + tilebox = pti.tilebox(); + } else { + const IntVect& ref_ratio = WarpX::RefRatio(depos_lev); + tilebox = amrex::coarsen(pti.tilebox(),ref_ratio); + } + + // Staggered tile boxes (different in each direction) + Box tbx = convert(tilebox, WarpX::jx_nodal_flag); + Box tby = convert(tilebox, WarpX::jy_nodal_flag); + Box tbz = convert(tilebox, WarpX::jz_nodal_flag); + + // Lower corner of tile box physical domain + const std::array<Real,3>& xyzmin_tile = WarpX::LowerCorner(tilebox, depos_lev); + const std::array<Real, 3>& xyzmin = xyzmin_tile; + +#ifdef AMREX_USE_GPU + // No tiling on GPU: jx_ptr points to the full + // jx array (same for jy_ptr and jz_ptr). + Real* jx_ptr = (*jx)[pti].dataPtr(); + Real* jy_ptr = (*jy)[pti].dataPtr(); + Real* jz_ptr = (*jz)[pti].dataPtr(); + + auto jxntot = (*jx)[pti].length(); + auto jyntot = (*jy)[pti].length(); + auto jzntot = (*jz)[pti].length(); +#else + // Tiling is on: jx_ptr points to local_jx[thread_num] + // (same for jy_ptr and jz_ptr) + tbx.grow(ngJ); + tby.grow(ngJ); + tbz.grow(ngJ); + + local_jx[thread_num].resize(tbx); + local_jy[thread_num].resize(tby); + local_jz[thread_num].resize(tbz); + + Real* jx_ptr = local_jx[thread_num].dataPtr(); + Real* jy_ptr = local_jy[thread_num].dataPtr(); + Real* jz_ptr = local_jz[thread_num].dataPtr(); + + // local_jx[thread_num] is set to zero + local_jx[thread_num].setVal(0.0); + local_jy[thread_num].setVal(0.0); + local_jz[thread_num].setVal(0.0); + + auto jxntot = local_jx[thread_num].length(); + auto jyntot = local_jy[thread_num].length(); + auto jzntot = local_jz[thread_num].length(); +#endif + // GPU, no tiling: deposit directly in jx + // CPU, tiling: deposit into local_jx + // (same for jx and jz) + BL_PROFILE_VAR_START(blp_pxr_cd); + warpx_current_deposition( + jx_ptr, &ngJ, jxntot.getVect(), + jy_ptr, &ngJ, jyntot.getVect(), + jz_ptr, &ngJ, jzntot.getVect(), + &np_to_depose, + m_xp[thread_num].dataPtr() + offset, + m_yp[thread_num].dataPtr() + offset, + m_zp[thread_num].dataPtr() + offset, + uxp.dataPtr() + offset, + uyp.dataPtr() + offset, + uzp.dataPtr() + offset, + m_giv[thread_num].dataPtr() + offset, + wp.dataPtr() + offset, &this->charge, + &xyzmin[0], &xyzmin[1], &xyzmin[2], + &dt, &dx[0], &dx[1], &dx[2], + &WarpX::nox,&WarpX::noy,&WarpX::noz, &j_is_nodal, + &lvect,&WarpX::current_deposition_algo); + +#ifdef WARPX_RZ + // Rescale current in r-z mode + warpx_current_deposition_rz_volume_scaling( + jx_ptr, &ngJ, jxntot.getVect(), + jy_ptr, &ngJ, jyntot.getVect(), + jz_ptr, &ngJ, jzntot.getVect(), + &xyzmin[0], &dx[0]); +#endif + BL_PROFILE_VAR_STOP(blp_pxr_cd); + +#ifndef AMREX_USE_GPU + BL_PROFILE_VAR_START(blp_accumulate); + // CPU, tiling: atomicAdd local_jx into jx + // (same for jx and jz) + (*jx)[pti].atomicAdd(local_jx[thread_num], tbx, tbx, 0, 0, 1); + (*jy)[pti].atomicAdd(local_jy[thread_num], tby, tby, 0, 0, 1); + (*jz)[pti].atomicAdd(local_jz[thread_num], tbz, tbz, 0, 0, 1); + BL_PROFILE_VAR_STOP(blp_accumulate); +#endif +} + +/* \brief Current Deposition for thread thread_num + * \param pti : Particle iterator + * \param wp : Array of particle weights + * \param uxp uyp uzp : Array of particle + * \param jx jy jz : Full array of current density + * \param offset : Index of first particle for which current is deposited + * \param np_to_depose: Number of particles for which current is deposited. + Particles [offset,offset+np_tp_depose] deposit current + * \param thread_num : Thread number (if tiling) + * \param lev : Level of box that contains particles + * \param depos_lev : Level on which particles deposit (if buffers are used) + * \param dt : Time step for particle level + */ +void WarpXParticleContainer::DepositCurrent(WarpXParIter& pti, RealVector& wp, RealVector& uxp, RealVector& uyp, RealVector& uzp, @@ -356,9 +473,6 @@ WarpXParticleContainer::DepositCurrent(WarpXParIter& pti, Array4<Real> const& jx_arr = jx->array(pti); Array4<Real> const& jy_arr = jy->array(pti); Array4<Real> const& jz_arr = jz->array(pti); - //auto const& jx_arr = jx->array(pti); - //auto const& jy_arr = jy->array(pti); - //auto const& jz_arr = jz->array(pti); #else // Tiling is on: jx_ptr points to local_jx[thread_num] // (same for jy_ptr and jz_ptr) @@ -396,17 +510,20 @@ WarpXParticleContainer::DepositCurrent(WarpXParIter& pti, // Better for memory? worth trying? const Dim3 lo = lbound(tilebox); - if (WarpX::nox == 1){ - doDepositionShapeN<1>(xp, yp, zp, wp.dataPtr(), uxp.dataPtr(), uyp.dataPtr(), uzp.dataPtr(), jx_arr, jy_arr, jz_arr, - offset, np_to_depose, dt, dx, + if (WarpX::nox == 1){ + doDepositionShapeN<1>(xp, yp, zp, wp.dataPtr(), uxp.dataPtr(), + uyp.dataPtr(), uzp.dataPtr(), jx_arr, jy_arr, + jz_arr, offset, np_to_depose, dt, dx, xyzmin, lo, stagger_shift, q); } else if (WarpX::nox == 2){ - doDepositionShapeN<2>(xp, yp, zp, wp.dataPtr(), uxp.dataPtr(), uyp.dataPtr(), uzp.dataPtr(), jx_arr, jy_arr, jz_arr, - offset, np_to_depose, dt, dx, + doDepositionShapeN<2>(xp, yp, zp, wp.dataPtr(), uxp.dataPtr(), + uyp.dataPtr(), uzp.dataPtr(), jx_arr, jy_arr, + jz_arr, offset, np_to_depose, dt, dx, xyzmin, lo, stagger_shift, q); } else if (WarpX::nox == 3){ - doDepositionShapeN<3>(xp, yp, zp, wp.dataPtr(), uxp.dataPtr(), uyp.dataPtr(), uzp.dataPtr(), jx_arr, jy_arr, jz_arr, - offset, np_to_depose, dt, dx, + doDepositionShapeN<3>(xp, yp, zp, wp.dataPtr(), uxp.dataPtr(), + uyp.dataPtr(), uzp.dataPtr(), jx_arr, jy_arr, + jz_arr, offset, np_to_depose, dt, dx, xyzmin, lo, stagger_shift, q); } |