diff options
Diffstat (limited to 'Source')
-rw-r--r-- | Source/Diagnostics/BoostedFrameDiagnostic.cpp | 4 | ||||
-rw-r--r-- | Source/Laser/LaserParticleContainer.cpp | 13 | ||||
-rw-r--r-- | Source/Make.WarpX | 10 | ||||
-rw-r--r-- | Source/Particles/PhysicalParticleContainer.cpp | 12 | ||||
-rw-r--r-- | Source/Particles/WarpXParticleContainer.H | 50 | ||||
-rw-r--r-- | Source/Particles/WarpXParticleContainer.cpp | 285 | ||||
-rw-r--r-- | Source/WarpX.H | 33 |
7 files changed, 193 insertions, 214 deletions
diff --git a/Source/Diagnostics/BoostedFrameDiagnostic.cpp b/Source/Diagnostics/BoostedFrameDiagnostic.cpp index 5e7d4e0ad..ad4f50806 100644 --- a/Source/Diagnostics/BoostedFrameDiagnostic.cpp +++ b/Source/Diagnostics/BoostedFrameDiagnostic.cpp @@ -468,9 +468,9 @@ namespace { const int icomp = field_map_ptr[n]; #if (AMREX_SPACEDIM == 3) - buf_arr(i,j,k_lab,n) += tmp_arr(i,j,k,icomp); + buf_arr(i,j,k_lab,n) = tmp_arr(i,j,k,icomp); #else - buf_arr(i,k_lab,k,n) += tmp_arr(i,j,k,icomp); + buf_arr(i,k_lab,k,n) = tmp_arr(i,j,k,icomp); #endif } ); diff --git a/Source/Laser/LaserParticleContainer.cpp b/Source/Laser/LaserParticleContainer.cpp index de410b31f..515aa1f5d 100644 --- a/Source/Laser/LaserParticleContainer.cpp +++ b/Source/Laser/LaserParticleContainer.cpp @@ -503,8 +503,17 @@ LaserParticleContainer::Evolve (int lev, // // Current Deposition // - DepositCurrent(pti, wp, uxp, uyp, uzp, jx, jy, jz, - cjx, cjy, cjz, np_current, np, thread_num, lev, dt); + // Deposit inside domains + DepositCurrent(pti, wp, uxp, uyp, uzp, &jx, &jy, &jz, + 0, np_current, thread_num, + lev, lev, dt); + bool has_buffer = cjx; + if (has_buffer){ + // Deposit in buffers + DepositCurrent(pti, wp, uxp, uyp, uzp, cjx, cjy, cjz, + np_current, np-np_current, thread_num, + lev, lev-1, dt); + } // // copy particle data back diff --git a/Source/Make.WarpX b/Source/Make.WarpX index c1ef54c33..bc837c797 100644 --- a/Source/Make.WarpX +++ b/Source/Make.WarpX @@ -45,6 +45,16 @@ endif DEFINES += -DWARPX_USE_PY endif +ifeq ($(DIM),3) + DEFINES += -DWARPX_DIM_3D +else +ifeq ($(USE_RZ),TRUE) + DEFINES += -DWARPX_DIM_RZ +else + DEFINES += -DWARPX_DIM_2D +endif +endif + -include Make.package include $(WARPX_HOME)/Source/Make.package include $(WARPX_HOME)/Source/BoundaryConditions/Make.package diff --git a/Source/Particles/PhysicalParticleContainer.cpp b/Source/Particles/PhysicalParticleContainer.cpp index 7e7c9534e..43b46ec49 100644 --- a/Source/Particles/PhysicalParticleContainer.cpp +++ b/Source/Particles/PhysicalParticleContainer.cpp @@ -1529,8 +1529,16 @@ PhysicalParticleContainer::Evolve (int lev, // // Current Deposition // - DepositCurrent(pti, wp, uxp, uyp, uzp, jx, jy, jz, - cjx, cjy, cjz, np_current, np, thread_num, lev, dt); + // Deposit inside domains + DepositCurrent(pti, wp, uxp, uyp, uzp, &jx, &jy, &jz, + 0, np_current, thread_num, + lev, lev, dt); + if (has_buffer){ + // Deposit in buffers + DepositCurrent(pti, wp, uxp, uyp, uzp, cjx, cjy, cjz, + np_current, np-np_current, thread_num, + lev, lev-1, dt); + } // // copy particle data back diff --git a/Source/Particles/WarpXParticleContainer.H b/Source/Particles/WarpXParticleContainer.H index 1e3dfb2ae..0e800bf1d 100644 --- a/Source/Particles/WarpXParticleContainer.H +++ b/Source/Particles/WarpXParticleContainer.H @@ -164,32 +164,30 @@ public: bool local = false); std::unique_ptr<amrex::MultiFab> GetChargeDensity(int lev, bool local = false); - virtual void DepositCharge(WarpXParIter& pti, - RealVector& wp, - amrex::MultiFab* rhomf, - amrex::MultiFab* crhomf, - int icomp, - const long np_current, - const long np, - int thread_num, - int lev ); - - virtual void DepositCurrent(WarpXParIter& pti, - RealVector& wp, - RealVector& uxp, - RealVector& uyp, - RealVector& uzp, - amrex::MultiFab& jx, - amrex::MultiFab& jy, - amrex::MultiFab& jz, - amrex::MultiFab* cjx, - amrex::MultiFab* cjy, - amrex::MultiFab* cjz, - const long np_current, - const long np, - int thread_num, - int lev, - amrex::Real dt ); + virtual void DepositCharge(WarpXParIter& pti, + RealVector& wp, + amrex::MultiFab* rhomf, + amrex::MultiFab* crhomf, + int icomp, + const long np_current, + const long np, + int thread_num, + int lev ); + + virtual void DepositCurrent(WarpXParIter& pti, + RealVector& wp, + RealVector& uxp, + RealVector& uyp, + RealVector& uzp, + amrex::MultiFab* jx, + amrex::MultiFab* jy, + amrex::MultiFab* jz, + const long offset, + const long np_to_depose, + int thread_num, + int lev, + int depos_lev, + amrex::Real dt); // If particles start outside of the domain, ContinuousInjection // makes sure that they are initialized when they enter the domain, and diff --git a/Source/Particles/WarpXParticleContainer.cpp b/Source/Particles/WarpXParticleContainer.cpp index ac532912d..317f46fd4 100644 --- a/Source/Particles/WarpXParticleContainer.cpp +++ b/Source/Particles/WarpXParticleContainer.cpp @@ -279,187 +279,140 @@ WarpXParticleContainer::AddNParticles (int lev, Redistribute(); } - +/* \brief Current Deposition for thread thread_num + * \param pti : Particle iterator + * \param wp : Array of particle weights + * \param uxp uyp uzp : Array of particle + * \param jx jy jz : Full array of current density + * \param offset : Index of first particle for which current is deposited + * \param np_to_depose: Number of particles for which current is deposited. + Particles [offset,offset+np_tp_depose] deposit their + current + * \param thread_num : Thread number (if tiling) + * \param lev : Level of box that contains particles + * \param depos_lev : Level on which particles deposit (if buffers are used) + * \param dt : Time step for particle level + * \param ngJ : Number of ghosts cells (of lev) + */ void WarpXParticleContainer::DepositCurrent(WarpXParIter& pti, RealVector& wp, RealVector& uxp, RealVector& uyp, RealVector& uzp, - MultiFab& jx, MultiFab& jy, MultiFab& jz, - MultiFab* cjx, MultiFab* cjy, MultiFab* cjz, - const long np_current, const long np, - int thread_num, int lev, Real dt ) + MultiFab* jx, MultiFab* jy, MultiFab* jz, + const long offset, const long np_to_depose, + int thread_num, int lev, int depos_lev, + Real dt) { - Real *jx_ptr, *jy_ptr, *jz_ptr; - const std::array<Real,3>& xyzmin_tile = WarpX::LowerCorner(pti.tilebox(), lev); - const std::array<Real,3>& dx = WarpX::CellSize(lev); - const std::array<Real,3>& cdx = WarpX::CellSize(std::max(lev-1,0)); - const std::array<Real, 3>& xyzmin = xyzmin_tile; - const long lvect = 8; - - BL_PROFILE_VAR_NS("PICSAR::CurrentDeposition", blp_pxr_cd); - BL_PROFILE_VAR_NS("PPC::Evolve::Accumulate", blp_accumulate); - - Box tbx = convert(pti.tilebox(), WarpX::jx_nodal_flag); - Box tby = convert(pti.tilebox(), WarpX::jy_nodal_flag); - Box tbz = convert(pti.tilebox(), WarpX::jz_nodal_flag); - - // WarpX assumes the same number of guard cells for Jx, Jy, Jz - long ngJ = jx.nGrow(); - - int j_is_nodal = jx.is_nodal() and jy.is_nodal() and jz.is_nodal(); - - // Deposit charge for particles that are not in the current buffers - if (np_current > 0) - { -#ifdef AMREX_USE_GPU - jx_ptr = jx[pti].dataPtr(); - jy_ptr = jy[pti].dataPtr(); - jz_ptr = jz[pti].dataPtr(); - - auto jxntot = jx[pti].length(); - auto jyntot = jy[pti].length(); - auto jzntot = jz[pti].length(); -#else - tbx.grow(ngJ); - tby.grow(ngJ); - tbz.grow(ngJ); - - local_jx[thread_num].resize(tbx); - local_jy[thread_num].resize(tby); - local_jz[thread_num].resize(tbz); - - jx_ptr = local_jx[thread_num].dataPtr(); - jy_ptr = local_jy[thread_num].dataPtr(); - jz_ptr = local_jz[thread_num].dataPtr(); - - local_jx[thread_num].setVal(0.0); - local_jy[thread_num].setVal(0.0); - local_jz[thread_num].setVal(0.0); - - auto jxntot = local_jx[thread_num].length(); - auto jyntot = local_jy[thread_num].length(); - auto jzntot = local_jz[thread_num].length(); -#endif - - BL_PROFILE_VAR_START(blp_pxr_cd); - warpx_current_deposition( - jx_ptr, &ngJ, jxntot.getVect(), - jy_ptr, &ngJ, jyntot.getVect(), - jz_ptr, &ngJ, jzntot.getVect(), - &np_current, - m_xp[thread_num].dataPtr(), - m_yp[thread_num].dataPtr(), - m_zp[thread_num].dataPtr(), - uxp.dataPtr(), uyp.dataPtr(), uzp.dataPtr(), - m_giv[thread_num].dataPtr(), - wp.dataPtr(), &this->charge, - &xyzmin[0], &xyzmin[1], &xyzmin[2], - &dt, &dx[0], &dx[1], &dx[2], - &WarpX::nox,&WarpX::noy,&WarpX::noz, &j_is_nodal, - &lvect,&WarpX::current_deposition_algo); - -#ifdef WARPX_RZ - warpx_current_deposition_rz_volume_scaling( - jx_ptr, &ngJ, jxntot.getVect(), - jy_ptr, &ngJ, jyntot.getVect(), - jz_ptr, &ngJ, jzntot.getVect(), - &xyzmin[0], &dx[0]); -#endif - BL_PROFILE_VAR_STOP(blp_pxr_cd); - -#ifndef AMREX_USE_GPU - BL_PROFILE_VAR_START(blp_accumulate); - - jx[pti].atomicAdd(local_jx[thread_num], tbx, tbx, 0, 0, 1); - jy[pti].atomicAdd(local_jy[thread_num], tby, tby, 0, 0, 1); - jz[pti].atomicAdd(local_jz[thread_num], tbz, tbz, 0, 0, 1); - - BL_PROFILE_VAR_STOP(blp_accumulate); -#endif - } + AMREX_ALWAYS_ASSERT_WITH_MESSAGE((depos_lev==(lev-1)) || + (depos_lev==(lev )), + "Deposition buffers only work for lev-1"); + // If no particles, do not do anything + if (np_to_depose == 0) return; + + const long ngJ = jx->nGrow(); + const std::array<Real,3>& dx = WarpX::CellSize(std::max(depos_lev,0)); + const long lvect = 8; + int j_is_nodal = jx->is_nodal() and jy->is_nodal() and jz->is_nodal(); + + BL_PROFILE_VAR_NS("PICSAR::CurrentDeposition", blp_pxr_cd); + BL_PROFILE_VAR_NS("PPC::Evolve::Accumulate", blp_accumulate); + + // Get tile box where current is deposited. + // The tile box is different when depositing in the buffers (depos_lev<lev) + // or when depositing inside the level (depos_lev=lev) + Box tilebox; + if (lev == depos_lev) { + tilebox = pti.tilebox(); + } else { + const IntVect& ref_ratio = WarpX::RefRatio(depos_lev); + tilebox = amrex::coarsen(pti.tilebox(),ref_ratio); + } + + // Staggered tile boxes (different in each direction) + Box tbx = convert(tilebox, WarpX::jx_nodal_flag); + Box tby = convert(tilebox, WarpX::jy_nodal_flag); + Box tbz = convert(tilebox, WarpX::jz_nodal_flag); - // Deposit charge for particles that are in the current buffers - if (np_current < np) - { - const IntVect& ref_ratio = WarpX::RefRatio(lev-1); - const Box& ctilebox = amrex::coarsen(pti.tilebox(),ref_ratio); - const std::array<Real,3>& cxyzmin_tile = WarpX::LowerCorner(ctilebox, lev-1); + // Lower corner of tile box physical domain + const std::array<Real,3>& xyzmin_tile = WarpX::LowerCorner(tilebox, depos_lev); + const std::array<Real, 3>& xyzmin = xyzmin_tile; #ifdef AMREX_USE_GPU - jx_ptr = (*cjx)[pti].dataPtr(); - jy_ptr = (*cjy)[pti].dataPtr(); - jz_ptr = (*cjz)[pti].dataPtr(); - - auto jxntot = jx[pti].length(); - auto jyntot = jy[pti].length(); - auto jzntot = jz[pti].length(); + // No tiling on GPU: jx_ptr points to the full + // jx array (same for jy_ptr and jz_ptr). + Real* jx_ptr = (*jx)[pti].dataPtr(); + Real* jy_ptr = (*jy)[pti].dataPtr(); + Real* jz_ptr = (*jz)[pti].dataPtr(); + + auto jxntot = (*jx)[pti].length(); + auto jyntot = (*jy)[pti].length(); + auto jzntot = (*jz)[pti].length(); #else + // Tiling is on: jx_ptr points to local_jx[thread_num] + // (same for jy_ptr and jz_ptr) + tbx.grow(ngJ); + tby.grow(ngJ); + tbz.grow(ngJ); + + local_jx[thread_num].resize(tbx); + local_jy[thread_num].resize(tby); + local_jz[thread_num].resize(tbz); + + Real* jx_ptr = local_jx[thread_num].dataPtr(); + Real* jy_ptr = local_jy[thread_num].dataPtr(); + Real* jz_ptr = local_jz[thread_num].dataPtr(); + + // local_jx[thread_num] is set to zero + local_jx[thread_num].setVal(0.0); + local_jy[thread_num].setVal(0.0); + local_jz[thread_num].setVal(0.0); + + auto jxntot = local_jx[thread_num].length(); + auto jyntot = local_jy[thread_num].length(); + auto jzntot = local_jz[thread_num].length(); +#endif + // GPU, no tiling: deposit directly in jx + // CPU, tiling: deposit into local_jx + // (same for jx and jz) + BL_PROFILE_VAR_START(blp_pxr_cd); + warpx_current_deposition( + jx_ptr, &ngJ, jxntot.getVect(), + jy_ptr, &ngJ, jyntot.getVect(), + jz_ptr, &ngJ, jzntot.getVect(), + &np_to_depose, + m_xp[thread_num].dataPtr() + offset, + m_yp[thread_num].dataPtr() + offset, + m_zp[thread_num].dataPtr() + offset, + uxp.dataPtr() + offset, + uyp.dataPtr() + offset, + uzp.dataPtr() + offset, + m_giv[thread_num].dataPtr() + offset, + wp.dataPtr() + offset, &this->charge, + &xyzmin[0], &xyzmin[1], &xyzmin[2], + &dt, &dx[0], &dx[1], &dx[2], + &WarpX::nox,&WarpX::noy,&WarpX::noz, &j_is_nodal, + &lvect,&WarpX::current_deposition_algo); - tbx = amrex::convert(ctilebox, WarpX::jx_nodal_flag); - tby = amrex::convert(ctilebox, WarpX::jy_nodal_flag); - tbz = amrex::convert(ctilebox, WarpX::jz_nodal_flag); - tbx.grow(ngJ); - tby.grow(ngJ); - tbz.grow(ngJ); - - local_jx[thread_num].resize(tbx); - local_jy[thread_num].resize(tby); - local_jz[thread_num].resize(tbz); - - jx_ptr = local_jx[thread_num].dataPtr(); - jy_ptr = local_jy[thread_num].dataPtr(); - jz_ptr = local_jz[thread_num].dataPtr(); - - local_jx[thread_num].setVal(0.0); - local_jy[thread_num].setVal(0.0); - local_jz[thread_num].setVal(0.0); - - auto jxntot = local_jx[thread_num].length(); - auto jyntot = local_jy[thread_num].length(); - auto jzntot = local_jz[thread_num].length(); -#endif - - long ncrse = np - np_current; - BL_PROFILE_VAR_START(blp_pxr_cd); - warpx_current_deposition( - jx_ptr, &ngJ, jxntot.getVect(), - jy_ptr, &ngJ, jyntot.getVect(), - jz_ptr, &ngJ, jzntot.getVect(), - &ncrse, - m_xp[thread_num].dataPtr() +np_current, - m_yp[thread_num].dataPtr() +np_current, - m_zp[thread_num].dataPtr() +np_current, - uxp.dataPtr()+np_current, - uyp.dataPtr()+np_current, - uzp.dataPtr()+np_current, - m_giv[thread_num].dataPtr()+np_current, - wp.dataPtr()+np_current, &this->charge, - &cxyzmin_tile[0], &cxyzmin_tile[1], &cxyzmin_tile[2], - &dt, &cdx[0], &cdx[1], &cdx[2], - &WarpX::nox,&WarpX::noy,&WarpX::noz, &j_is_nodal, - &lvect,&WarpX::current_deposition_algo); #ifdef WARPX_RZ - warpx_current_deposition_rz_volume_scaling( - jx_ptr, &ngJ, jxntot.getVect(), - jy_ptr, &ngJ, jyntot.getVect(), - jz_ptr, &ngJ, jzntot.getVect(), - &xyzmin[0], &dx[0]); + // Rescale current in r-z mode + warpx_current_deposition_rz_volume_scaling( + jx_ptr, &ngJ, jxntot.getVect(), + jy_ptr, &ngJ, jyntot.getVect(), + jz_ptr, &ngJ, jzntot.getVect(), + &xyzmin[0], &dx[0]); #endif - - BL_PROFILE_VAR_STOP(blp_pxr_cd); + BL_PROFILE_VAR_STOP(blp_pxr_cd); #ifndef AMREX_USE_GPU - BL_PROFILE_VAR_START(blp_accumulate); - - (*cjx)[pti].atomicAdd(local_jx[thread_num], tbx, tbx, 0, 0, 1); - (*cjy)[pti].atomicAdd(local_jy[thread_num], tby, tby, 0, 0, 1); - (*cjz)[pti].atomicAdd(local_jz[thread_num], tbz, tbz, 0, 0, 1); - - BL_PROFILE_VAR_STOP(blp_accumulate); + BL_PROFILE_VAR_START(blp_accumulate); + // CPU, tiling: atomicAdd local_jx into jx + // (same for jx and jz) + (*jx)[pti].atomicAdd(local_jx[thread_num], tbx, tbx, 0, 0, 1); + (*jy)[pti].atomicAdd(local_jy[thread_num], tby, tby, 0, 0, 1); + (*jz)[pti].atomicAdd(local_jz[thread_num], tbz, tbz, 0, 0, 1); + BL_PROFILE_VAR_STOP(blp_accumulate); #endif - } -}; - +} void WarpXParticleContainer::DepositCharge ( WarpXParIter& pti, RealVector& wp, diff --git a/Source/WarpX.H b/Source/WarpX.H index 251270e21..4ad3d119f 100644 --- a/Source/WarpX.H +++ b/Source/WarpX.H @@ -251,6 +251,23 @@ public: void WriteSlicePlotFile () const; void ClearSliceMultiFabs (); + // these should be private, but can't due to Cuda limitations + static void ComputeDivB (amrex::MultiFab& divB, int dcomp, + const std::array<const amrex::MultiFab*, 3>& B, + const std::array<amrex::Real,3>& dx); + + static void ComputeDivB (amrex::MultiFab& divB, int dcomp, + const std::array<const amrex::MultiFab*, 3>& B, + const std::array<amrex::Real,3>& dx, int ngrow); + + static void ComputeDivE (amrex::MultiFab& divE, int dcomp, + const std::array<const amrex::MultiFab*, 3>& B, + const std::array<amrex::Real,3>& dx); + + static void ComputeDivE (amrex::MultiFab& divE, int dcomp, + const std::array<const amrex::MultiFab*, 3>& B, + const std::array<amrex::Real,3>& dx, int ngrow); + protected: //! Tagging cells for refinement @@ -384,22 +401,6 @@ private: std::array<std::unique_ptr<amrex::MultiFab>, 3> getInterpolatedB(int lev) const; - static void ComputeDivB (amrex::MultiFab& divB, int dcomp, - const std::array<const amrex::MultiFab*, 3>& B, - const std::array<amrex::Real,3>& dx); - - static void ComputeDivB (amrex::MultiFab& divB, int dcomp, - const std::array<const amrex::MultiFab*, 3>& B, - const std::array<amrex::Real,3>& dx, int ngrow); - - static void ComputeDivE (amrex::MultiFab& divE, int dcomp, - const std::array<const amrex::MultiFab*, 3>& B, - const std::array<amrex::Real,3>& dx); - - static void ComputeDivE (amrex::MultiFab& divE, int dcomp, - const std::array<const amrex::MultiFab*, 3>& B, - const std::array<amrex::Real,3>& dx, int ngrow); - void SyncCurrent (const std::array<const amrex::MultiFab*,3>& fine, const std::array< amrex::MultiFab*,3>& crse, int ref_ratio); |