diff options
Diffstat (limited to 'Source/PhysicalParticleContainer.cpp')
-rw-r--r-- | Source/PhysicalParticleContainer.cpp | 173 |
1 files changed, 112 insertions, 61 deletions
diff --git a/Source/PhysicalParticleContainer.cpp b/Source/PhysicalParticleContainer.cpp index df0ee3b3c..d9fd55afa 100644 --- a/Source/PhysicalParticleContainer.cpp +++ b/Source/PhysicalParticleContainer.cpp @@ -607,7 +607,7 @@ PhysicalParticleContainer::FieldGather (int lev, #pragma omp parallel #endif { - Vector<Real> xp, yp, zp; + RealVector xp, yp, zp; for (WarpXParIter pti(*this, lev); pti.isValid(); ++pti) { @@ -722,14 +722,19 @@ PhysicalParticleContainer::Evolve (int lev, #pragma omp parallel #endif { - Vector<Real> xp, yp, zp, giv; - FArrayBox local_rho, local_jx, local_jy, local_jz; + RealVector xp, yp, zp, giv; + + std::unique_ptr<FArrayBox> local_rho(new FArrayBox()); + std::unique_ptr<FArrayBox> local_jx(new FArrayBox()); + std::unique_ptr<FArrayBox> local_jy(new FArrayBox()); + std::unique_ptr<FArrayBox> local_jz(new FArrayBox()); + FArrayBox filtered_Ex, filtered_Ey, filtered_Ez; FArrayBox filtered_Bx, filtered_By, filtered_Bz; std::vector<bool> inexflag; Vector<long> pid; - Vector<Real> tmp; - Vector<ParticleType> particle_tmp; + RealVector tmp; + ParticleVector particle_tmp; for (WarpXParIter pti(*this, lev); pti.isValid(); ++pti) { @@ -827,12 +832,21 @@ PhysicalParticleContainer::Evolve (int lev, FArrayBox& jyfab = jy[pti]; FArrayBox& jzfab = jz[pti]; +#ifdef AMREX_USE_CUDA + thrust::fill(thrust::device, thrust::device_ptr<Real>(Exp.data()), thrust::device_ptr<Real>(Exp.data() + np), 0.0); + thrust::fill(thrust::device, thrust::device_ptr<Real>(Eyp.data()), thrust::device_ptr<Real>(Eyp.data() + np), 0.0); + thrust::fill(thrust::device, thrust::device_ptr<Real>(Ezp.data()), thrust::device_ptr<Real>(Ezp.data() + np), 0.0); + thrust::fill(thrust::device, thrust::device_ptr<Real>(Bxp.data()), thrust::device_ptr<Real>(Bxp.data() + np), WarpX::B_external[0]); + thrust::fill(thrust::device, thrust::device_ptr<Real>(Byp.data()), thrust::device_ptr<Real>(Byp.data() + np), WarpX::B_external[1]); + thrust::fill(thrust::device, thrust::device_ptr<Real>(Bzp.data()), thrust::device_ptr<Real>(Bzp.data() + np), WarpX::B_external[2]); +#else Exp.assign(np,0.0); Eyp.assign(np,0.0); Ezp.assign(np,0.0); Bxp.assign(np,WarpX::B_external[0]); Byp.assign(np,WarpX::B_external[1]); Bzp.assign(np,WarpX::B_external[2]); +#endif giv.resize(np); @@ -954,10 +968,10 @@ PhysicalParticleContainer::Evolve (int lev, FArrayBox& rhofab = (*rhomf)[pti]; const std::array<Real, 3>& xyzmin = xyzmin_tile; tile_box.grow(ngRho); - local_rho.resize(tile_box); - local_rho = 0.0; - data_ptr = local_rho.dataPtr(); - rholen = local_rho.length(); + local_rho->resize(tile_box); + local_rho->setVal(0.0); + data_ptr = local_rho->dataPtr(); + rholen = local_rho->length(); #if (AMREX_SPACEDIM == 3) const long nx = rholen[0]-1-2*ngRho; @@ -978,8 +992,12 @@ PhysicalParticleContainer::Evolve (int lev, &lvect, &WarpX::charge_deposition_algo); const int ncomp = 1; - amrex_atomic_accumulate_fab(BL_TO_FORTRAN_3D(local_rho), - BL_TO_FORTRAN_N_3D(rhofab,icomp), ncomp); + FArrayBox const* local_fab = local_rho.get(); + FArrayBox* global_fab = &rhofab; + AMREX_CUDA_LAUNCH_HOST_DEVICE_LAMBDA(tile_box, tbx, + { + global_fab->atomicAdd(*local_fab, tbx, tbx, 0, icomp, ncomp); + }); } if (np_current < np) @@ -991,12 +1009,12 @@ PhysicalParticleContainer::Evolve (int lev, tile_box = amrex::convert(ctilebox, IntVect::TheUnitVector()); tile_box.grow(ngRho); - local_rho.resize(tile_box); + local_rho->resize(tile_box); - local_rho = 0.0; + local_rho->setVal(0.0); - data_ptr = local_rho.dataPtr(); - rholen = local_rho.length(); + data_ptr = local_rho->dataPtr(); + rholen = local_rho->length(); #if (AMREX_SPACEDIM == 3) const long nx = rholen[0]-1-2*ngRho; @@ -1024,8 +1042,12 @@ PhysicalParticleContainer::Evolve (int lev, FArrayBox& crhofab = (*crhomf)[pti]; const int ncomp = 1; - amrex_atomic_accumulate_fab(BL_TO_FORTRAN_3D(local_rho), - BL_TO_FORTRAN_N_3D(crhofab,icomp), ncomp); + FArrayBox const* local_fab = local_rho.get(); + FArrayBox* global_fab = &crhofab; + AMREX_CUDA_LAUNCH_HOST_DEVICE_LAMBDA(tile_box, tbx, + { + global_fab->atomicAdd(*local_fab, tbx, tbx, 0, icomp, ncomp); + }); } }; @@ -1175,7 +1197,6 @@ PhysicalParticleContainer::Evolve (int lev, Box tbx = convert(pti.tilebox(), WarpX::jx_nodal_flag); Box tby = convert(pti.tilebox(), WarpX::jy_nodal_flag); Box tbz = convert(pti.tilebox(), WarpX::jz_nodal_flag); - Box gtbx, gtby, gtbz; const std::array<Real, 3>& xyzmin = xyzmin_tile; @@ -1185,21 +1206,21 @@ PhysicalParticleContainer::Evolve (int lev, tby.grow(ngJ); tbz.grow(ngJ); - local_jx.resize(tbx); - local_jy.resize(tby); - local_jz.resize(tbz); + local_jx->resize(tbx); + local_jy->resize(tby); + local_jz->resize(tbz); - local_jx = 0.0; - local_jy = 0.0; - local_jz = 0.0; + local_jx->setVal(0.0); + local_jy->setVal(0.0); + local_jz->setVal(0.0); - jx_ptr = local_jx.dataPtr(); - jy_ptr = local_jy.dataPtr(); - jz_ptr = local_jz.dataPtr(); + jx_ptr = local_jx->dataPtr(); + jy_ptr = local_jy->dataPtr(); + jz_ptr = local_jz->dataPtr(); - jxntot = local_jx.length(); - jyntot = local_jy.length(); - jzntot = local_jz.length(); + jxntot = local_jx->length(); + jyntot = local_jy->length(); + jzntot = local_jz->length(); warpx_current_deposition( jx_ptr, &ngJ, jxntot, @@ -1216,15 +1237,28 @@ PhysicalParticleContainer::Evolve (int lev, BL_PROFILE_VAR_STOP(blp_pxr_cd); BL_PROFILE_VAR_START(blp_accumulate); - const int ncomp = 1; - amrex_atomic_accumulate_fab(BL_TO_FORTRAN_3D(local_jx), - BL_TO_FORTRAN_3D(jxfab), ncomp); - - amrex_atomic_accumulate_fab(BL_TO_FORTRAN_3D(local_jy), - BL_TO_FORTRAN_3D(jyfab), ncomp); - amrex_atomic_accumulate_fab(BL_TO_FORTRAN_3D(local_jz), - BL_TO_FORTRAN_3D(jzfab), ncomp); + FArrayBox const* local_jx_ptr = local_jx.get(); + FArrayBox* global_jx_ptr = &jxfab; + AMREX_CUDA_LAUNCH_HOST_DEVICE_LAMBDA(tbx, thread_bx, + { + global_jx_ptr->atomicAdd(*local_jx_ptr, thread_bx, thread_bx, 0, 0, 1); + }); + + FArrayBox const* local_jy_ptr = local_jy.get(); + FArrayBox* global_jy_ptr = &jyfab; + AMREX_CUDA_LAUNCH_HOST_DEVICE_LAMBDA(tby, thread_bx, + { + global_jy_ptr->atomicAdd(*local_jy_ptr, thread_bx, thread_bx, 0, 0, 1); + }); + + FArrayBox const* local_jz_ptr = local_jz.get(); + FArrayBox* global_jz_ptr = &jzfab; + AMREX_CUDA_LAUNCH_HOST_DEVICE_LAMBDA(tbz, thread_bx, + { + global_jz_ptr->atomicAdd(*local_jz_ptr, thread_bx, thread_bx, 0, 0, 1); + }); + BL_PROFILE_VAR_STOP(blp_accumulate); } @@ -1241,21 +1275,21 @@ PhysicalParticleContainer::Evolve (int lev, tby.grow(ngJ); tbz.grow(ngJ); - local_jx.resize(tbx); - local_jy.resize(tby); - local_jz.resize(tbz); + local_jx->resize(tbx); + local_jy->resize(tby); + local_jz->resize(tbz); - local_jx = 0.0; - local_jy = 0.0; - local_jz = 0.0; + local_jx->setVal(0.0); + local_jy->setVal(0.0); + local_jz->setVal(0.0); - jx_ptr = local_jx.dataPtr(); - jy_ptr = local_jy.dataPtr(); - jz_ptr = local_jz.dataPtr(); + jx_ptr = local_jx->dataPtr(); + jy_ptr = local_jy->dataPtr(); + jz_ptr = local_jz->dataPtr(); - jxntot = local_jx.length(); - jyntot = local_jy.length(); - jzntot = local_jz.length(); + jxntot = local_jx->length(); + jyntot = local_jy->length(); + jzntot = local_jz->length(); long ncrse = np - nfine_current; warpx_current_deposition( @@ -1274,13 +1308,30 @@ PhysicalParticleContainer::Evolve (int lev, FArrayBox& cjyfab = (*cjy)[pti]; FArrayBox& cjzfab = (*cjz)[pti]; - const int ncomp = 1; - amrex_atomic_accumulate_fab(BL_TO_FORTRAN_3D(local_jx), - BL_TO_FORTRAN_3D(cjxfab), ncomp); - amrex_atomic_accumulate_fab(BL_TO_FORTRAN_3D(local_jy), - BL_TO_FORTRAN_3D(cjyfab), ncomp); - amrex_atomic_accumulate_fab(BL_TO_FORTRAN_3D(local_jz), - BL_TO_FORTRAN_3D(cjzfab), ncomp); + BL_PROFILE_VAR_START(blp_accumulate); + + FArrayBox const* local_jx_ptr = local_jx.get(); + FArrayBox* global_jx_ptr = &cjxfab; + AMREX_CUDA_LAUNCH_HOST_DEVICE_LAMBDA(tbx, thread_bx, + { + global_jx_ptr->atomicAdd(*local_jx_ptr, thread_bx, thread_bx, 0, 0, 1); + }); + + FArrayBox const* local_jy_ptr = local_jy.get(); + FArrayBox* global_jy_ptr = &cjyfab; + AMREX_CUDA_LAUNCH_HOST_DEVICE_LAMBDA(tby, thread_bx, + { + global_jy_ptr->atomicAdd(*local_jy_ptr, thread_bx, thread_bx, 0, 0, 1); + }); + + FArrayBox const* local_jz_ptr = local_jz.get(); + FArrayBox* global_jz_ptr = &cjzfab; + AMREX_CUDA_LAUNCH_HOST_DEVICE_LAMBDA(tbz, thread_bx, + { + global_jz_ptr->atomicAdd(*local_jz_ptr, thread_bx, thread_bx, 0, 0, 1); + }); + + BL_PROFILE_VAR_STOP(blp_accumulate); } // @@ -1304,8 +1355,8 @@ PhysicalParticleContainer::Evolve (int lev, void PhysicalParticleContainer::PushPX(WarpXParIter& pti, - Vector<Real>& xp, Vector<Real>& yp, Vector<Real>& zp, - Vector<Real>& giv, + RealVector& xp, RealVector& yp, RealVector& zp, + RealVector& giv, Real dt) { @@ -1359,7 +1410,7 @@ PhysicalParticleContainer::PushP (int lev, Real dt, #pragma omp parallel #endif { - Vector<Real> xp, yp, zp, giv; + RealVector xp, yp, zp, giv; for (WarpXParIter pti(*this, lev); pti.isValid(); ++pti) { @@ -1482,7 +1533,7 @@ void PhysicalParticleContainer::GetParticleSlice(const int direction, const Real #pragma omp parallel #endif { - Vector<Real> xp_new, yp_new, zp_new; + RealVector xp_new, yp_new, zp_new; for (WarpXParIter pti(*this, lev); pti.isValid(); ++pti) { |