diff options
author | 2018-12-12 10:36:42 -0800 | |
---|---|---|
committer | 2018-12-12 10:36:42 -0800 | |
commit | 86ff3fa5cfb5695f55cd514d8df0e99b81187cce (patch) | |
tree | 22c9f627f37ad905bee89782cb71b4a14e468d53 /Source/WarpXParticleContainer.cpp | |
parent | 22dd9a7bdbfcf07fb3ad5d442011e4423cddbd6c (diff) | |
download | WarpX-86ff3fa5cfb5695f55cd514d8df0e99b81187cce.tar.gz WarpX-86ff3fa5cfb5695f55cd514d8df0e99b81187cce.tar.zst WarpX-86ff3fa5cfb5695f55cd514d8df0e99b81187cce.zip |
manually reverting back to last known working version of WarpX
Diffstat (limited to 'Source/WarpXParticleContainer.cpp')
-rw-r--r-- | Source/WarpXParticleContainer.cpp | 480 |
1 files changed, 420 insertions, 60 deletions
diff --git a/Source/WarpXParticleContainer.cpp b/Source/WarpXParticleContainer.cpp index ad6979462..a9699071c 100644 --- a/Source/WarpXParticleContainer.cpp +++ b/Source/WarpXParticleContainer.cpp @@ -18,14 +18,14 @@ WarpXParIter::WarpXParIter (ContainerType& pc, int level) #if (AMREX_SPACEDIM == 2) void -WarpXParIter::GetPosition (Vector<Real>& x, Vector<Real>& y, Vector<Real>& z) const +WarpXParIter::GetPosition (Cuda::DeviceVector<Real>& x, Cuda::DeviceVector<Real>& y, Cuda::DeviceVector<Real>& z) const { amrex::ParIter<0,0,PIdx::nattribs>::GetPosition(x, z); y.resize(x.size(), std::numeric_limits<Real>::quiet_NaN()); } void -WarpXParIter::SetPosition (const Vector<Real>& x, const Vector<Real>& y, const Vector<Real>& z) +WarpXParIter::SetPosition (const Cuda::DeviceVector<Real>& x, const Cuda::DeviceVector<Real>& y, const Cuda::DeviceVector<Real>& z) { amrex::ParIter<0,0,PIdx::nattribs>::SetPosition(x, z); } @@ -40,6 +40,30 @@ WarpXParticleContainer::WarpXParticleContainer (AmrCore* amr_core, int ispecies) } SetParticleSize(); ReadParameters(); + + // Initialize temporary local arrays for charge/current deposition + int num_threads = 1; + #ifdef _OPENMP + #pragma omp parallel + #pragma omp single + num_threads = omp_get_num_threads(); + #endif + local_rho.resize(num_threads); + local_jx.resize(num_threads); + local_jy.resize(num_threads); + local_jz.resize(num_threads); + m_xp.resize(num_threads); + m_yp.resize(num_threads); + m_zp.resize(num_threads); + m_giv.resize(num_threads); + for (int i = 0; i < num_threads; ++i) + { + local_rho[i].reset(nullptr); + local_jx[i].reset(nullptr); + local_jy[i].reset(nullptr); + local_jz[i].reset(nullptr); + } + } void @@ -50,10 +74,14 @@ WarpXParticleContainer::ReadParameters () { ParmParse pp("particles"); - do_tiling = true; // because the default in amrex is false +#ifdef AMREX_USE_GPU + do_tiling = false; // By default, tiling is off on GPU +#else + do_tiling = true; +#endif pp.query("do_tiling", do_tiling); pp.query("do_not_push", do_not_push); - + initialized = true; } } @@ -73,7 +101,7 @@ WarpXParticleContainer::AddOneParticle (int lev, int grid, int tile, const std::array<Real,PIdx::nattribs>& attribs) { auto& particle_tile = GetParticles(lev)[std::make_pair(grid,tile)]; - AddOneParticle(particle_tile, x, y, z, attribs); + AddOneParticle(particle_tile, x, y, z, attribs); } void @@ -92,7 +120,7 @@ WarpXParticleContainer::AddOneParticle (ParticleTileType& particle_tile, p.pos(0) = x; p.pos(1) = z; #endif - + particle_tile.push_back(p); particle_tile.push_back_real(attribs); } @@ -153,16 +181,341 @@ WarpXParticleContainer::AddNParticles (int lev, particle_tile.push_back_real(PIdx::ux, vx + ibegin, vx + iend); particle_tile.push_back_real(PIdx::uy, vy + ibegin, vy + iend); particle_tile.push_back_real(PIdx::uz, vz + ibegin, vz + iend); - + for (int comp = PIdx::uz+1; comp < PIdx::nattribs; ++comp) { particle_tile.push_back_real(comp, np, 0.0); } - } + } Redistribute(); } + +void +WarpXParticleContainer::DepositCurrent(WarpXParIter& pti, + RealVector& wp, RealVector& uxp, + RealVector& uyp, RealVector& uzp, + MultiFab& jx, MultiFab& jy, MultiFab& jz, + MultiFab* cjx, MultiFab* cjy, MultiFab* cjz, + const long np_current, const long np, + int thread_num, int lev, Real dt ) +{ + Real *jx_ptr, *jy_ptr, *jz_ptr; + const int *jxntot, *jyntot, *jzntot; + const std::array<Real,3>& xyzmin_tile = WarpX::LowerCorner(pti.tilebox(), lev); + const std::array<Real,3>& dx = WarpX::CellSize(lev); + const std::array<Real,3>& cdx = WarpX::CellSize(std::max(lev-1,0)); + const std::array<Real, 3>& xyzmin = xyzmin_tile; + const long lvect = 8; + + BL_PROFILE_VAR_NS("PICSAR::CurrentDeposition", blp_pxr_cd); + BL_PROFILE_VAR_NS("PPC::Evolve::Accumulate", blp_accumulate); + + Box tbx = convert(pti.tilebox(), WarpX::jx_nodal_flag); + Box tby = convert(pti.tilebox(), WarpX::jy_nodal_flag); + Box tbz = convert(pti.tilebox(), WarpX::jz_nodal_flag); + + // WarpX assumes the same number of guard cells for Jx, Jy, Jz + long ngJ = jx.nGrow(); + + // Deposit charge for particles that are not in the current buffers + if (np_current > 0) + { + tbx.grow(ngJ); + tby.grow(ngJ); + tbz.grow(ngJ); + + local_jx[thread_num]->resize(tbx); + local_jy[thread_num]->resize(tby); + local_jz[thread_num]->resize(tbz); + + jx_ptr = local_jx[thread_num]->dataPtr(); + jy_ptr = local_jy[thread_num]->dataPtr(); + jz_ptr = local_jz[thread_num]->dataPtr(); + + FArrayBox* local_jx_ptr = local_jx[thread_num].get(); + AMREX_LAUNCH_HOST_DEVICE_LAMBDA(tbx, b, + { + local_jx_ptr->setVal(0.0, b, 0, 1); + }); + + FArrayBox* local_jy_ptr = local_jy[thread_num].get(); + AMREX_LAUNCH_HOST_DEVICE_LAMBDA(tby, b, + { + local_jy_ptr->setVal(0.0, b, 0, 1); + }); + + FArrayBox* local_jz_ptr = local_jz[thread_num].get(); + AMREX_LAUNCH_HOST_DEVICE_LAMBDA(tbz, b, + { + local_jz_ptr->setVal(0.0, b, 0, 1); + }); + + jxntot = local_jx[thread_num]->length(); + jyntot = local_jy[thread_num]->length(); + jzntot = local_jz[thread_num]->length(); + + BL_PROFILE_VAR_START(blp_pxr_cd); + warpx_current_deposition( + jx_ptr, &ngJ, jxntot, + jy_ptr, &ngJ, jyntot, + jz_ptr, &ngJ, jzntot, + &np_current, + m_xp[thread_num].dataPtr(), + m_yp[thread_num].dataPtr(), + m_zp[thread_num].dataPtr(), + uxp.dataPtr(), uyp.dataPtr(), uzp.dataPtr(), + m_giv[thread_num].dataPtr(), + wp.dataPtr(), &this->charge, + &xyzmin[0], &xyzmin[1], &xyzmin[2], + &dt, &dx[0], &dx[1], &dx[2], + &WarpX::nox,&WarpX::noy,&WarpX::noz, + &lvect,&WarpX::current_deposition_algo); + BL_PROFILE_VAR_STOP(blp_pxr_cd); + + BL_PROFILE_VAR_START(blp_accumulate); + + FArrayBox const* local_jx_const_ptr = local_jx[thread_num].get(); + FArrayBox* global_jx_ptr = jx.fabPtr(pti); + AMREX_LAUNCH_HOST_DEVICE_LAMBDA(tbx, thread_bx, + { + global_jx_ptr->atomicAdd(*local_jx_const_ptr, thread_bx, thread_bx, 0, 0, 1); + }); + + FArrayBox const* local_jy_const_ptr = local_jy[thread_num].get(); + FArrayBox* global_jy_ptr = jy.fabPtr(pti); + AMREX_LAUNCH_HOST_DEVICE_LAMBDA(tby, thread_bx, + { + global_jy_ptr->atomicAdd(*local_jy_const_ptr, thread_bx, thread_bx, 0, 0, 1); + }); + + FArrayBox const* local_jz_const_ptr = local_jz[thread_num].get(); + FArrayBox* global_jz_ptr = jz.fabPtr(pti); + AMREX_LAUNCH_HOST_DEVICE_LAMBDA(tbz, thread_bx, + { + global_jz_ptr->atomicAdd(*local_jz_const_ptr, thread_bx, thread_bx, 0, 0, 1); + }); + + BL_PROFILE_VAR_STOP(blp_accumulate); + } + + // Deposit charge for particles that are in the current buffers + if (np_current < np) + { + const IntVect& ref_ratio = WarpX::RefRatio(lev-1); + const Box& ctilebox = amrex::coarsen(pti.tilebox(),ref_ratio); + const std::array<Real,3>& cxyzmin_tile = WarpX::LowerCorner(ctilebox, lev-1); + + tbx = amrex::convert(ctilebox, WarpX::jx_nodal_flag); + tby = amrex::convert(ctilebox, WarpX::jy_nodal_flag); + tbz = amrex::convert(ctilebox, WarpX::jz_nodal_flag); + tbx.grow(ngJ); + tby.grow(ngJ); + tbz.grow(ngJ); + + local_jx[thread_num]->resize(tbx); + local_jy[thread_num]->resize(tby); + local_jz[thread_num]->resize(tbz); + + jx_ptr = local_jx[thread_num]->dataPtr(); + jy_ptr = local_jy[thread_num]->dataPtr(); + jz_ptr = local_jz[thread_num]->dataPtr(); + + FArrayBox* local_jx_ptr = local_jx[thread_num].get(); + AMREX_LAUNCH_HOST_DEVICE_LAMBDA(tbx, b, + { + local_jx_ptr->setVal(0.0, b, 0, 1); + }); + + FArrayBox* local_jy_ptr = local_jy[thread_num].get(); + AMREX_LAUNCH_HOST_DEVICE_LAMBDA(tby, b, + { + local_jy_ptr->setVal(0.0, b, 0, 1); + }); + + FArrayBox* local_jz_ptr = local_jz[thread_num].get(); + AMREX_LAUNCH_HOST_DEVICE_LAMBDA(tbz, b, + { + local_jz_ptr->setVal(0.0, b, 0, 1); + }); + jxntot = local_jx[thread_num]->length(); + jyntot = local_jy[thread_num]->length(); + jzntot = local_jz[thread_num]->length(); + + long ncrse = np - np_current; + BL_PROFILE_VAR_START(blp_pxr_cd); + warpx_current_deposition( + jx_ptr, &ngJ, jxntot, + jy_ptr, &ngJ, jyntot, + jz_ptr, &ngJ, jzntot, + &ncrse, + m_xp[thread_num].dataPtr() +np_current, + m_yp[thread_num].dataPtr() +np_current, + m_zp[thread_num].dataPtr() +np_current, + uxp.dataPtr()+np_current, + uyp.dataPtr()+np_current, + uzp.dataPtr()+np_current, + m_giv[thread_num].dataPtr()+np_current, + wp.dataPtr()+np_current, &this->charge, + &cxyzmin_tile[0], &cxyzmin_tile[1], &cxyzmin_tile[2], + &dt, &cdx[0], &cdx[1], &cdx[2], + &WarpX::nox,&WarpX::noy,&WarpX::noz, + &lvect,&WarpX::current_deposition_algo); + BL_PROFILE_VAR_STOP(blp_pxr_cd); + + BL_PROFILE_VAR_START(blp_accumulate); + + FArrayBox const* local_jx_const_ptr = local_jx[thread_num].get(); + FArrayBox* global_jx_ptr = cjx->fabPtr(pti); + AMREX_LAUNCH_HOST_DEVICE_LAMBDA(tbx, thread_bx, + { + global_jx_ptr->atomicAdd(*local_jx_const_ptr, thread_bx, thread_bx, 0, 0, 1); + }); + + FArrayBox const* local_jy_const_ptr = local_jy[thread_num].get(); + FArrayBox* global_jy_ptr = cjy->fabPtr(pti); + AMREX_LAUNCH_HOST_DEVICE_LAMBDA(tby, thread_bx, + { + global_jy_ptr->atomicAdd(*local_jy_const_ptr, thread_bx, thread_bx, 0, 0, 1); + }); + + FArrayBox const* local_jz_const_ptr = local_jz[thread_num].get(); + FArrayBox* global_jz_ptr = cjz->fabPtr(pti); + AMREX_LAUNCH_HOST_DEVICE_LAMBDA(tbz, thread_bx, + { + global_jz_ptr->atomicAdd(*local_jz_const_ptr, thread_bx, thread_bx, 0, 0, 1); + }); + + BL_PROFILE_VAR_STOP(blp_accumulate); + } +}; + + +void +WarpXParticleContainer::DepositCharge ( WarpXParIter& pti, RealVector& wp, + MultiFab* rhomf, MultiFab* crhomf, int icomp, + const long np_current, + const long np, int thread_num, int lev ) +{ + + BL_PROFILE_VAR_NS("PICSAR::ChargeDeposition", blp_pxr_chd); + BL_PROFILE_VAR_NS("PPC::Evolve::Accumulate", blp_accumulate); + + const std::array<Real,3>& xyzmin_tile = WarpX::LowerCorner(pti.tilebox(), lev); + const long lvect = 8; + + long ngRho = rhomf->nGrow(); + Real* data_ptr; + Box tile_box = convert(pti.tilebox(), IntVect::TheUnitVector()); + const int *rholen; + + const std::array<Real,3>& dx = WarpX::CellSize(lev); + const std::array<Real,3>& cdx = WarpX::CellSize(std::max(lev-1,0)); + + // Deposit charge for particles that are not in the current buffers + if (np_current > 0) + { + const std::array<Real, 3>& xyzmin = xyzmin_tile; + tile_box.grow(ngRho); + local_rho[thread_num]->resize(tile_box); + FArrayBox* local_rho_ptr = local_rho[thread_num].get(); + AMREX_LAUNCH_HOST_DEVICE_LAMBDA(tile_box, b, + { + local_rho_ptr->setVal(0.0, b, 0, 1); + }); + + data_ptr = local_rho[thread_num]->dataPtr(); + rholen = local_rho[thread_num]->length(); +#if (AMREX_SPACEDIM == 3) + const long nx = rholen[0]-1-2*ngRho; + const long ny = rholen[1]-1-2*ngRho; + const long nz = rholen[2]-1-2*ngRho; +#else + const long nx = rholen[0]-1-2*ngRho; + const long ny = 0; + const long nz = rholen[1]-1-2*ngRho; +#endif + BL_PROFILE_VAR_START(blp_pxr_chd); + warpx_charge_deposition(data_ptr, &np_current, + m_xp[thread_num].dataPtr(), + m_yp[thread_num].dataPtr(), + m_zp[thread_num].dataPtr(), + wp.dataPtr(), + &this->charge, + &xyzmin[0], &xyzmin[1], &xyzmin[2], + &dx[0], &dx[1], &dx[2], &nx, &ny, &nz, + &ngRho, &ngRho, &ngRho, + &WarpX::nox,&WarpX::noy,&WarpX::noz, + &lvect, &WarpX::charge_deposition_algo); + BL_PROFILE_VAR_STOP(blp_pxr_chd); + + const int ncomp = 1; + FArrayBox const* local_fab = local_rho[thread_num].get(); + FArrayBox* global_fab = rhomf->fabPtr(pti); + BL_PROFILE_VAR_START(blp_accumulate); + AMREX_LAUNCH_HOST_DEVICE_LAMBDA(tile_box, tbx, + { + global_fab->atomicAdd(*local_fab, tbx, tbx, 0, icomp, ncomp); + }); + BL_PROFILE_VAR_STOP(blp_accumulate); + } + + // Deposit charge for particles that are in the current buffers + if (np_current < np) + { + const IntVect& ref_ratio = WarpX::RefRatio(lev-1); + const Box& ctilebox = amrex::coarsen(pti.tilebox(), ref_ratio); + const std::array<Real,3>& cxyzmin_tile = WarpX::LowerCorner(ctilebox, lev-1); + + tile_box = amrex::convert(ctilebox, IntVect::TheUnitVector()); + tile_box.grow(ngRho); + local_rho[thread_num]->resize(tile_box); + FArrayBox* local_rho_ptr = local_rho[thread_num].get(); + AMREX_LAUNCH_HOST_DEVICE_LAMBDA(tile_box, b, + { + local_rho_ptr->setVal(0.0, b, 0, 1); + }); + + data_ptr = local_rho[thread_num]->dataPtr(); + rholen = local_rho[thread_num]->length(); +#if (AMREX_SPACEDIM == 3) + const long nx = rholen[0]-1-2*ngRho; + const long ny = rholen[1]-1-2*ngRho; + const long nz = rholen[2]-1-2*ngRho; +#else + const long nx = rholen[0]-1-2*ngRho; + const long ny = 0; + const long nz = rholen[1]-1-2*ngRho; +#endif + + long ncrse = np - np_current; + BL_PROFILE_VAR_START(blp_pxr_chd); + warpx_charge_deposition(data_ptr, &ncrse, + m_xp[thread_num].dataPtr() + np_current, + m_yp[thread_num].dataPtr() + np_current, + m_zp[thread_num].dataPtr() + np_current, + wp.dataPtr() + np_current, + &this->charge, + &cxyzmin_tile[0], &cxyzmin_tile[1], &cxyzmin_tile[2], + &cdx[0], &cdx[1], &cdx[2], &nx, &ny, &nz, + &ngRho, &ngRho, &ngRho, + &WarpX::nox,&WarpX::noy,&WarpX::noz, + &lvect, &WarpX::charge_deposition_algo); + BL_PROFILE_VAR_STOP(blp_pxr_chd); + + const int ncomp = 1; + FArrayBox const* local_fab = local_rho[thread_num].get(); + FArrayBox* global_fab = crhomf->fabPtr(pti); + BL_PROFILE_VAR_START(blp_accumulate); + AMREX_LAUNCH_HOST_DEVICE_LAMBDA(tile_box, tbx, + { + global_fab->atomicAdd(*local_fab, tbx, tbx, 0, icomp, ncomp); + }); + BL_PROFILE_VAR_STOP(blp_accumulate); + } +}; + void WarpXParticleContainer::DepositCharge (Vector<std::unique_ptr<MultiFab> >& rho, bool local) { @@ -172,32 +525,32 @@ WarpXParticleContainer::DepositCharge (Vector<std::unique_ptr<MultiFab> >& rho, // each level deposits it's own particles const int ng = rho[0]->nGrow(); - for (int lev = 0; lev < num_levels; ++lev) { + for (int lev = 0; lev < num_levels; ++lev) { rho[lev]->setVal(0.0, ng); const auto& gm = m_gdb->Geom(lev); const auto& ba = m_gdb->ParticleBoxArray(lev); const auto& dm = m_gdb->DistributionMap(lev); - + const Real* dx = gm.CellSize(); const Real* plo = gm.ProbLo(); BoxArray nba = ba; nba.surroundingNodes(); - + for (WarpXParIter pti(*this, lev); pti.isValid(); ++pti) { const Box& box = nba[pti]; - + auto& wp = pti.GetAttribs(PIdx::w); const auto& particles = pti.GetArrayOfStructs(); int nstride = particles.dataShape().first; const long np = pti.numParticles(); - + FArrayBox& rhofab = (*rho[lev])[pti]; - - WRPX_DEPOSIT_CIC(particles.data(), nstride, np, - wp.data(), &this->charge, - rhofab.dataPtr(), box.loVect(), box.hiVect(), + + WRPX_DEPOSIT_CIC(particles.dataPtr(), nstride, np, + wp.dataPtr(), &this->charge, + rhofab.dataPtr(), box.loVect(), box.hiVect(), plo, dx, &ng); } @@ -211,12 +564,12 @@ WarpXParticleContainer::DepositCharge (Vector<std::unique_ptr<MultiFab> >& rho, const DistributionMapping& fine_dm = rho[lev+1]->DistributionMap(); BoxArray coarsened_fine_BA = fine_BA; coarsened_fine_BA.coarsen(m_gdb->refRatio(lev)); - + MultiFab coarsened_fine_data(coarsened_fine_BA, fine_dm, 1, 0); coarsened_fine_data.setVal(0.0); - + IntVect ratio(AMREX_D_DECL(2, 2, 2)); // FIXME - + for (MFIter mfi(coarsened_fine_data); mfi.isValid(); ++mfi) { const Box& bx = mfi.validbox(); const Box& crse_box = coarsened_fine_data[mfi].box(); @@ -225,7 +578,7 @@ WarpXParticleContainer::DepositCharge (Vector<std::unique_ptr<MultiFab> >& rho, coarsened_fine_data[mfi].dataPtr(), crse_box.loVect(), crse_box.hiVect(), (*rho[lev+1])[mfi].dataPtr(), fine_box.loVect(), fine_box.hiVect()); } - + rho[lev]->copy(coarsened_fine_data, m_gdb->Geom(lev).periodicity(), FabArrayBase::ADD); } } @@ -250,19 +603,19 @@ WarpXParticleContainer::GetChargeDensity (int lev, bool local) #pragma omp parallel #endif { - Vector<Real> xp, yp, zp; + Cuda::DeviceVector<Real> xp, yp, zp; FArrayBox local_rho; for (WarpXParIter pti(*this, lev); pti.isValid(); ++pti) { const Box& box = pti.validbox(); - + auto& wp = pti.GetAttribs(PIdx::w); - + const long np = pti.numParticles(); pti.GetPosition(xp, yp, zp); - + const std::array<Real,3>& xyzmin_tile = WarpX::LowerCorner(pti.tilebox(), lev); const std::array<Real,3>& xyzmin_grid = WarpX::LowerCorner(box, lev); @@ -298,26 +651,26 @@ WarpXParticleContainer::GetChargeDensity (int lev, bool local) long nyg = ng; long nzg = ng; long lvect = 8; - + warpx_charge_deposition(data_ptr, - &np, xp.data(), yp.data(), zp.data(), wp.data(), - &this->charge, &xyzmin[0], &xyzmin[1], &xyzmin[2], + &np, + xp.dataPtr(), + yp.dataPtr(), + zp.dataPtr(), wp.dataPtr(), + &this->charge, &xyzmin[0], &xyzmin[1], &xyzmin[2], &dx[0], &dx[1], &dx[2], &nx, &ny, &nz, &nxg, &nyg, &nzg, &WarpX::nox,&WarpX::noy,&WarpX::noz, &lvect, &WarpX::charge_deposition_algo); - + #ifdef _OPENMP - const Box& fabbox = rhofab.box(); - const int ncomp = 1; - amrex_atomic_accumulate_fab(BL_TO_FORTRAN_3D(local_rho), - BL_TO_FORTRAN_3D(rhofab), ncomp); + rhofab.atomicAdd(local_rho); #endif } - + } if (!local) rho->SumBoundary(gm.periodicity()); - + return rho; } @@ -327,7 +680,7 @@ Real WarpXParticleContainer::sumParticleCharge(bool local) { for (int lev = 0; lev < finestLevel(); ++lev) { - + #ifdef _OPENMP #pragma omp parallel reduction(+:total_charge) #endif @@ -356,7 +709,7 @@ std::array<Real, 3> WarpXParticleContainer::meanParticleVelocity(bool local) { amrex::Real inv_clight_sq = 1.0/PhysConst::c/PhysConst::c; for (int lev = 0; lev <= finestLevel(); ++lev) { - + #ifdef _OPENMP #pragma omp parallel reduction(+:vx_total, vy_total, vz_total, np_total) #endif @@ -365,9 +718,9 @@ std::array<Real, 3> WarpXParticleContainer::meanParticleVelocity(bool local) { auto& ux = pti.GetAttribs(PIdx::ux); auto& uy = pti.GetAttribs(PIdx::uy); auto& uz = pti.GetAttribs(PIdx::uz); - + np_total += pti.numParticles(); - + for (unsigned long i = 0; i < ux.size(); i++) { Real usq = (ux[i]*ux[i] + uy[i]*uy[i] + uz[i]*uz[i])*inv_clight_sq; Real gaminv = 1.0/std::sqrt(1.0 + usq); @@ -377,14 +730,14 @@ std::array<Real, 3> WarpXParticleContainer::meanParticleVelocity(bool local) { } } } - + if (!local) { ParallelDescriptor::ReduceRealSum(vx_total); ParallelDescriptor::ReduceRealSum(vy_total); ParallelDescriptor::ReduceRealSum(vz_total); ParallelDescriptor::ReduceLongSum(np_total); } - + std::array<Real, 3> mean_v; if (np_total > 0) { mean_v[0] = vx_total / np_total; @@ -415,7 +768,7 @@ Real WarpXParticleContainer::maxParticleVelocity(bool local) { } } } - + if (!local) ParallelDescriptor::ReduceRealMax(max_v); return max_v; } @@ -427,23 +780,23 @@ WarpXParticleContainer::PushXES (Real dt) int num_levels = finestLevel() + 1; - for (int lev = 0; lev < num_levels; ++lev) { + for (int lev = 0; lev < num_levels; ++lev) { const auto& gm = m_gdb->Geom(lev); const RealBox& prob_domain = gm.ProbDomain(); for (WarpXParIter pti(*this, lev); pti.isValid(); ++pti) { auto& particles = pti.GetArrayOfStructs(); - int nstride = particles.dataShape().first; + int nstride = particles.dataShape().first; const long np = pti.numParticles(); - - auto& attribs = pti.GetAttribs(); + + auto& attribs = pti.GetAttribs(); auto& uxp = attribs[PIdx::ux]; auto& uyp = attribs[PIdx::uy]; auto& uzp = attribs[PIdx::uz]; - - WRPX_PUSH_LEAPFROG_POSITIONS(particles.data(), nstride, np, - uxp.data(), uyp.data(), + + WRPX_PUSH_LEAPFROG_POSITIONS(particles.dataPtr(), nstride, np, + uxp.dataPtr(), uyp.dataPtr(), #if AMREX_SPACEDIM == 3 - uzp.data(), + uzp.dataPtr(), #endif &dt, prob_domain.lo(), prob_domain.hi()); @@ -474,7 +827,7 @@ WarpXParticleContainer::PushX (int lev, Real dt) #pragma omp parallel #endif { - Vector<Real> xp, yp, zp, giv; + Cuda::DeviceVector<Real> xp, yp, zp, giv; for (WarpXParIter pti(*this, lev); pti.isValid(); ++pti) { @@ -485,26 +838,30 @@ WarpXParticleContainer::PushX (int lev, Real dt) auto& uxp = attribs[PIdx::ux]; auto& uyp = attribs[PIdx::uy]; auto& uzp = attribs[PIdx::uz]; - + const long np = pti.numParticles(); - + giv.resize(np); - + // // copy data from particle container to temp arrays // BL_PROFILE_VAR_START(blp_copy); pti.GetPosition(xp, yp, zp); BL_PROFILE_VAR_STOP(blp_copy); - + // // Particle Push // BL_PROFILE_VAR_START(blp_pxr_pp); - warpx_particle_pusher_positions(&np, xp.data(), yp.data(), zp.data(), - uxp.data(), uyp.data(), uzp.data(), giv.data(), &dt); + warpx_particle_pusher_positions(&np, + xp.dataPtr(), + yp.dataPtr(), + zp.dataPtr(), + uxp.dataPtr(), uyp.dataPtr(), uzp.dataPtr(), + giv.dataPtr(), &dt); BL_PROFILE_VAR_STOP(blp_pxr_pp); - + // // copy particle data back // @@ -515,9 +872,12 @@ WarpXParticleContainer::PushX (int lev, Real dt) if (cost) { const Box& tbx = pti.tilebox(); wt = (amrex::second() - wt) / tbx.d_numPts(); - (*cost)[pti].plus(wt, tbx); + FArrayBox* costfab = cost->fabPtr(pti); + AMREX_LAUNCH_HOST_DEVICE_LAMBDA ( tbx, work_box, + { + costfab->plus(wt, work_box); + }); } } } } - |