diff options
Diffstat (limited to 'Source/WarpXParticleContainer.cpp')
-rw-r--r-- | Source/WarpXParticleContainer.cpp | 480 |
1 files changed, 60 insertions, 420 deletions
diff --git a/Source/WarpXParticleContainer.cpp b/Source/WarpXParticleContainer.cpp index a9699071c..ad6979462 100644 --- a/Source/WarpXParticleContainer.cpp +++ b/Source/WarpXParticleContainer.cpp @@ -18,14 +18,14 @@ WarpXParIter::WarpXParIter (ContainerType& pc, int level) #if (AMREX_SPACEDIM == 2) void -WarpXParIter::GetPosition (Cuda::DeviceVector<Real>& x, Cuda::DeviceVector<Real>& y, Cuda::DeviceVector<Real>& z) const +WarpXParIter::GetPosition (Vector<Real>& x, Vector<Real>& y, Vector<Real>& z) const { amrex::ParIter<0,0,PIdx::nattribs>::GetPosition(x, z); y.resize(x.size(), std::numeric_limits<Real>::quiet_NaN()); } void -WarpXParIter::SetPosition (const Cuda::DeviceVector<Real>& x, const Cuda::DeviceVector<Real>& y, const Cuda::DeviceVector<Real>& z) +WarpXParIter::SetPosition (const Vector<Real>& x, const Vector<Real>& y, const Vector<Real>& z) { amrex::ParIter<0,0,PIdx::nattribs>::SetPosition(x, z); } @@ -40,30 +40,6 @@ WarpXParticleContainer::WarpXParticleContainer (AmrCore* amr_core, int ispecies) } SetParticleSize(); ReadParameters(); - - // Initialize temporary local arrays for charge/current deposition - int num_threads = 1; - #ifdef _OPENMP - #pragma omp parallel - #pragma omp single - num_threads = omp_get_num_threads(); - #endif - local_rho.resize(num_threads); - local_jx.resize(num_threads); - local_jy.resize(num_threads); - local_jz.resize(num_threads); - m_xp.resize(num_threads); - m_yp.resize(num_threads); - m_zp.resize(num_threads); - m_giv.resize(num_threads); - for (int i = 0; i < num_threads; ++i) - { - local_rho[i].reset(nullptr); - local_jx[i].reset(nullptr); - local_jy[i].reset(nullptr); - local_jz[i].reset(nullptr); - } - } void @@ -74,14 +50,10 @@ WarpXParticleContainer::ReadParameters () { ParmParse pp("particles"); -#ifdef AMREX_USE_GPU - do_tiling = false; // By default, tiling is off on GPU -#else - do_tiling = true; -#endif + do_tiling = true; // because the default in amrex is false pp.query("do_tiling", do_tiling); pp.query("do_not_push", do_not_push); - + initialized = true; } } @@ -101,7 +73,7 @@ WarpXParticleContainer::AddOneParticle (int lev, int grid, int tile, const std::array<Real,PIdx::nattribs>& attribs) { auto& particle_tile = GetParticles(lev)[std::make_pair(grid,tile)]; - AddOneParticle(particle_tile, x, y, z, attribs); + AddOneParticle(particle_tile, x, y, z, attribs); } void @@ -120,7 +92,7 @@ WarpXParticleContainer::AddOneParticle (ParticleTileType& particle_tile, p.pos(0) = x; p.pos(1) = z; #endif - + particle_tile.push_back(p); particle_tile.push_back_real(attribs); } @@ -181,341 +153,16 @@ WarpXParticleContainer::AddNParticles (int lev, particle_tile.push_back_real(PIdx::ux, vx + ibegin, vx + iend); particle_tile.push_back_real(PIdx::uy, vy + ibegin, vy + iend); particle_tile.push_back_real(PIdx::uz, vz + ibegin, vz + iend); - + for (int comp = PIdx::uz+1; comp < PIdx::nattribs; ++comp) { particle_tile.push_back_real(comp, np, 0.0); } - } + } Redistribute(); } - -void -WarpXParticleContainer::DepositCurrent(WarpXParIter& pti, - RealVector& wp, RealVector& uxp, - RealVector& uyp, RealVector& uzp, - MultiFab& jx, MultiFab& jy, MultiFab& jz, - MultiFab* cjx, MultiFab* cjy, MultiFab* cjz, - const long np_current, const long np, - int thread_num, int lev, Real dt ) -{ - Real *jx_ptr, *jy_ptr, *jz_ptr; - const int *jxntot, *jyntot, *jzntot; - const std::array<Real,3>& xyzmin_tile = WarpX::LowerCorner(pti.tilebox(), lev); - const std::array<Real,3>& dx = WarpX::CellSize(lev); - const std::array<Real,3>& cdx = WarpX::CellSize(std::max(lev-1,0)); - const std::array<Real, 3>& xyzmin = xyzmin_tile; - const long lvect = 8; - - BL_PROFILE_VAR_NS("PICSAR::CurrentDeposition", blp_pxr_cd); - BL_PROFILE_VAR_NS("PPC::Evolve::Accumulate", blp_accumulate); - - Box tbx = convert(pti.tilebox(), WarpX::jx_nodal_flag); - Box tby = convert(pti.tilebox(), WarpX::jy_nodal_flag); - Box tbz = convert(pti.tilebox(), WarpX::jz_nodal_flag); - - // WarpX assumes the same number of guard cells for Jx, Jy, Jz - long ngJ = jx.nGrow(); - - // Deposit charge for particles that are not in the current buffers - if (np_current > 0) - { - tbx.grow(ngJ); - tby.grow(ngJ); - tbz.grow(ngJ); - - local_jx[thread_num]->resize(tbx); - local_jy[thread_num]->resize(tby); - local_jz[thread_num]->resize(tbz); - - jx_ptr = local_jx[thread_num]->dataPtr(); - jy_ptr = local_jy[thread_num]->dataPtr(); - jz_ptr = local_jz[thread_num]->dataPtr(); - - FArrayBox* local_jx_ptr = local_jx[thread_num].get(); - AMREX_LAUNCH_HOST_DEVICE_LAMBDA(tbx, b, - { - local_jx_ptr->setVal(0.0, b, 0, 1); - }); - - FArrayBox* local_jy_ptr = local_jy[thread_num].get(); - AMREX_LAUNCH_HOST_DEVICE_LAMBDA(tby, b, - { - local_jy_ptr->setVal(0.0, b, 0, 1); - }); - - FArrayBox* local_jz_ptr = local_jz[thread_num].get(); - AMREX_LAUNCH_HOST_DEVICE_LAMBDA(tbz, b, - { - local_jz_ptr->setVal(0.0, b, 0, 1); - }); - - jxntot = local_jx[thread_num]->length(); - jyntot = local_jy[thread_num]->length(); - jzntot = local_jz[thread_num]->length(); - - BL_PROFILE_VAR_START(blp_pxr_cd); - warpx_current_deposition( - jx_ptr, &ngJ, jxntot, - jy_ptr, &ngJ, jyntot, - jz_ptr, &ngJ, jzntot, - &np_current, - m_xp[thread_num].dataPtr(), - m_yp[thread_num].dataPtr(), - m_zp[thread_num].dataPtr(), - uxp.dataPtr(), uyp.dataPtr(), uzp.dataPtr(), - m_giv[thread_num].dataPtr(), - wp.dataPtr(), &this->charge, - &xyzmin[0], &xyzmin[1], &xyzmin[2], - &dt, &dx[0], &dx[1], &dx[2], - &WarpX::nox,&WarpX::noy,&WarpX::noz, - &lvect,&WarpX::current_deposition_algo); - BL_PROFILE_VAR_STOP(blp_pxr_cd); - - BL_PROFILE_VAR_START(blp_accumulate); - - FArrayBox const* local_jx_const_ptr = local_jx[thread_num].get(); - FArrayBox* global_jx_ptr = jx.fabPtr(pti); - AMREX_LAUNCH_HOST_DEVICE_LAMBDA(tbx, thread_bx, - { - global_jx_ptr->atomicAdd(*local_jx_const_ptr, thread_bx, thread_bx, 0, 0, 1); - }); - - FArrayBox const* local_jy_const_ptr = local_jy[thread_num].get(); - FArrayBox* global_jy_ptr = jy.fabPtr(pti); - AMREX_LAUNCH_HOST_DEVICE_LAMBDA(tby, thread_bx, - { - global_jy_ptr->atomicAdd(*local_jy_const_ptr, thread_bx, thread_bx, 0, 0, 1); - }); - - FArrayBox const* local_jz_const_ptr = local_jz[thread_num].get(); - FArrayBox* global_jz_ptr = jz.fabPtr(pti); - AMREX_LAUNCH_HOST_DEVICE_LAMBDA(tbz, thread_bx, - { - global_jz_ptr->atomicAdd(*local_jz_const_ptr, thread_bx, thread_bx, 0, 0, 1); - }); - - BL_PROFILE_VAR_STOP(blp_accumulate); - } - - // Deposit charge for particles that are in the current buffers - if (np_current < np) - { - const IntVect& ref_ratio = WarpX::RefRatio(lev-1); - const Box& ctilebox = amrex::coarsen(pti.tilebox(),ref_ratio); - const std::array<Real,3>& cxyzmin_tile = WarpX::LowerCorner(ctilebox, lev-1); - - tbx = amrex::convert(ctilebox, WarpX::jx_nodal_flag); - tby = amrex::convert(ctilebox, WarpX::jy_nodal_flag); - tbz = amrex::convert(ctilebox, WarpX::jz_nodal_flag); - tbx.grow(ngJ); - tby.grow(ngJ); - tbz.grow(ngJ); - - local_jx[thread_num]->resize(tbx); - local_jy[thread_num]->resize(tby); - local_jz[thread_num]->resize(tbz); - - jx_ptr = local_jx[thread_num]->dataPtr(); - jy_ptr = local_jy[thread_num]->dataPtr(); - jz_ptr = local_jz[thread_num]->dataPtr(); - - FArrayBox* local_jx_ptr = local_jx[thread_num].get(); - AMREX_LAUNCH_HOST_DEVICE_LAMBDA(tbx, b, - { - local_jx_ptr->setVal(0.0, b, 0, 1); - }); - - FArrayBox* local_jy_ptr = local_jy[thread_num].get(); - AMREX_LAUNCH_HOST_DEVICE_LAMBDA(tby, b, - { - local_jy_ptr->setVal(0.0, b, 0, 1); - }); - - FArrayBox* local_jz_ptr = local_jz[thread_num].get(); - AMREX_LAUNCH_HOST_DEVICE_LAMBDA(tbz, b, - { - local_jz_ptr->setVal(0.0, b, 0, 1); - }); - jxntot = local_jx[thread_num]->length(); - jyntot = local_jy[thread_num]->length(); - jzntot = local_jz[thread_num]->length(); - - long ncrse = np - np_current; - BL_PROFILE_VAR_START(blp_pxr_cd); - warpx_current_deposition( - jx_ptr, &ngJ, jxntot, - jy_ptr, &ngJ, jyntot, - jz_ptr, &ngJ, jzntot, - &ncrse, - m_xp[thread_num].dataPtr() +np_current, - m_yp[thread_num].dataPtr() +np_current, - m_zp[thread_num].dataPtr() +np_current, - uxp.dataPtr()+np_current, - uyp.dataPtr()+np_current, - uzp.dataPtr()+np_current, - m_giv[thread_num].dataPtr()+np_current, - wp.dataPtr()+np_current, &this->charge, - &cxyzmin_tile[0], &cxyzmin_tile[1], &cxyzmin_tile[2], - &dt, &cdx[0], &cdx[1], &cdx[2], - &WarpX::nox,&WarpX::noy,&WarpX::noz, - &lvect,&WarpX::current_deposition_algo); - BL_PROFILE_VAR_STOP(blp_pxr_cd); - - BL_PROFILE_VAR_START(blp_accumulate); - - FArrayBox const* local_jx_const_ptr = local_jx[thread_num].get(); - FArrayBox* global_jx_ptr = cjx->fabPtr(pti); - AMREX_LAUNCH_HOST_DEVICE_LAMBDA(tbx, thread_bx, - { - global_jx_ptr->atomicAdd(*local_jx_const_ptr, thread_bx, thread_bx, 0, 0, 1); - }); - - FArrayBox const* local_jy_const_ptr = local_jy[thread_num].get(); - FArrayBox* global_jy_ptr = cjy->fabPtr(pti); - AMREX_LAUNCH_HOST_DEVICE_LAMBDA(tby, thread_bx, - { - global_jy_ptr->atomicAdd(*local_jy_const_ptr, thread_bx, thread_bx, 0, 0, 1); - }); - - FArrayBox const* local_jz_const_ptr = local_jz[thread_num].get(); - FArrayBox* global_jz_ptr = cjz->fabPtr(pti); - AMREX_LAUNCH_HOST_DEVICE_LAMBDA(tbz, thread_bx, - { - global_jz_ptr->atomicAdd(*local_jz_const_ptr, thread_bx, thread_bx, 0, 0, 1); - }); - - BL_PROFILE_VAR_STOP(blp_accumulate); - } -}; - - -void -WarpXParticleContainer::DepositCharge ( WarpXParIter& pti, RealVector& wp, - MultiFab* rhomf, MultiFab* crhomf, int icomp, - const long np_current, - const long np, int thread_num, int lev ) -{ - - BL_PROFILE_VAR_NS("PICSAR::ChargeDeposition", blp_pxr_chd); - BL_PROFILE_VAR_NS("PPC::Evolve::Accumulate", blp_accumulate); - - const std::array<Real,3>& xyzmin_tile = WarpX::LowerCorner(pti.tilebox(), lev); - const long lvect = 8; - - long ngRho = rhomf->nGrow(); - Real* data_ptr; - Box tile_box = convert(pti.tilebox(), IntVect::TheUnitVector()); - const int *rholen; - - const std::array<Real,3>& dx = WarpX::CellSize(lev); - const std::array<Real,3>& cdx = WarpX::CellSize(std::max(lev-1,0)); - - // Deposit charge for particles that are not in the current buffers - if (np_current > 0) - { - const std::array<Real, 3>& xyzmin = xyzmin_tile; - tile_box.grow(ngRho); - local_rho[thread_num]->resize(tile_box); - FArrayBox* local_rho_ptr = local_rho[thread_num].get(); - AMREX_LAUNCH_HOST_DEVICE_LAMBDA(tile_box, b, - { - local_rho_ptr->setVal(0.0, b, 0, 1); - }); - - data_ptr = local_rho[thread_num]->dataPtr(); - rholen = local_rho[thread_num]->length(); -#if (AMREX_SPACEDIM == 3) - const long nx = rholen[0]-1-2*ngRho; - const long ny = rholen[1]-1-2*ngRho; - const long nz = rholen[2]-1-2*ngRho; -#else - const long nx = rholen[0]-1-2*ngRho; - const long ny = 0; - const long nz = rholen[1]-1-2*ngRho; -#endif - BL_PROFILE_VAR_START(blp_pxr_chd); - warpx_charge_deposition(data_ptr, &np_current, - m_xp[thread_num].dataPtr(), - m_yp[thread_num].dataPtr(), - m_zp[thread_num].dataPtr(), - wp.dataPtr(), - &this->charge, - &xyzmin[0], &xyzmin[1], &xyzmin[2], - &dx[0], &dx[1], &dx[2], &nx, &ny, &nz, - &ngRho, &ngRho, &ngRho, - &WarpX::nox,&WarpX::noy,&WarpX::noz, - &lvect, &WarpX::charge_deposition_algo); - BL_PROFILE_VAR_STOP(blp_pxr_chd); - - const int ncomp = 1; - FArrayBox const* local_fab = local_rho[thread_num].get(); - FArrayBox* global_fab = rhomf->fabPtr(pti); - BL_PROFILE_VAR_START(blp_accumulate); - AMREX_LAUNCH_HOST_DEVICE_LAMBDA(tile_box, tbx, - { - global_fab->atomicAdd(*local_fab, tbx, tbx, 0, icomp, ncomp); - }); - BL_PROFILE_VAR_STOP(blp_accumulate); - } - - // Deposit charge for particles that are in the current buffers - if (np_current < np) - { - const IntVect& ref_ratio = WarpX::RefRatio(lev-1); - const Box& ctilebox = amrex::coarsen(pti.tilebox(), ref_ratio); - const std::array<Real,3>& cxyzmin_tile = WarpX::LowerCorner(ctilebox, lev-1); - - tile_box = amrex::convert(ctilebox, IntVect::TheUnitVector()); - tile_box.grow(ngRho); - local_rho[thread_num]->resize(tile_box); - FArrayBox* local_rho_ptr = local_rho[thread_num].get(); - AMREX_LAUNCH_HOST_DEVICE_LAMBDA(tile_box, b, - { - local_rho_ptr->setVal(0.0, b, 0, 1); - }); - - data_ptr = local_rho[thread_num]->dataPtr(); - rholen = local_rho[thread_num]->length(); -#if (AMREX_SPACEDIM == 3) - const long nx = rholen[0]-1-2*ngRho; - const long ny = rholen[1]-1-2*ngRho; - const long nz = rholen[2]-1-2*ngRho; -#else - const long nx = rholen[0]-1-2*ngRho; - const long ny = 0; - const long nz = rholen[1]-1-2*ngRho; -#endif - - long ncrse = np - np_current; - BL_PROFILE_VAR_START(blp_pxr_chd); - warpx_charge_deposition(data_ptr, &ncrse, - m_xp[thread_num].dataPtr() + np_current, - m_yp[thread_num].dataPtr() + np_current, - m_zp[thread_num].dataPtr() + np_current, - wp.dataPtr() + np_current, - &this->charge, - &cxyzmin_tile[0], &cxyzmin_tile[1], &cxyzmin_tile[2], - &cdx[0], &cdx[1], &cdx[2], &nx, &ny, &nz, - &ngRho, &ngRho, &ngRho, - &WarpX::nox,&WarpX::noy,&WarpX::noz, - &lvect, &WarpX::charge_deposition_algo); - BL_PROFILE_VAR_STOP(blp_pxr_chd); - - const int ncomp = 1; - FArrayBox const* local_fab = local_rho[thread_num].get(); - FArrayBox* global_fab = crhomf->fabPtr(pti); - BL_PROFILE_VAR_START(blp_accumulate); - AMREX_LAUNCH_HOST_DEVICE_LAMBDA(tile_box, tbx, - { - global_fab->atomicAdd(*local_fab, tbx, tbx, 0, icomp, ncomp); - }); - BL_PROFILE_VAR_STOP(blp_accumulate); - } -}; - void WarpXParticleContainer::DepositCharge (Vector<std::unique_ptr<MultiFab> >& rho, bool local) { @@ -525,32 +172,32 @@ WarpXParticleContainer::DepositCharge (Vector<std::unique_ptr<MultiFab> >& rho, // each level deposits it's own particles const int ng = rho[0]->nGrow(); - for (int lev = 0; lev < num_levels; ++lev) { + for (int lev = 0; lev < num_levels; ++lev) { rho[lev]->setVal(0.0, ng); const auto& gm = m_gdb->Geom(lev); const auto& ba = m_gdb->ParticleBoxArray(lev); const auto& dm = m_gdb->DistributionMap(lev); - + const Real* dx = gm.CellSize(); const Real* plo = gm.ProbLo(); BoxArray nba = ba; nba.surroundingNodes(); - + for (WarpXParIter pti(*this, lev); pti.isValid(); ++pti) { const Box& box = nba[pti]; - + auto& wp = pti.GetAttribs(PIdx::w); const auto& particles = pti.GetArrayOfStructs(); int nstride = particles.dataShape().first; const long np = pti.numParticles(); - + FArrayBox& rhofab = (*rho[lev])[pti]; - - WRPX_DEPOSIT_CIC(particles.dataPtr(), nstride, np, - wp.dataPtr(), &this->charge, - rhofab.dataPtr(), box.loVect(), box.hiVect(), + + WRPX_DEPOSIT_CIC(particles.data(), nstride, np, + wp.data(), &this->charge, + rhofab.dataPtr(), box.loVect(), box.hiVect(), plo, dx, &ng); } @@ -564,12 +211,12 @@ WarpXParticleContainer::DepositCharge (Vector<std::unique_ptr<MultiFab> >& rho, const DistributionMapping& fine_dm = rho[lev+1]->DistributionMap(); BoxArray coarsened_fine_BA = fine_BA; coarsened_fine_BA.coarsen(m_gdb->refRatio(lev)); - + MultiFab coarsened_fine_data(coarsened_fine_BA, fine_dm, 1, 0); coarsened_fine_data.setVal(0.0); - + IntVect ratio(AMREX_D_DECL(2, 2, 2)); // FIXME - + for (MFIter mfi(coarsened_fine_data); mfi.isValid(); ++mfi) { const Box& bx = mfi.validbox(); const Box& crse_box = coarsened_fine_data[mfi].box(); @@ -578,7 +225,7 @@ WarpXParticleContainer::DepositCharge (Vector<std::unique_ptr<MultiFab> >& rho, coarsened_fine_data[mfi].dataPtr(), crse_box.loVect(), crse_box.hiVect(), (*rho[lev+1])[mfi].dataPtr(), fine_box.loVect(), fine_box.hiVect()); } - + rho[lev]->copy(coarsened_fine_data, m_gdb->Geom(lev).periodicity(), FabArrayBase::ADD); } } @@ -603,19 +250,19 @@ WarpXParticleContainer::GetChargeDensity (int lev, bool local) #pragma omp parallel #endif { - Cuda::DeviceVector<Real> xp, yp, zp; + Vector<Real> xp, yp, zp; FArrayBox local_rho; for (WarpXParIter pti(*this, lev); pti.isValid(); ++pti) { const Box& box = pti.validbox(); - + auto& wp = pti.GetAttribs(PIdx::w); - + const long np = pti.numParticles(); pti.GetPosition(xp, yp, zp); - + const std::array<Real,3>& xyzmin_tile = WarpX::LowerCorner(pti.tilebox(), lev); const std::array<Real,3>& xyzmin_grid = WarpX::LowerCorner(box, lev); @@ -651,26 +298,26 @@ WarpXParticleContainer::GetChargeDensity (int lev, bool local) long nyg = ng; long nzg = ng; long lvect = 8; - + warpx_charge_deposition(data_ptr, - &np, - xp.dataPtr(), - yp.dataPtr(), - zp.dataPtr(), wp.dataPtr(), - &this->charge, &xyzmin[0], &xyzmin[1], &xyzmin[2], + &np, xp.data(), yp.data(), zp.data(), wp.data(), + &this->charge, &xyzmin[0], &xyzmin[1], &xyzmin[2], &dx[0], &dx[1], &dx[2], &nx, &ny, &nz, &nxg, &nyg, &nzg, &WarpX::nox,&WarpX::noy,&WarpX::noz, &lvect, &WarpX::charge_deposition_algo); - + #ifdef _OPENMP - rhofab.atomicAdd(local_rho); + const Box& fabbox = rhofab.box(); + const int ncomp = 1; + amrex_atomic_accumulate_fab(BL_TO_FORTRAN_3D(local_rho), + BL_TO_FORTRAN_3D(rhofab), ncomp); #endif } - + } if (!local) rho->SumBoundary(gm.periodicity()); - + return rho; } @@ -680,7 +327,7 @@ Real WarpXParticleContainer::sumParticleCharge(bool local) { for (int lev = 0; lev < finestLevel(); ++lev) { - + #ifdef _OPENMP #pragma omp parallel reduction(+:total_charge) #endif @@ -709,7 +356,7 @@ std::array<Real, 3> WarpXParticleContainer::meanParticleVelocity(bool local) { amrex::Real inv_clight_sq = 1.0/PhysConst::c/PhysConst::c; for (int lev = 0; lev <= finestLevel(); ++lev) { - + #ifdef _OPENMP #pragma omp parallel reduction(+:vx_total, vy_total, vz_total, np_total) #endif @@ -718,9 +365,9 @@ std::array<Real, 3> WarpXParticleContainer::meanParticleVelocity(bool local) { auto& ux = pti.GetAttribs(PIdx::ux); auto& uy = pti.GetAttribs(PIdx::uy); auto& uz = pti.GetAttribs(PIdx::uz); - + np_total += pti.numParticles(); - + for (unsigned long i = 0; i < ux.size(); i++) { Real usq = (ux[i]*ux[i] + uy[i]*uy[i] + uz[i]*uz[i])*inv_clight_sq; Real gaminv = 1.0/std::sqrt(1.0 + usq); @@ -730,14 +377,14 @@ std::array<Real, 3> WarpXParticleContainer::meanParticleVelocity(bool local) { } } } - + if (!local) { ParallelDescriptor::ReduceRealSum(vx_total); ParallelDescriptor::ReduceRealSum(vy_total); ParallelDescriptor::ReduceRealSum(vz_total); ParallelDescriptor::ReduceLongSum(np_total); } - + std::array<Real, 3> mean_v; if (np_total > 0) { mean_v[0] = vx_total / np_total; @@ -768,7 +415,7 @@ Real WarpXParticleContainer::maxParticleVelocity(bool local) { } } } - + if (!local) ParallelDescriptor::ReduceRealMax(max_v); return max_v; } @@ -780,23 +427,23 @@ WarpXParticleContainer::PushXES (Real dt) int num_levels = finestLevel() + 1; - for (int lev = 0; lev < num_levels; ++lev) { + for (int lev = 0; lev < num_levels; ++lev) { const auto& gm = m_gdb->Geom(lev); const RealBox& prob_domain = gm.ProbDomain(); for (WarpXParIter pti(*this, lev); pti.isValid(); ++pti) { auto& particles = pti.GetArrayOfStructs(); - int nstride = particles.dataShape().first; + int nstride = particles.dataShape().first; const long np = pti.numParticles(); - - auto& attribs = pti.GetAttribs(); + + auto& attribs = pti.GetAttribs(); auto& uxp = attribs[PIdx::ux]; auto& uyp = attribs[PIdx::uy]; auto& uzp = attribs[PIdx::uz]; - - WRPX_PUSH_LEAPFROG_POSITIONS(particles.dataPtr(), nstride, np, - uxp.dataPtr(), uyp.dataPtr(), + + WRPX_PUSH_LEAPFROG_POSITIONS(particles.data(), nstride, np, + uxp.data(), uyp.data(), #if AMREX_SPACEDIM == 3 - uzp.dataPtr(), + uzp.data(), #endif &dt, prob_domain.lo(), prob_domain.hi()); @@ -827,7 +474,7 @@ WarpXParticleContainer::PushX (int lev, Real dt) #pragma omp parallel #endif { - Cuda::DeviceVector<Real> xp, yp, zp, giv; + Vector<Real> xp, yp, zp, giv; for (WarpXParIter pti(*this, lev); pti.isValid(); ++pti) { @@ -838,30 +485,26 @@ WarpXParticleContainer::PushX (int lev, Real dt) auto& uxp = attribs[PIdx::ux]; auto& uyp = attribs[PIdx::uy]; auto& uzp = attribs[PIdx::uz]; - + const long np = pti.numParticles(); - + giv.resize(np); - + // // copy data from particle container to temp arrays // BL_PROFILE_VAR_START(blp_copy); pti.GetPosition(xp, yp, zp); BL_PROFILE_VAR_STOP(blp_copy); - + // // Particle Push // BL_PROFILE_VAR_START(blp_pxr_pp); - warpx_particle_pusher_positions(&np, - xp.dataPtr(), - yp.dataPtr(), - zp.dataPtr(), - uxp.dataPtr(), uyp.dataPtr(), uzp.dataPtr(), - giv.dataPtr(), &dt); + warpx_particle_pusher_positions(&np, xp.data(), yp.data(), zp.data(), + uxp.data(), uyp.data(), uzp.data(), giv.data(), &dt); BL_PROFILE_VAR_STOP(blp_pxr_pp); - + // // copy particle data back // @@ -872,12 +515,9 @@ WarpXParticleContainer::PushX (int lev, Real dt) if (cost) { const Box& tbx = pti.tilebox(); wt = (amrex::second() - wt) / tbx.d_numPts(); - FArrayBox* costfab = cost->fabPtr(pti); - AMREX_LAUNCH_HOST_DEVICE_LAMBDA ( tbx, work_box, - { - costfab->plus(wt, work_box); - }); + (*cost)[pti].plus(wt, tbx); } } } } + |