From 8bfa3488bae938d4c1c4ec9f71eababa51556324 Mon Sep 17 00:00:00 2001 From: Andrew Myers Date: Fri, 8 Mar 2019 11:10:17 -0800 Subject: DeviceVector -> ManagedDeviceVector --- Source/Particles/WarpXParticleContainer.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'Source/Particles/WarpXParticleContainer.cpp') diff --git a/Source/Particles/WarpXParticleContainer.cpp b/Source/Particles/WarpXParticleContainer.cpp index ad80f7c4f..d54dd261d 100644 --- a/Source/Particles/WarpXParticleContainer.cpp +++ b/Source/Particles/WarpXParticleContainer.cpp @@ -18,14 +18,14 @@ WarpXParIter::WarpXParIter (ContainerType& pc, int level) #if (AMREX_SPACEDIM == 2) void -WarpXParIter::GetPosition (Cuda::DeviceVector& x, Cuda::DeviceVector& y, Cuda::DeviceVector& z) const +WarpXParIter::GetPosition (Cuda::ManagedDeviceVector& x, Cuda::ManagedDeviceVector& y, Cuda::ManagedDeviceVector& z) const { amrex::ParIter<0,0,PIdx::nattribs>::GetPosition(x, z); y.resize(x.size(), std::numeric_limits::quiet_NaN()); } void -WarpXParIter::SetPosition (const Cuda::DeviceVector& x, const Cuda::DeviceVector& y, const Cuda::DeviceVector& z) +WarpXParIter::SetPosition (const Cuda::ManagedDeviceVector& x, const Cuda::ManagedDeviceVector& y, const Cuda::ManagedDeviceVector& z) { amrex::ParIter<0,0,PIdx::nattribs>::SetPosition(x, z); } @@ -732,7 +732,7 @@ WarpXParticleContainer::GetChargeDensity (int lev, bool local) #pragma omp parallel #endif { - Cuda::DeviceVector xp, yp, zp; + Cuda::ManagedDeviceVector xp, yp, zp; FArrayBox local_rho; for (WarpXParIter pti(*this, lev); pti.isValid(); ++pti) @@ -955,7 +955,7 @@ WarpXParticleContainer::PushX (int lev, Real dt) #pragma omp parallel #endif { - Cuda::DeviceVector xp, yp, zp, giv; + Cuda::ManagedDeviceVector xp, yp, zp, giv; for (WarpXParIter pti(*this, lev); pti.isValid(); ++pti) { -- cgit v1.2.3 From 2e8dda19ad45656a6e928bdc1a2fc5fd0a3fa5a9 Mon Sep 17 00:00:00 2001 From: Andrew Myers Date: Thu, 11 Apr 2019 18:30:24 -0400 Subject: Switch over to using Array4 in PPC::Evolve, which didn't exist when we first ported it --- Source/Laser/LaserParticleContainer.cpp | 5 - Source/Particles/PhysicalParticleContainer.cpp | 4 - Source/Particles/WarpXParticleContainer.H | 10 +- Source/Particles/WarpXParticleContainer.cpp | 197 +++++++++++++------------ 4 files changed, 110 insertions(+), 106 deletions(-) (limited to 'Source/Particles/WarpXParticleContainer.cpp') diff --git a/Source/Laser/LaserParticleContainer.cpp b/Source/Laser/LaserParticleContainer.cpp index 3ef1be154..db5499b8e 100644 --- a/Source/Laser/LaserParticleContainer.cpp +++ b/Source/Laser/LaserParticleContainer.cpp @@ -342,11 +342,6 @@ LaserParticleContainer::Evolve (int lev, int thread_num = 0; #endif - if (local_rho[thread_num] == nullptr) local_rho[thread_num].reset( new amrex::FArrayBox()); - if (local_jx[thread_num] == nullptr) local_jx[thread_num].reset( new amrex::FArrayBox()); - if (local_jy[thread_num] == nullptr) local_jy[thread_num].reset( new amrex::FArrayBox()); - if (local_jz[thread_num] == nullptr) local_jz[thread_num].reset( new amrex::FArrayBox()); - Cuda::ManagedDeviceVector plane_Xp, plane_Yp, amplitude_E; for (WarpXParIter pti(*this, lev); pti.isValid(); ++pti) diff --git a/Source/Particles/PhysicalParticleContainer.cpp b/Source/Particles/PhysicalParticleContainer.cpp index d98e79177..1031b488f 100644 --- a/Source/Particles/PhysicalParticleContainer.cpp +++ b/Source/Particles/PhysicalParticleContainer.cpp @@ -1048,10 +1048,6 @@ PhysicalParticleContainer::Evolve (int lev, #else int thread_num = 0; #endif - if (local_rho[thread_num] == nullptr) local_rho[thread_num].reset( new amrex::FArrayBox()); - if (local_jx[thread_num] == nullptr) local_jx[thread_num].reset( new amrex::FArrayBox()); - if (local_jy[thread_num] == nullptr) local_jy[thread_num].reset( new amrex::FArrayBox()); - if (local_jz[thread_num] == nullptr) local_jz[thread_num].reset( new amrex::FArrayBox()); FArrayBox filtered_Ex, filtered_Ey, filtered_Ez; FArrayBox filtered_Bx, filtered_By, filtered_Bz; diff --git a/Source/Particles/WarpXParticleContainer.H b/Source/Particles/WarpXParticleContainer.H index 3b3fd81f4..6ac2ca621 100644 --- a/Source/Particles/WarpXParticleContainer.H +++ b/Source/Particles/WarpXParticleContainer.H @@ -215,12 +215,12 @@ protected: static int do_not_push; - amrex::Vector > local_rho; - amrex::Vector > local_jx; - amrex::Vector > local_jy; - amrex::Vector > local_jz; + amrex::Vector local_rho; + amrex::Vector local_jx; + amrex::Vector local_jy; + amrex::Vector local_jz; - amrex::Vector > m_xp, m_yp, m_zp, m_giv; + amrex::Vector > m_xp, m_yp, m_zp, m_giv; private: virtual void particlePostLocate(ParticleType& p, const amrex::ParticleLocData& pld, diff --git a/Source/Particles/WarpXParticleContainer.cpp b/Source/Particles/WarpXParticleContainer.cpp index d54dd261d..53ac9d3ff 100644 --- a/Source/Particles/WarpXParticleContainer.cpp +++ b/Source/Particles/WarpXParticleContainer.cpp @@ -56,14 +56,6 @@ WarpXParticleContainer::WarpXParticleContainer (AmrCore* amr_core, int ispecies) m_yp.resize(num_threads); m_zp.resize(num_threads); m_giv.resize(num_threads); - for (int i = 0; i < num_threads; ++i) - { - local_rho[i].reset(nullptr); - local_jx[i].reset(nullptr); - local_jy[i].reset(nullptr); - local_jz[i].reset(nullptr); - } - } void @@ -232,35 +224,38 @@ WarpXParticleContainer::DepositCurrent(WarpXParIter& pti, tby.grow(ngJ); tbz.grow(ngJ); - local_jx[thread_num]->resize(tbx); - local_jy[thread_num]->resize(tby); - local_jz[thread_num]->resize(tbz); + local_jx[thread_num].resize(tbx); + local_jy[thread_num].resize(tby); + local_jz[thread_num].resize(tbz); - jx_ptr = local_jx[thread_num]->dataPtr(); - jy_ptr = local_jy[thread_num]->dataPtr(); - jz_ptr = local_jz[thread_num]->dataPtr(); + jx_ptr = local_jx[thread_num].dataPtr(); + jy_ptr = local_jy[thread_num].dataPtr(); + jz_ptr = local_jz[thread_num].dataPtr(); - FArrayBox* local_jx_ptr = local_jx[thread_num].get(); - AMREX_LAUNCH_HOST_DEVICE_LAMBDA(tbx, b, + auto jxarr = local_jx[thread_num].array(); + amrex::ParallelFor(tbx, + [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept { - local_jx_ptr->setVal(0.0, b, 0, 1); + jxarr(i,j,k) = 0.0; }); - FArrayBox* local_jy_ptr = local_jy[thread_num].get(); - AMREX_LAUNCH_HOST_DEVICE_LAMBDA(tby, b, + auto jyarr = local_jy[thread_num].array(); + amrex::ParallelFor(tby, + [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept { - local_jy_ptr->setVal(0.0, b, 0, 1); + jyarr(i,j,k) = 0.0; }); - FArrayBox* local_jz_ptr = local_jz[thread_num].get(); - AMREX_LAUNCH_HOST_DEVICE_LAMBDA(tbz, b, + auto jzarr = local_jz[thread_num].array(); + amrex::ParallelFor(tbz, + [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept { - local_jz_ptr->setVal(0.0, b, 0, 1); + jzarr(i,j,k) = 0.0; }); - auto jxntot = local_jx[thread_num]->length(); - auto jyntot = local_jy[thread_num]->length(); - auto jzntot = local_jz[thread_num]->length(); + auto jxntot = local_jx[thread_num].length(); + auto jyntot = local_jy[thread_num].length(); + auto jzntot = local_jz[thread_num].length(); BL_PROFILE_VAR_START(blp_pxr_cd); if (j_is_nodal) { @@ -341,28 +336,31 @@ WarpXParticleContainer::DepositCurrent(WarpXParIter& pti, &lvect,&WarpX::current_deposition_algo); } BL_PROFILE_VAR_STOP(blp_pxr_cd); - + BL_PROFILE_VAR_START(blp_accumulate); - - FArrayBox const* local_jx_const_ptr = local_jx[thread_num].get(); - FArrayBox* global_jx_ptr = jx.fabPtr(pti); - AMREX_LAUNCH_HOST_DEVICE_LAMBDA(tbx, thread_bx, + + const auto local_jx_arr = local_jx[thread_num].array(); + auto global_jx_arr = jx.array(pti); + amrex::ParallelFor(tbx, + [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept { - global_jx_ptr->atomicAdd(*local_jx_const_ptr, thread_bx, thread_bx, 0, 0, 1); + Gpu::Atomic::Add(&global_jx_arr(i, j, k), local_jx_arr(i, j, k)); }); - FArrayBox const* local_jy_const_ptr = local_jy[thread_num].get(); - FArrayBox* global_jy_ptr = jy.fabPtr(pti); - AMREX_LAUNCH_HOST_DEVICE_LAMBDA(tby, thread_bx, + const auto local_jy_arr = local_jy[thread_num].array(); + auto global_jy_arr = jy.array(pti); + amrex::ParallelFor(tby, + [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept { - global_jy_ptr->atomicAdd(*local_jy_const_ptr, thread_bx, thread_bx, 0, 0, 1); + Gpu::Atomic::Add(&global_jy_arr(i, j, k), local_jy_arr(i, j, k)); }); - FArrayBox const* local_jz_const_ptr = local_jz[thread_num].get(); - FArrayBox* global_jz_ptr = jz.fabPtr(pti); - AMREX_LAUNCH_HOST_DEVICE_LAMBDA(tbz, thread_bx, + const auto local_jz_arr = local_jz[thread_num].array(); + auto global_jz_arr = jz.array(pti); + amrex::ParallelFor(tbz, + [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept { - global_jz_ptr->atomicAdd(*local_jz_const_ptr, thread_bx, thread_bx, 0, 0, 1); + Gpu::Atomic::Add(&global_jz_arr(i, j, k), local_jz_arr(i, j, k)); }); BL_PROFILE_VAR_STOP(blp_accumulate); @@ -382,34 +380,38 @@ WarpXParticleContainer::DepositCurrent(WarpXParIter& pti, tby.grow(ngJ); tbz.grow(ngJ); - local_jx[thread_num]->resize(tbx); - local_jy[thread_num]->resize(tby); - local_jz[thread_num]->resize(tbz); + local_jx[thread_num].resize(tbx); + local_jy[thread_num].resize(tby); + local_jz[thread_num].resize(tbz); - jx_ptr = local_jx[thread_num]->dataPtr(); - jy_ptr = local_jy[thread_num]->dataPtr(); - jz_ptr = local_jz[thread_num]->dataPtr(); + jx_ptr = local_jx[thread_num].dataPtr(); + jy_ptr = local_jy[thread_num].dataPtr(); + jz_ptr = local_jz[thread_num].dataPtr(); - FArrayBox* local_jx_ptr = local_jx[thread_num].get(); - AMREX_LAUNCH_HOST_DEVICE_LAMBDA(tbx, b, + auto jxarr = local_jx[thread_num].array(); + amrex::ParallelFor(tbx, + [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept { - local_jx_ptr->setVal(0.0, b, 0, 1); + jxarr(i,j,k) = 0.0; }); - FArrayBox* local_jy_ptr = local_jy[thread_num].get(); - AMREX_LAUNCH_HOST_DEVICE_LAMBDA(tby, b, + auto jyarr = local_jy[thread_num].array(); + amrex::ParallelFor(tby, + [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept { - local_jy_ptr->setVal(0.0, b, 0, 1); + jyarr(i,j,k) = 0.0; }); - FArrayBox* local_jz_ptr = local_jz[thread_num].get(); - AMREX_LAUNCH_HOST_DEVICE_LAMBDA(tbz, b, + auto jzarr = local_jz[thread_num].array(); + amrex::ParallelFor(tbz, + [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept { - local_jz_ptr->setVal(0.0, b, 0, 1); + jzarr(i,j,k) = 0.0; }); - auto jxntot = local_jx[thread_num]->length(); - auto jyntot = local_jy[thread_num]->length(); - auto jzntot = local_jz[thread_num]->length(); + + auto jxntot = local_jx[thread_num].length(); + auto jyntot = local_jy[thread_num].length(); + auto jzntot = local_jz[thread_num].length(); long ncrse = np - np_current; BL_PROFILE_VAR_START(blp_pxr_cd); @@ -496,25 +498,28 @@ WarpXParticleContainer::DepositCurrent(WarpXParIter& pti, BL_PROFILE_VAR_START(blp_accumulate); - FArrayBox const* local_jx_const_ptr = local_jx[thread_num].get(); - FArrayBox* global_jx_ptr = cjx->fabPtr(pti); - AMREX_LAUNCH_HOST_DEVICE_LAMBDA(tbx, thread_bx, + const auto local_jx_arr = local_jx[thread_num].array(); + auto global_jx_arr = cjx->array(pti); + amrex::ParallelFor(tbx, + [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept { - global_jx_ptr->atomicAdd(*local_jx_const_ptr, thread_bx, thread_bx, 0, 0, 1); + Gpu::Atomic::Add(&global_jx_arr(i, j, k), local_jx_arr(i, j, k)); }); - FArrayBox const* local_jy_const_ptr = local_jy[thread_num].get(); - FArrayBox* global_jy_ptr = cjy->fabPtr(pti); - AMREX_LAUNCH_HOST_DEVICE_LAMBDA(tby, thread_bx, + const auto local_jy_arr = local_jy[thread_num].array(); + auto global_jy_arr = cjy->array(pti); + amrex::ParallelFor(tby, + [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept { - global_jy_ptr->atomicAdd(*local_jy_const_ptr, thread_bx, thread_bx, 0, 0, 1); + Gpu::Atomic::Add(&global_jy_arr(i, j, k), local_jy_arr(i, j, k)); }); - FArrayBox const* local_jz_const_ptr = local_jz[thread_num].get(); - FArrayBox* global_jz_ptr = cjz->fabPtr(pti); - AMREX_LAUNCH_HOST_DEVICE_LAMBDA(tbz, thread_bx, + const auto local_jz_arr = local_jz[thread_num].array(); + auto global_jz_arr = cjz->array(pti); + amrex::ParallelFor(tbz, + [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept { - global_jz_ptr->atomicAdd(*local_jz_const_ptr, thread_bx, thread_bx, 0, 0, 1); + Gpu::Atomic::Add(&global_jz_arr(i, j, k), local_jz_arr(i, j, k)); }); BL_PROFILE_VAR_STOP(blp_accumulate); @@ -547,15 +552,17 @@ WarpXParticleContainer::DepositCharge ( WarpXParIter& pti, RealVector& wp, { const std::array& xyzmin = xyzmin_tile; tile_box.grow(ngRho); - local_rho[thread_num]->resize(tile_box); - FArrayBox* local_rho_ptr = local_rho[thread_num].get(); - AMREX_LAUNCH_HOST_DEVICE_LAMBDA(tile_box, b, + local_rho[thread_num].resize(tile_box); + + auto rhoarr = local_rho[thread_num].array(); + amrex::ParallelFor(tile_box, + [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept { - local_rho_ptr->setVal(0.0, b, 0, 1); + rhoarr(i,j,k) = 0.0; }); - data_ptr = local_rho[thread_num]->dataPtr(); - auto rholen = local_rho[thread_num]->length(); + data_ptr = local_rho[thread_num].dataPtr(); + auto rholen = local_rho[thread_num].length(); #if (AMREX_SPACEDIM == 3) const long nx = rholen[0]-1-2*ngRho; const long ny = rholen[1]-1-2*ngRho; @@ -579,14 +586,16 @@ WarpXParticleContainer::DepositCharge ( WarpXParIter& pti, RealVector& wp, &lvect, &WarpX::charge_deposition_algo); BL_PROFILE_VAR_STOP(blp_pxr_chd); - const int ncomp = 1; - FArrayBox const* local_fab = local_rho[thread_num].get(); - FArrayBox* global_fab = rhomf->fabPtr(pti); BL_PROFILE_VAR_START(blp_accumulate); - AMREX_LAUNCH_HOST_DEVICE_LAMBDA(tile_box, tbx, + + const auto local_rho_arr = local_rho[thread_num].array(); + auto global_rho_arr = rhomf->array(pti); + amrex::ParallelFor(tile_box, + [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept { - global_fab->atomicAdd(*local_fab, tbx, tbx, 0, icomp, ncomp); + Gpu::Atomic::Add(&global_rho_arr(i, j, k), local_rho_arr(i, j, k)); }); + BL_PROFILE_VAR_STOP(blp_accumulate); } @@ -599,15 +608,17 @@ WarpXParticleContainer::DepositCharge ( WarpXParIter& pti, RealVector& wp, tile_box = amrex::convert(ctilebox, IntVect::TheUnitVector()); tile_box.grow(ngRho); - local_rho[thread_num]->resize(tile_box); - FArrayBox* local_rho_ptr = local_rho[thread_num].get(); - AMREX_LAUNCH_HOST_DEVICE_LAMBDA(tile_box, b, + local_rho[thread_num].resize(tile_box); + + auto rhoarr = local_rho[thread_num].array(); + amrex::ParallelFor(tile_box, + [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept { - local_rho_ptr->setVal(0.0, b, 0, 1); + rhoarr(i,j,k) = 0.0; }); - data_ptr = local_rho[thread_num]->dataPtr(); - auto rholen = local_rho[thread_num]->length(); + data_ptr = local_rho[thread_num].dataPtr(); + auto rholen = local_rho[thread_num].length(); #if (AMREX_SPACEDIM == 3) const long nx = rholen[0]-1-2*ngRho; const long ny = rholen[1]-1-2*ngRho; @@ -633,14 +644,16 @@ WarpXParticleContainer::DepositCharge ( WarpXParIter& pti, RealVector& wp, &lvect, &WarpX::charge_deposition_algo); BL_PROFILE_VAR_STOP(blp_pxr_chd); - const int ncomp = 1; - FArrayBox const* local_fab = local_rho[thread_num].get(); - FArrayBox* global_fab = crhomf->fabPtr(pti); BL_PROFILE_VAR_START(blp_accumulate); - AMREX_LAUNCH_HOST_DEVICE_LAMBDA(tile_box, tbx, + + const auto local_rho_arr = local_rho[thread_num].array(); + auto global_rho_arr = crhomf->array(pti); + amrex::ParallelFor(tile_box, + [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept { - global_fab->atomicAdd(*local_fab, tbx, tbx, 0, icomp, ncomp); + Gpu::Atomic::Add(&global_rho_arr(i, j, k), local_rho_arr(i, j, k)); }); + BL_PROFILE_VAR_STOP(blp_accumulate); } }; -- cgit v1.2.3 From ed78757698b7eb5bba4e2e5d0fccedb8620ca80e Mon Sep 17 00:00:00 2001 From: Andrew Myers Date: Mon, 15 Apr 2019 14:32:38 -0400 Subject: properly pass through icomp when depositing rho --- Source/Particles/WarpXParticleContainer.cpp | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'Source/Particles/WarpXParticleContainer.cpp') diff --git a/Source/Particles/WarpXParticleContainer.cpp b/Source/Particles/WarpXParticleContainer.cpp index 53ac9d3ff..a9e0a7418 100644 --- a/Source/Particles/WarpXParticleContainer.cpp +++ b/Source/Particles/WarpXParticleContainer.cpp @@ -529,9 +529,9 @@ WarpXParticleContainer::DepositCurrent(WarpXParIter& pti, void WarpXParticleContainer::DepositCharge ( WarpXParIter& pti, RealVector& wp, - MultiFab* rhomf, MultiFab* crhomf, int icomp, - const long np_current, - const long np, int thread_num, int lev ) + MultiFab* rhomf, MultiFab* crhomf, int icomp, + const long np_current, + const long np, int thread_num, int lev ) { BL_PROFILE_VAR_NS("PICSAR::ChargeDeposition", blp_pxr_chd); @@ -593,7 +593,7 @@ WarpXParticleContainer::DepositCharge ( WarpXParIter& pti, RealVector& wp, amrex::ParallelFor(tile_box, [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept { - Gpu::Atomic::Add(&global_rho_arr(i, j, k), local_rho_arr(i, j, k)); + Gpu::Atomic::Add(&global_rho_arr(i, j, k, icomp), local_rho_arr(i, j, k)); }); BL_PROFILE_VAR_STOP(blp_accumulate); @@ -651,7 +651,7 @@ WarpXParticleContainer::DepositCharge ( WarpXParIter& pti, RealVector& wp, amrex::ParallelFor(tile_box, [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept { - Gpu::Atomic::Add(&global_rho_arr(i, j, k), local_rho_arr(i, j, k)); + Gpu::Atomic::Add(&global_rho_arr(i, j, k, icomp), local_rho_arr(i, j, k)); }); BL_PROFILE_VAR_STOP(blp_accumulate); -- cgit v1.2.3