diff options
author | 2019-09-19 11:32:46 -0700 | |
---|---|---|
committer | 2019-09-19 11:32:46 -0700 | |
commit | 9f8d0990f8690eff0ba3c7930b716a825a1b95cb (patch) | |
tree | 004715b2a75dafdfa09cbd74540c26aea62fd8b9 /Source/Particles/PhysicalParticleContainer.cpp | |
parent | ab0d9778e01616bb153cea2a4e3517ea617bac1b (diff) | |
download | WarpX-9f8d0990f8690eff0ba3c7930b716a825a1b95cb.tar.gz WarpX-9f8d0990f8690eff0ba3c7930b716a825a1b95cb.tar.zst WarpX-9f8d0990f8690eff0ba3c7930b716a825a1b95cb.zip |
Implementing GPU version of particle copy and commented out the CPU copy since the newly implemented chunk of code to add particles on the GPU in GetParticleSlice() works both on cpu and gpu
Diffstat (limited to 'Source/Particles/PhysicalParticleContainer.cpp')
-rw-r--r-- | Source/Particles/PhysicalParticleContainer.cpp | 234 |
1 files changed, 158 insertions, 76 deletions
diff --git a/Source/Particles/PhysicalParticleContainer.cpp b/Source/Particles/PhysicalParticleContainer.cpp index 9681f3682..d67ebd971 100644 --- a/Source/Particles/PhysicalParticleContainer.cpp +++ b/Source/Particles/PhysicalParticleContainer.cpp @@ -1766,6 +1766,11 @@ void PhysicalParticleContainer::GetParticleSlice(const int direction, const Real #pragma omp parallel #endif { +#ifdef _OPENMP + int thread_num = omp_get_thread_num(); +#else + int thread_num = 0; +#endif RealVector xp_new, yp_new, zp_new; for (WarpXParIter pti(*this, lev); pti.isValid(); ++pti) @@ -1780,98 +1785,175 @@ void PhysicalParticleContainer::GetParticleSlice(const int direction, const Real if ( !slice_box.intersects(tile_real_box) ) continue; - pti.GetPosition(xp_new, yp_new, zp_new); +// pti.GetPosition(xp_new, yp_new, zp_new); + // for gpu // + pti.GetPosition(m_xp[thread_num],m_yp[thread_num],m_zp[thread_num]); + Real *const AMREX_RESTRICT xpnew = m_xp[thread_num].dataPtr(); + Real *const AMREX_RESTRICT ypnew = m_yp[thread_num].dataPtr(); + Real *const AMREX_RESTRICT zpnew = m_zp[thread_num].dataPtr(); auto& attribs = pti.GetAttribs(); - auto& wp = attribs[PIdx::w ]; - - auto& uxp_new = attribs[PIdx::ux ]; - auto& uyp_new = attribs[PIdx::uy ]; - auto& uzp_new = attribs[PIdx::uz ]; - - auto& xp_old = tmp_particle_data[lev][index][TmpIdx::xold]; - auto& yp_old = tmp_particle_data[lev][index][TmpIdx::yold]; - auto& zp_old = tmp_particle_data[lev][index][TmpIdx::zold]; - auto& uxp_old = tmp_particle_data[lev][index][TmpIdx::uxold]; - auto& uyp_old = tmp_particle_data[lev][index][TmpIdx::uyold]; - auto& uzp_old = tmp_particle_data[lev][index][TmpIdx::uzold]; +// auto& wp = attribs[PIdx::w ]; + // for gpu // + Real* const AMREX_RESTRICT wpnew = attribs[PIdx::w].dataPtr(); + +// auto& uxp_new = attribs[PIdx::ux ]; +// auto& uyp_new = attribs[PIdx::uy ]; +// auto& uzp_new = attribs[PIdx::uz ]; + // for gpu // + Real* const AMREX_RESTRICT uxpnew = attribs[PIdx::ux ].dataPtr(); + Real* const AMREX_RESTRICT uypnew = attribs[PIdx::uy ].dataPtr(); + Real* const AMREX_RESTRICT uzpnew = attribs[PIdx::uz ].dataPtr(); + +// auto& xp_old = tmp_particle_data[lev][index][TmpIdx::xold]; +// auto& yp_old = tmp_particle_data[lev][index][TmpIdx::yold]; +// auto& zp_old = tmp_particle_data[lev][index][TmpIdx::zold]; +// auto& uxp_old = tmp_particle_data[lev][index][TmpIdx::uxold]; +// auto& uyp_old = tmp_particle_data[lev][index][TmpIdx::uyold]; +// auto& uzp_old = tmp_particle_data[lev][index][TmpIdx::uzold]; + // for gpu // + Real* const AMREX_RESTRICT xpold = tmp_particle_data[lev][index][TmpIdx::xold].dataPtr(); + Real* const AMREX_RESTRICT ypold = tmp_particle_data[lev][index][TmpIdx::yold].dataPtr(); + Real* const AMREX_RESTRICT zpold = tmp_particle_data[lev][index][TmpIdx::zold].dataPtr(); + Real* const AMREX_RESTRICT uxpold = tmp_particle_data[lev][index][TmpIdx::uxold].dataPtr(); + Real* const AMREX_RESTRICT uypold = tmp_particle_data[lev][index][TmpIdx::uyold].dataPtr(); + Real* const AMREX_RESTRICT uzpold = tmp_particle_data[lev][index][TmpIdx::uzold].dataPtr(); const long np = pti.numParticles(); Real uzfrm = -WarpX::gamma_boost*WarpX::beta_boost*PhysConst::c; Real inv_c2 = 1.0/PhysConst::c/PhysConst::c; -#ifdef AMREX_USE_GPU - amrex::Gpu::DeviceVector<int> FlagForPartCopy(np); - amrex::Gpu::DeviceVector<int> IndexForPartCopy(np); +//#ifdef AMREX_USE_GPU + // temporary arrays to store copy_flag and copy_index for particles that cross the z-plane + + amrex::Gpu::ManagedDeviceVector<int> FlagForPartCopy(np); + amrex::Gpu::ManagedDeviceVector<int> IndexForPartCopy(np); -// amrex parallel for to flag particles that need to be copied + int* const AMREX_RESTRICT Flag = FlagForPartCopy.dataPtr(); + int* const AMREX_RESTRICT IndexLocation = IndexForPartCopy.dataPtr(); + + //Flag particles that need to be copied if they cross the z_slice amrex::ParallelFor(np, - [=] AMREX_GPU_DEVICE(int i) noexcept + [=] AMREX_GPU_DEVICE(int i) { - FlagForPartCopy[i] = 0; - if ( not (((zp_new[i] >= z_new) && (zp_old[i] <= z_old)) || - ((zp_new[i] <= z_new) && (zp_old[i] >= z_old))) ) + Flag[i] = 0; + if ( (((zpnew[i] >= z_new) && (zpold[i] <= z_old)) || + ((zpnew[i] <= z_new) && (zpold[i] >= z_old))) ) { - FlagForPartCopy[i] = 1; + Flag[i] = 1; } }); -// exclusive scan to obtain location indices in the dst array - amrex::Gpu::exclusive_scan(FlagForPartCopy,FlagForPartCopy+np,IndexForPartCopy); -// Finally copy on the GPU - -#endif - - - for (long i = 0; i < np; ++i) { - - // if the particle did not cross the plane of z_boost in the last - // timestep, skip it. - - if ( not (((zp_new[i] >= z_new) && (zp_old[i] <= z_old)) || - ((zp_new[i] <= z_new) && (zp_old[i] >= z_old))) ) continue; - - - // Lorentz transform particles to lab frame - Real gamma_new_p = std::sqrt(1.0 + inv_c2*(uxp_new[i]*uxp_new[i] + uyp_new[i]*uyp_new[i] + uzp_new[i]*uzp_new[i])); - Real t_new_p = WarpX::gamma_boost*t_boost - uzfrm*zp_new[i]*inv_c2; - Real z_new_p = WarpX::gamma_boost*(zp_new[i] + WarpX::beta_boost*PhysConst::c*t_boost); - Real uz_new_p = WarpX::gamma_boost*uzp_new[i] - gamma_new_p*uzfrm; - - Real gamma_old_p = std::sqrt(1.0 + inv_c2*(uxp_old[i]*uxp_old[i] + uyp_old[i]*uyp_old[i] + uzp_old[i]*uzp_old[i])); - Real t_old_p = WarpX::gamma_boost*(t_boost - dt) - uzfrm*zp_old[i]*inv_c2; - Real z_old_p = WarpX::gamma_boost*(zp_old[i] + WarpX::beta_boost*PhysConst::c*(t_boost-dt)); - Real uz_old_p = WarpX::gamma_boost*uzp_old[i] - gamma_old_p*uzfrm; - - // interpolate in time to t_lab - Real weight_old = (t_new_p - t_lab) / (t_new_p - t_old_p); - Real weight_new = (t_lab - t_old_p) / (t_new_p - t_old_p); - - Real xp = xp_old[i]*weight_old + xp_new[i]*weight_new; - Real yp = yp_old[i]*weight_old + yp_new[i]*weight_new; - Real zp = z_old_p *weight_old + z_new_p *weight_new; - - Real uxp = uxp_old[i]*weight_old + uxp_new[i]*weight_new; - Real uyp = uyp_old[i]*weight_old + uyp_new[i]*weight_new; - Real uzp = uz_old_p *weight_old + uz_new_p *weight_new; - - - ++counter_for_ParticleCopy; - diagnostic_particles[lev][index].GetRealData(DiagIdx::w).push_back(wp[i]); - - diagnostic_particles[lev][index].GetRealData(DiagIdx::x).push_back(xp); - diagnostic_particles[lev][index].GetRealData(DiagIdx::y).push_back(yp); - diagnostic_particles[lev][index].GetRealData(DiagIdx::z).push_back(zp); - - diagnostic_particles[lev][index].GetRealData(DiagIdx::ux).push_back(uxp); - diagnostic_particles[lev][index].GetRealData(DiagIdx::uy).push_back(uyp); - diagnostic_particles[lev][index].GetRealData(DiagIdx::uz).push_back(uzp); - } - if (counter_for_ParticleCopy > 0) + + // exclusive scan to obtain location indices using flag values + amrex::Gpu::exclusive_scan(Flag,Flag+np,IndexLocation); + + int total_partdiag_size = IndexLocation[np-1] + Flag[np-1]; + + // allocate array size for diagnostic particle array + diagnostic_particles[lev][index].resize(total_partdiag_size); + + amrex::Real gammaboost = WarpX::gamma_boost; + amrex::Real betaboost = WarpX::beta_boost; + amrex::Real Phys_c = PhysConst::c; + + Real* const AMREX_RESTRICT diag_wp = diagnostic_particles[lev][index].GetRealData(DiagIdx::w).data(); + Real* const AMREX_RESTRICT diag_xp = diagnostic_particles[lev][index].GetRealData(DiagIdx::x).data(); + Real* const AMREX_RESTRICT diag_yp = diagnostic_particles[lev][index].GetRealData(DiagIdx::y).data(); + Real* const AMREX_RESTRICT diag_zp = diagnostic_particles[lev][index].GetRealData(DiagIdx::z).data(); + Real* const AMREX_RESTRICT diag_uxp = diagnostic_particles[lev][index].GetRealData(DiagIdx::ux).data(); + Real* const AMREX_RESTRICT diag_uyp = diagnostic_particles[lev][index].GetRealData(DiagIdx::uy).data(); + Real* const AMREX_RESTRICT diag_uzp = diagnostic_particles[lev][index].GetRealData(DiagIdx::uz).data(); + + // Copy particle data to diagnostic particle array on the GPU using flag and index values + amrex::ParallelFor(np, + [=] AMREX_GPU_DEVICE(int i) { - amrex::Print() << " counter index " << counter_for_ParticleCopy << "\n"; - } + if (Flag[i] == 1) + { + const Real gamma_new_p = std::sqrt(1.0 + inv_c2*(uxpnew[i]*uxpnew[i] + uypnew[i]*uypnew[i] + uzpnew[i]*uzpnew[i])); + const Real t_new_p = gammaboost*t_boost - uzfrm*zpnew[i]*inv_c2; + const Real z_new_p = gammaboost*(zpnew[i] + betaboost*Phys_c*t_boost); + const Real uz_new_p = gammaboost*uzpnew[i] - gamma_new_p*uzfrm; + + const Real gamma_old_p = std::sqrt(1.0 + inv_c2*(uxpold[i]*uxpold[i] + uypold[i]*uypold[i] + uzpold[i]*uzpold[i])); + const Real t_old_p = gammaboost*(t_boost - dt) - uzfrm*zpold[i]*inv_c2; + const Real z_old_p = gammaboost*(zpold[i] + betaboost*Phys_c*(t_boost-dt)); + const Real uz_old_p = gammaboost*uzpold[i] - gamma_old_p*uzfrm; + + // interpolate in time to t_lab + const Real weight_old = (t_new_p - t_lab) / (t_new_p - t_old_p); + const Real weight_new = (t_lab - t_old_p) / (t_new_p - t_old_p); + + const Real xp = xpold[i]*weight_old + xpnew[i]*weight_new; + const Real yp = ypold[i]*weight_old + ypnew[i]*weight_new; + const Real zp = z_old_p *weight_old + z_new_p *weight_new; + + const Real uxp = uxpold[i]*weight_old + uxpnew[i]*weight_new; + const Real uyp = uypold[i]*weight_old + uypnew[i]*weight_new; + const Real uzp = uz_old_p *weight_old + uz_new_p *weight_new; + + int loc = IndexLocation[i]; + diag_wp[loc] = wpnew[i]; + diag_xp[loc] = xp; + diag_yp[loc] = yp; + diag_zp[loc] = zp; + diag_uxp[loc] = uxp; + diag_uyp[loc] = uyp; + diag_uzp[loc] = uzp; + } + }); +//#else +// +// for (long i = 0; i < np; ++i) { +// +// // if the particle did not cross the plane of z_boost in the last +// // timestep, skip it. +// +// if ( not (((zp_new[i] >= z_new) && (zp_old[i] <= z_old)) || +// ((zp_new[i] <= z_new) && (zp_old[i] >= z_old))) ) continue; +// +// // Lorentz transform particles to lab frame +// Real gamma_new_p = std::sqrt(1.0 + inv_c2*(uxp_new[i]*uxp_new[i] + uyp_new[i]*uyp_new[i] + uzp_new[i]*uzp_new[i])); +// Real t_new_p = WarpX::gamma_boost*t_boost - uzfrm*zp_new[i]*inv_c2; +// Real z_new_p = WarpX::gamma_boost*(zp_new[i] + WarpX::beta_boost*PhysConst::c*t_boost); +// Real uz_new_p = WarpX::gamma_boost*uzp_new[i] - gamma_new_p*uzfrm; +// +// Real gamma_old_p = std::sqrt(1.0 + inv_c2*(uxp_old[i]*uxp_old[i] + uyp_old[i]*uyp_old[i] + uzp_old[i]*uzp_old[i])); +// Real t_old_p = WarpX::gamma_boost*(t_boost - dt) - uzfrm*zp_old[i]*inv_c2; +// Real z_old_p = WarpX::gamma_boost*(zp_old[i] + WarpX::beta_boost*PhysConst::c*(t_boost-dt)); +// Real uz_old_p = WarpX::gamma_boost*uzp_old[i] - gamma_old_p*uzfrm; +// +// // interpolate in time to t_lab +// Real weight_old = (t_new_p - t_lab) / (t_new_p - t_old_p); +// Real weight_new = (t_lab - t_old_p) / (t_new_p - t_old_p); +// +// Real xp = xp_old[i]*weight_old + xp_new[i]*weight_new; +// Real yp = yp_old[i]*weight_old + yp_new[i]*weight_new; +// Real zp = z_old_p *weight_old + z_new_p *weight_new; +// +// Real uxp = uxp_old[i]*weight_old + uxp_new[i]*weight_new; +// Real uyp = uyp_old[i]*weight_old + uyp_new[i]*weight_new; +// Real uzp = uz_old_p *weight_old + uz_new_p *weight_new; +// +// +// ++counter_for_ParticleCopy; +// diagnostic_particles[lev][index].GetRealData(DiagIdx::w).push_back(wp[i]); +// +// diagnostic_particles[lev][index].GetRealData(DiagIdx::x).push_back(xp); +// diagnostic_particles[lev][index].GetRealData(DiagIdx::y).push_back(yp); +// diagnostic_particles[lev][index].GetRealData(DiagIdx::z).push_back(zp); +// +// diagnostic_particles[lev][index].GetRealData(DiagIdx::ux).push_back(uxp); +// diagnostic_particles[lev][index].GetRealData(DiagIdx::uy).push_back(uyp); +// diagnostic_particles[lev][index].GetRealData(DiagIdx::uz).push_back(uzp); +// } +// if (counter_for_ParticleCopy > 0) +// { +// amrex::Print() << " counter index " << counter_for_ParticleCopy << "\n"; +// } +//#endif } } } |