From dea53978d67c232f27da076b3ddcb0fca01bdfe6 Mon Sep 17 00:00:00 2001 From: Yin-YinjianZhao Date: Mon, 7 Oct 2019 10:09:43 -0700 Subject: Fix issue: 'AddParticles' is slow on GPU. --- Source/Particles/PhysicalParticleContainer.cpp | 72 +++++++++++++++++++++----- 1 file changed, 58 insertions(+), 14 deletions(-) (limited to 'Source/Particles/PhysicalParticleContainer.cpp') diff --git a/Source/Particles/PhysicalParticleContainer.cpp b/Source/Particles/PhysicalParticleContainer.cpp index d1f25fe5d..281345f55 100644 --- a/Source/Particles/PhysicalParticleContainer.cpp +++ b/Source/Particles/PhysicalParticleContainer.cpp @@ -156,15 +156,17 @@ PhysicalParticleContainer::AddGaussianBeam(Real x_m, Real y_m, Real z_m, std::normal_distribution disty(y_m, y_rms); std::normal_distribution distz(z_m, z_rms); + // Allocate temporary vectors on the CPU + Gpu::HostVector particle_x; + Gpu::HostVector particle_y; + Gpu::HostVector particle_z; + Gpu::HostVector particle_ux; + Gpu::HostVector particle_uy; + Gpu::HostVector particle_uz; + Gpu::HostVector particle_w; + int np = 0; + if (ParallelDescriptor::IOProcessor()) { - // Allocate temporary vectors on the CPU - Gpu::HostVector particle_x; - Gpu::HostVector particle_y; - Gpu::HostVector particle_z; - Gpu::HostVector particle_ux; - Gpu::HostVector particle_uy; - Gpu::HostVector particle_uz; - Gpu::HostVector particle_w; // If do_symmetrize, create 4x fewer particles, and // Replicate each particle 4 times (x,y) (-x,y) (x,-y) (-x,-y) if (do_symmetrize){ @@ -189,17 +191,59 @@ PhysicalParticleContainer::AddGaussianBeam(Real x_m, Real y_m, Real z_m, u.z *= PhysConst::c; if (do_symmetrize){ // Add four particles to the beam: - CheckAndAddParticle( x, y, z, { u.x, u.y, u.z}, weight/4. ); - CheckAndAddParticle( x,-y, z, { u.x,-u.y, u.z}, weight/4. ); - CheckAndAddParticle(-x, y, z, {-u.x, u.y, u.z}, weight/4. ); - CheckAndAddParticle(-x,-y, z, {-u.x,-u.y, u.z}, weight/4. ); + CheckAndAddParticle(x, y, z, { u.x, u.y, u.z}, weight/4., + particle_x, particle_y, particle_z, + particle_ux, particle_uy, particle_uz, + particle_w); + CheckAndAddParticle(x, -y, z, { u.x, -u.y, u.z}, weight/4., + particle_x, particle_y, particle_z, + particle_ux, particle_uy, particle_uz, + particle_w); + CheckAndAddParticle(-x, y, z, { -u.x, u.y, u.z}, weight/4., + particle_x, particle_y, particle_z, + particle_ux, particle_uy, particle_uz, + particle_w); + CheckAndAddParticle(-x, -y, z, { -u.x, -u.y, u.z}, weight/4., + particle_x, particle_y, particle_z, + particle_ux, particle_uy, particle_uz, + particle_w); + } else { - CheckAndAddParticle(x, y, z, {u.x,u.y,u.z}, weight); + CheckAndAddParticle(x, y, z, { u.x, u.y, u.z}, weight, + particle_x, particle_y, particle_z, + particle_ux, particle_uy, particle_uz, + particle_w); } } } } - Redistribute(); + // Add the temporary CPU vectors to the particle structure + np = particle_z.size(); + AddNParticles(0,np, + particle_x.dataPtr(), particle_y.dataPtr(), particle_z.dataPtr(), + particle_ux.dataPtr(), particle_uy.dataPtr(), particle_uz.dataPtr(), + 1, particle_w.dataPtr(),1); +} + +void +PhysicalParticleContainer::CheckAndAddParticle(Real x, Real y, Real z, + std::array u, + Real weight, + Gpu::HostVector& particle_x, + Gpu::HostVector& particle_y, + Gpu::HostVector& particle_z, + Gpu::HostVector& particle_ux, + Gpu::HostVector& particle_uy, + Gpu::HostVector& particle_uz, + Gpu::HostVector& particle_w) +{ + particle_x.push_back(x); + particle_y.push_back(y); + particle_z.push_back(z); + particle_ux.push_back(u[0]); + particle_uy.push_back(u[1]); + particle_uz.push_back(u[2]); + particle_w.push_back(weight); } void -- cgit v1.2.3