aboutsummaryrefslogtreecommitdiff
path: root/Source/Particles/WarpXParticleContainer.cpp
diff options
context:
space:
mode:
authorGravatar Andrew Myers <atmyers@lbl.gov> 2020-03-30 09:07:48 -0700
committerGravatar GitHub <noreply@github.com> 2020-03-30 09:07:48 -0700
commit6a425feed17500c087a78243d29edd86a70df52f (patch)
treecc1f0082be53971fde29b20641cbb4f24266eb6d /Source/Particles/WarpXParticleContainer.cpp
parent2bcc3ca3c89a5245000c04cbf2afc2d2c55e332b (diff)
downloadWarpX-6a425feed17500c087a78243d29edd86a70df52f.tar.gz
WarpX-6a425feed17500c087a78243d29edd86a70df52f.tar.zst
WarpX-6a425feed17500c087a78243d29edd86a70df52f.zip
Port rigid injection to the gpu (#862)
* port rigid injection to the gpu * eol * Apply suggestions from code review Co-Authored-By: MaxThevenet <mthevenet@lbl.gov> * define csqi Co-authored-by: MaxThevenet <mthevenet@lbl.gov>
Diffstat (limited to 'Source/Particles/WarpXParticleContainer.cpp')
-rw-r--r--Source/Particles/WarpXParticleContainer.cpp67
1 files changed, 52 insertions, 15 deletions
diff --git a/Source/Particles/WarpXParticleContainer.cpp b/Source/Particles/WarpXParticleContainer.cpp
index 81dd0c7e1..40debd257 100644
--- a/Source/Particles/WarpXParticleContainer.cpp
+++ b/Source/Particles/WarpXParticleContainer.cpp
@@ -646,25 +646,62 @@ std::array<Real, 3> WarpXParticleContainer::meanParticleVelocity(bool local) {
amrex::Real inv_clight_sq = 1.0/PhysConst::c/PhysConst::c;
const int nLevels = finestLevel();
- for (int lev = 0; lev <= nLevels; ++lev) {
+#ifdef AMREX_USE_GPU
+ if (Gpu::inLaunchRegion())
+ {
+ ReduceOps<ReduceOpSum, ReduceOpSum, ReduceOpSum> reduce_op;
+ ReduceData<Real, Real, Real> reduce_data(reduce_op);
+ using ReduceTuple = typename decltype(reduce_data)::Type;
+ for (int lev = 0; lev <= nLevels; ++lev) {
+ for (WarpXParIter pti(*this, lev); pti.isValid(); ++pti)
+ {
+ const auto uxp = pti.GetAttribs(PIdx::ux).data();
+ const auto uyp = pti.GetAttribs(PIdx::uy).data();
+ const auto uzp = pti.GetAttribs(PIdx::uz).data();
+
+ const long np = pti.numParticles();
+ np_total += np;
+
+ reduce_op.eval(np, reduce_data,
+ [=] AMREX_GPU_DEVICE (int i) -> ReduceTuple
+ {
+ Real usq = (uxp[i]*uxp[i] +
+ uyp[i]*uyp[i] +
+ uzp[i]*uzp[i])*inv_clight_sq;
+ Real gaminv = 1.0_rt/std::sqrt(1.0_rt + usq);
+ return {uxp[i]*gaminv, uyp[i]*gaminv, uzp[i]*gaminv};
+ });
+ }
+ }
+
+ ReduceTuple hv = reduce_data.value();
+ vx_total = amrex::get<0>(hv);
+ vy_total = amrex::get<1>(hv);
+ vz_total = amrex::get<2>(hv);
+ }
+ else
+#endif
+ {
+ for (int lev = 0; lev <= nLevels; ++lev) {
#ifdef _OPENMP
#pragma omp parallel reduction(+:vx_total, vy_total, vz_total, np_total)
#endif
- for (WarpXParIter pti(*this, lev); pti.isValid(); ++pti)
- {
- auto& ux = pti.GetAttribs(PIdx::ux);
- auto& uy = pti.GetAttribs(PIdx::uy);
- auto& uz = pti.GetAttribs(PIdx::uz);
-
- np_total += pti.numParticles();
-
- for (unsigned long i = 0; i < ux.size(); i++) {
- Real usq = (ux[i]*ux[i] + uy[i]*uy[i] + uz[i]*uz[i])*inv_clight_sq;
- Real gaminv = 1.0/std::sqrt(1.0 + usq);
- vx_total += ux[i]*gaminv;
- vy_total += uy[i]*gaminv;
- vz_total += uz[i]*gaminv;
+ for (WarpXParIter pti(*this, lev); pti.isValid(); ++pti)
+ {
+ auto& ux = pti.GetAttribs(PIdx::ux);
+ auto& uy = pti.GetAttribs(PIdx::uy);
+ auto& uz = pti.GetAttribs(PIdx::uz);
+
+ np_total += pti.numParticles();
+
+ for (unsigned long i = 0; i < ux.size(); i++) {
+ Real usq = (ux[i]*ux[i] + uy[i]*uy[i] + uz[i]*uz[i])*inv_clight_sq;
+ Real gaminv = 1.0_rt/std::sqrt(1.0_rt + usq);
+ vx_total += ux[i]*gaminv;
+ vy_total += uy[i]*gaminv;
+ vz_total += uz[i]*gaminv;
+ }
}
}
}