Port rigid injection to the gpu (#862)

* port rigid injection to the gpu * eol * Apply suggestions from code review Co-Authored-By: MaxThevenet <mthevenet@lbl.gov> * define csqi Co-authored-by: MaxThevenet <mthevenet@lbl.gov>
author: Andrew Myers <atmyers@lbl.gov> 2020-03-30 09:07:48 -0700
committer: GitHub <noreply@github.com> 2020-03-30 09:07:48 -0700
commit: 6a425feed17500c087a78243d29edd86a70df52f (patch)
tree: cc1f0082be53971fde29b20641cbb4f24266eb6d /Source/Particles/WarpXParticleContainer.cpp
parent: 2bcc3ca3c89a5245000c04cbf2afc2d2c55e332b (diff)
download: WarpX-6a425feed17500c087a78243d29edd86a70df52f.tar.gz
WarpX-6a425feed17500c087a78243d29edd86a70df52f.tar.zst
WarpX-6a425feed17500c087a78243d29edd86a70df52f.zip
1 files changed, 52 insertions, 15 deletions
diff --git a/Source/Particles/WarpXParticleContainer.cpp b/Source/Particles/WarpXParticleContainer.cpp
index 81dd0c7e1..40debd257 100644
--- a/Source/Particles/WarpXParticleContainer.cpp
+++ b/Source/Particles/WarpXParticleContainer.cpp
@@ -646,25 +646,62 @@ std::array<Real, 3> WarpXParticleContainer::meanParticleVelocity(bool local) {
     amrex::Real inv_clight_sq = 1.0/PhysConst::c/PhysConst::c;
 
     const int nLevels = finestLevel();
-    for (int lev = 0; lev <= nLevels; ++lev) {
 
+#ifdef AMREX_USE_GPU
+    if (Gpu::inLaunchRegion())
+    {
+        ReduceOps<ReduceOpSum, ReduceOpSum, ReduceOpSum> reduce_op;
+        ReduceData<Real, Real, Real> reduce_data(reduce_op);
+        using ReduceTuple = typename decltype(reduce_data)::Type;
+        for (int lev = 0; lev <= nLevels; ++lev) {
+            for (WarpXParIter pti(*this, lev); pti.isValid(); ++pti)
+            {
+                const auto uxp = pti.GetAttribs(PIdx::ux).data();
+                const auto uyp = pti.GetAttribs(PIdx::uy).data();
+                const auto uzp = pti.GetAttribs(PIdx::uz).data();
+
+                const long np = pti.numParticles();
+                np_total += np;
+
+                reduce_op.eval(np, reduce_data,
+                               [=] AMREX_GPU_DEVICE (int i) -> ReduceTuple
+                               {
+                                   Real usq = (uxp[i]*uxp[i] +
+                                               uyp[i]*uyp[i] +
+                                               uzp[i]*uzp[i])*inv_clight_sq;
+                                   Real gaminv = 1.0_rt/std::sqrt(1.0_rt + usq);
+                                   return {uxp[i]*gaminv,  uyp[i]*gaminv, uzp[i]*gaminv};
+                               });
+            }
+        }
+
+        ReduceTuple hv = reduce_data.value();
+        vx_total = amrex::get<0>(hv);
+        vy_total = amrex::get<1>(hv);
+        vz_total = amrex::get<2>(hv);
+    }
+    else
+#endif
+    {
+        for (int lev = 0; lev <= nLevels; ++lev) {
 #ifdef _OPENMP
 #pragma omp parallel reduction(+:vx_total, vy_total, vz_total, np_total)
 #endif
-        for (WarpXParIter pti(*this, lev); pti.isValid(); ++pti)
-        {
-            auto& ux = pti.GetAttribs(PIdx::ux);
-            auto& uy = pti.GetAttribs(PIdx::uy);
-            auto& uz = pti.GetAttribs(PIdx::uz);
-
-            np_total += pti.numParticles();
-
-            for (unsigned long i = 0; i < ux.size(); i++) {
-                Real usq = (ux[i]*ux[i] + uy[i]*uy[i] + uz[i]*uz[i])*inv_clight_sq;
-                Real gaminv = 1.0/std::sqrt(1.0 + usq);
-                vx_total += ux[i]*gaminv;
-                vy_total += uy[i]*gaminv;
-                vz_total += uz[i]*gaminv;
+            for (WarpXParIter pti(*this, lev); pti.isValid(); ++pti)
+            {
+                auto& ux = pti.GetAttribs(PIdx::ux);
+                auto& uy = pti.GetAttribs(PIdx::uy);
+                auto& uz = pti.GetAttribs(PIdx::uz);
+
+                np_total += pti.numParticles();
+
+                for (unsigned long i = 0; i < ux.size(); i++) {
+                    Real usq = (ux[i]*ux[i] + uy[i]*uy[i] + uz[i]*uz[i])*inv_clight_sq;
+                    Real gaminv = 1.0_rt/std::sqrt(1.0_rt + usq);
+                    vx_total += ux[i]*gaminv;
+                    vy_total += uy[i]*gaminv;
+                    vz_total += uz[i]*gaminv;
+                }
             }
         }
     }
author	Andrew Myers <atmyers@lbl.gov>	2020-03-30 09:07:48 -0700
committer	GitHub <noreply@github.com>	2020-03-30 09:07:48 -0700
commit	6a425feed17500c087a78243d29edd86a70df52f (patch)
tree	cc1f0082be53971fde29b20641cbb4f24266eb6d /Source/Particles/WarpXParticleContainer.cpp
parent	2bcc3ca3c89a5245000c04cbf2afc2d2c55e332b (diff)
download	WarpX-6a425feed17500c087a78243d29edd86a70df52f.tar.gz WarpX-6a425feed17500c087a78243d29edd86a70df52f.tar.zst WarpX-6a425feed17500c087a78243d29edd86a70df52f.zip