Avoid touching device memory on host in FieldProbe (#3579)

The implementation here could be optimized by using shared memory. However, rather than doing that optimization here, I am going to add a helper function to AMReX. This change also removes the unnecessary `Gpu::synchronize()` call after the particle iterator loop, because the destructor of the particle iterator already synchronizes.
author: Weiqun Zhang <WeiqunZhang@lbl.gov> 2022-12-21 14:09:26 -0800
committer: GitHub <noreply@github.com> 2022-12-21 23:09:26 +0100
commit: 9cb604ec271dcc37a3609bbad967a27b10982f14 (patch)
tree: 92729c826442367c00ea715ee9d3b8d1d490c9ae
parent: dc23e049ddfead001bd78004da929b192c11b012 (diff)
download: WarpX-9cb604ec271dcc37a3609bbad967a27b10982f14.tar.gz
WarpX-9cb604ec271dcc37a3609bbad967a27b10982f14.tar.zst
WarpX-9cb604ec271dcc37a3609bbad967a27b10982f14.zip
1 files changed, 23 insertions, 16 deletions
diff --git a/Source/Diagnostics/ReducedDiags/FieldProbe.cpp b/Source/Diagnostics/ReducedDiags/FieldProbe.cpp
index d6621f789..073116627 100644
--- a/Source/Diagnostics/ReducedDiags/FieldProbe.cpp
+++ b/Source/Diagnostics/ReducedDiags/FieldProbe.cpp
@@ -570,31 +570,38 @@ void FieldProbe::ComputeDiags (int step)
                 });// ParallelFor Close
                 // this check is here because for m_field_probe_integrate == True, we always compute
                 // but we only write when we truly are in an output interval step
-                if (m_intervals.contains(step+1))
+                if (m_intervals.contains(step+1) && np > 0)
                 {
-                    for (auto ip=0; ip < np; ip++)
+                    // This could be optimized by using shared memory.
+                    amrex::Gpu::DeviceVector<amrex::Real> dv(np*noutputs);
+                    amrex::Real* dvp = dv.data();
+                    amrex::ParallelFor(np, [=] AMREX_GPU_DEVICE (long ip)
                     {
                         amrex::ParticleReal xp, yp, zp;
                         getPosition(ip, xp, yp, zp);
-
-                        // push to output vector
-                        m_data.push_back(xp);
-                        m_data.push_back(yp);
-                        m_data.push_back(zp);
-                        m_data.push_back(part_Ex[ip]);
-                        m_data.push_back(part_Ey[ip]);
-                        m_data.push_back(part_Ez[ip]);
-                        m_data.push_back(part_Bx[ip]);
-                        m_data.push_back(part_By[ip]);
-                        m_data.push_back(part_Bz[ip]);
-                        m_data.push_back(part_S[ip]);
-                    }
+                        long idx = ip*noutputs;
+                        dvp[idx++] = xp;
+                        dvp[idx++] = yp;
+                        dvp[idx++] = zp;
+                        dvp[idx++] = part_Ex[ip];
+                        dvp[idx++] = part_Ey[ip];
+                        dvp[idx++] = part_Ez[ip];
+                        dvp[idx++] = part_Bx[ip];
+                        dvp[idx++] = part_By[ip];
+                        dvp[idx++] = part_Bz[ip];
+                        dvp[idx++] = part_S[ip];
+                    });
+                    auto oldsize = m_data.size();
+                    m_data.resize(oldsize + dv.size());
+                    amrex::Gpu::copyAsync(amrex::Gpu::deviceToHost,
+                                          dv.begin(), dv.end(), &m_data[oldsize]);
+                    Gpu::streamSynchronize();
                 /* m_data now contains up-to-date values for:
                  *  [x, y, z, Ex, Ey, Ez, Bx, By, Bz, and S] */
                 }
             }
         } // end particle iterator loop
-        Gpu::synchronize();
+
         if (m_intervals.contains(step+1))
         {
             // returns total number of mpi notes into mpisize
author	Weiqun Zhang <WeiqunZhang@lbl.gov>	2022-12-21 14:09:26 -0800
committer	GitHub <noreply@github.com>	2022-12-21 23:09:26 +0100
commit	9cb604ec271dcc37a3609bbad967a27b10982f14 (patch)
tree	92729c826442367c00ea715ee9d3b8d1d490c9ae
parent	dc23e049ddfead001bd78004da929b192c11b012 (diff)
download	WarpX-9cb604ec271dcc37a3609bbad967a27b10982f14.tar.gz WarpX-9cb604ec271dcc37a3609bbad967a27b10982f14.tar.zst WarpX-9cb604ec271dcc37a3609bbad967a27b10982f14.zip