aboutsummaryrefslogtreecommitdiff
path: root/Source/Particles/Sorting/Partition.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'Source/Particles/Sorting/Partition.cpp')
-rw-r--r--Source/Particles/Sorting/Partition.cpp108
1 files changed, 58 insertions, 50 deletions
diff --git a/Source/Particles/Sorting/Partition.cpp b/Source/Particles/Sorting/Partition.cpp
index 683dbbd04..e88af017f 100644
--- a/Source/Particles/Sorting/Partition.cpp
+++ b/Source/Particles/Sorting/Partition.cpp
@@ -1,5 +1,6 @@
-#include <WarpX.H>
+#include <SortingUtils.H>
#include <PhysicalParticleContainer.H>
+#include <WarpX.H>
#include <AMReX_Particles.H>
using namespace amrex;
@@ -41,56 +42,64 @@ PhysicalParticleContainer::PartitionParticlesInBuffers(
{
BL_PROFILE("PPC::Evolve::partition");
- std::vector<bool> inexflag;
+ // Initialize temporary arrays
+ Gpu::DeviceVector<int> inexflag;
inexflag.resize(np);
+ Gpu::DeviceVector<long> pid;
+ pid.resize(np);
- auto& aos = pti.GetArrayOfStructs();
+ // First, partition particles into the larger buffer
- // We need to partition the large buffer first
+ // - Select the larger buffer
iMultiFab const* bmasks =
(WarpX::n_field_gather_buffer >= WarpX::n_current_deposition_buffer) ?
gather_masks : current_masks;
- int i = 0;
- const auto& msk = (*bmasks)[pti];
- for (const auto& p : aos) {
- const IntVect& iv = Index(p, lev);
- inexflag[i++] = msk(iv);
- }
-
- Vector<long> pid;
- pid.resize(np);
- std::iota(pid.begin(), pid.end(), 0L);
- auto sep = std::stable_partition(pid.begin(), pid.end(),
- [&inexflag](long id) { return inexflag[id]; });
+ // - For each particle, find whether it is in the larger buffer,
+ // by looking up the mask. Store the answer in `inexflag`.
+ amrex::ParallelFor( np, fillBufferFlag(pti, bmasks, inexflag, Geom(lev)) );
+ // - Find the indices that reorder particles so that the last particles
+ // are in the larger buffer
+ fillWithConsecutiveIntegers( pid );
+ auto const sep = stablePartition( pid.begin(), pid.end(), inexflag );
+ // At the end of this step, `pid` contains the indices that should be used to
+ // reorder the particles, and `sep` is the position in the array that
+ // separates the particles that deposit/gather on the fine patch (first part)
+ // and the particles that deposit/gather in the buffers (last part)
+ long const n_fine = iteratorDistance(pid.begin(), sep);
+ // Number of particles on fine patch, i.e. outside of the larger buffer
+
+ // Second, among particles that are in the larger buffer, partition
+ // particles into the smaller buffer
if (WarpX::n_current_deposition_buffer == WarpX::n_field_gather_buffer) {
- nfine_current = nfine_gather = std::distance(pid.begin(), sep);
- } else if (sep != pid.end()) {
+ // No need to do anything if the buffers have the same size
+ nfine_current = nfine_gather = iteratorDistance(pid.begin(), sep);
+ } else if (sep == pid.end()) {
+ // No need to do anything if there are no particles in the larger buffer
+ nfine_current = nfine_gather = np;
+ } else {
int n_buf;
if (bmasks == gather_masks) {
- nfine_gather = std::distance(pid.begin(), sep);
+ nfine_gather = n_fine;
bmasks = current_masks;
n_buf = WarpX::n_current_deposition_buffer;
} else {
- nfine_current = std::distance(pid.begin(), sep);
+ nfine_current = n_fine;
bmasks = gather_masks;
n_buf = WarpX::n_field_gather_buffer;
}
if (n_buf > 0)
{
- const auto& msk2 = (*bmasks)[pti];
- for (auto it = sep; it != pid.end(); ++it) {
- const long id = *it;
- const IntVect& iv = Index(aos[id], lev);
- inexflag[id] = msk2(iv);
- }
+ // - For each particle in the large buffer, find whether it is in
+ // the smaller buffer, by looking up the mask. Store the answer in `inexflag`.
+ amrex::ParallelFor( np - n_fine,
+ fillBufferFlagRemainingParticles(pti, bmasks, inexflag, Geom(lev), pid, n_fine) );
+ auto const sep2 = stablePartition( sep, pid.end(), inexflag );
- auto sep2 = std::stable_partition(sep, pid.end(),
- [&inexflag](long id) {return inexflag[id];});
if (bmasks == gather_masks) {
- nfine_gather = std::distance(pid.begin(), sep2);
+ nfine_gather = iteratorDistance(pid.begin(), sep2);
} else {
- nfine_current = std::distance(pid.begin(), sep2);
+ nfine_current = iteratorDistance(pid.begin(), sep2);
}
}
}
@@ -103,39 +112,38 @@ PhysicalParticleContainer::PartitionParticlesInBuffers(
nfine_gather = 0;
}
+ // Reorder the actual particle array, using the `pid` indices
if (nfine_current != np || nfine_gather != np)
{
-
+ // Temporary array for particle AoS
ParticleVector particle_tmp;
particle_tmp.resize(np);
- for (long ip = 0; ip < np; ++ip) {
- particle_tmp[ip] = aos[pid[ip]];
- }
+ // Copy particle AoS
+ auto& aos = pti.GetArrayOfStructs();
+ amrex::ParallelFor( np,
+ copyAndReorder<ParticleType>( aos(), particle_tmp, pid ) );
std::swap(aos(), particle_tmp);
+ // Temporary array for particle individual attributes
RealVector tmp;
tmp.resize(np);
- for (long ip = 0; ip < np; ++ip) {
- tmp[ip] = wp[pid[ip]];
- }
+ // Copy individual attributes
+ amrex::ParallelFor( np, copyAndReorder<Real>( wp, tmp, pid ) );
std::swap(wp, tmp);
-
- for (long ip = 0; ip < np; ++ip) {
- tmp[ip] = uxp[pid[ip]];
- }
+ amrex::ParallelFor( np, copyAndReorder<Real>( uxp, tmp, pid ) );
std::swap(uxp, tmp);
-
- for (long ip = 0; ip < np; ++ip) {
- tmp[ip] = uyp[pid[ip]];
- }
+ amrex::ParallelFor( np, copyAndReorder<Real>( uyp, tmp, pid ) );
std::swap(uyp, tmp);
-
- for (long ip = 0; ip < np; ++ip) {
- tmp[ip] = uzp[pid[ip]];
- }
+ amrex::ParallelFor( np, copyAndReorder<Real>( uzp, tmp, pid ) );
std::swap(uzp, tmp);
- }
+ // Make sure that the temporary arrays are not destroyed before
+ // the GPU kernels finish running
+ Gpu::streamSynchronize();
+ }
+ // Make sure that the temporary arrays are not destroyed before
+ // the GPU kernels finish running
+ Gpu::streamSynchronize();
}