diff options
Diffstat (limited to 'Source/Particles/Sorting/Partition.cpp')
-rw-r--r-- | Source/Particles/Sorting/Partition.cpp | 108 |
1 files changed, 58 insertions, 50 deletions
diff --git a/Source/Particles/Sorting/Partition.cpp b/Source/Particles/Sorting/Partition.cpp index 683dbbd04..e88af017f 100644 --- a/Source/Particles/Sorting/Partition.cpp +++ b/Source/Particles/Sorting/Partition.cpp @@ -1,5 +1,6 @@ -#include <WarpX.H> +#include <SortingUtils.H> #include <PhysicalParticleContainer.H> +#include <WarpX.H> #include <AMReX_Particles.H> using namespace amrex; @@ -41,56 +42,64 @@ PhysicalParticleContainer::PartitionParticlesInBuffers( { BL_PROFILE("PPC::Evolve::partition"); - std::vector<bool> inexflag; + // Initialize temporary arrays + Gpu::DeviceVector<int> inexflag; inexflag.resize(np); + Gpu::DeviceVector<long> pid; + pid.resize(np); - auto& aos = pti.GetArrayOfStructs(); + // First, partition particles into the larger buffer - // We need to partition the large buffer first + // - Select the larger buffer iMultiFab const* bmasks = (WarpX::n_field_gather_buffer >= WarpX::n_current_deposition_buffer) ? gather_masks : current_masks; - int i = 0; - const auto& msk = (*bmasks)[pti]; - for (const auto& p : aos) { - const IntVect& iv = Index(p, lev); - inexflag[i++] = msk(iv); - } - - Vector<long> pid; - pid.resize(np); - std::iota(pid.begin(), pid.end(), 0L); - auto sep = std::stable_partition(pid.begin(), pid.end(), - [&inexflag](long id) { return inexflag[id]; }); + // - For each particle, find whether it is in the larger buffer, + // by looking up the mask. Store the answer in `inexflag`. + amrex::ParallelFor( np, fillBufferFlag(pti, bmasks, inexflag, Geom(lev)) ); + // - Find the indices that reorder particles so that the last particles + // are in the larger buffer + fillWithConsecutiveIntegers( pid ); + auto const sep = stablePartition( pid.begin(), pid.end(), inexflag ); + // At the end of this step, `pid` contains the indices that should be used to + // reorder the particles, and `sep` is the position in the array that + // separates the particles that deposit/gather on the fine patch (first part) + // and the particles that deposit/gather in the buffers (last part) + long const n_fine = iteratorDistance(pid.begin(), sep); + // Number of particles on fine patch, i.e. outside of the larger buffer + + // Second, among particles that are in the larger buffer, partition + // particles into the smaller buffer if (WarpX::n_current_deposition_buffer == WarpX::n_field_gather_buffer) { - nfine_current = nfine_gather = std::distance(pid.begin(), sep); - } else if (sep != pid.end()) { + // No need to do anything if the buffers have the same size + nfine_current = nfine_gather = iteratorDistance(pid.begin(), sep); + } else if (sep == pid.end()) { + // No need to do anything if there are no particles in the larger buffer + nfine_current = nfine_gather = np; + } else { int n_buf; if (bmasks == gather_masks) { - nfine_gather = std::distance(pid.begin(), sep); + nfine_gather = n_fine; bmasks = current_masks; n_buf = WarpX::n_current_deposition_buffer; } else { - nfine_current = std::distance(pid.begin(), sep); + nfine_current = n_fine; bmasks = gather_masks; n_buf = WarpX::n_field_gather_buffer; } if (n_buf > 0) { - const auto& msk2 = (*bmasks)[pti]; - for (auto it = sep; it != pid.end(); ++it) { - const long id = *it; - const IntVect& iv = Index(aos[id], lev); - inexflag[id] = msk2(iv); - } + // - For each particle in the large buffer, find whether it is in + // the smaller buffer, by looking up the mask. Store the answer in `inexflag`. + amrex::ParallelFor( np - n_fine, + fillBufferFlagRemainingParticles(pti, bmasks, inexflag, Geom(lev), pid, n_fine) ); + auto const sep2 = stablePartition( sep, pid.end(), inexflag ); - auto sep2 = std::stable_partition(sep, pid.end(), - [&inexflag](long id) {return inexflag[id];}); if (bmasks == gather_masks) { - nfine_gather = std::distance(pid.begin(), sep2); + nfine_gather = iteratorDistance(pid.begin(), sep2); } else { - nfine_current = std::distance(pid.begin(), sep2); + nfine_current = iteratorDistance(pid.begin(), sep2); } } } @@ -103,39 +112,38 @@ PhysicalParticleContainer::PartitionParticlesInBuffers( nfine_gather = 0; } + // Reorder the actual particle array, using the `pid` indices if (nfine_current != np || nfine_gather != np) { - + // Temporary array for particle AoS ParticleVector particle_tmp; particle_tmp.resize(np); - for (long ip = 0; ip < np; ++ip) { - particle_tmp[ip] = aos[pid[ip]]; - } + // Copy particle AoS + auto& aos = pti.GetArrayOfStructs(); + amrex::ParallelFor( np, + copyAndReorder<ParticleType>( aos(), particle_tmp, pid ) ); std::swap(aos(), particle_tmp); + // Temporary array for particle individual attributes RealVector tmp; tmp.resize(np); - for (long ip = 0; ip < np; ++ip) { - tmp[ip] = wp[pid[ip]]; - } + // Copy individual attributes + amrex::ParallelFor( np, copyAndReorder<Real>( wp, tmp, pid ) ); std::swap(wp, tmp); - - for (long ip = 0; ip < np; ++ip) { - tmp[ip] = uxp[pid[ip]]; - } + amrex::ParallelFor( np, copyAndReorder<Real>( uxp, tmp, pid ) ); std::swap(uxp, tmp); - - for (long ip = 0; ip < np; ++ip) { - tmp[ip] = uyp[pid[ip]]; - } + amrex::ParallelFor( np, copyAndReorder<Real>( uyp, tmp, pid ) ); std::swap(uyp, tmp); - - for (long ip = 0; ip < np; ++ip) { - tmp[ip] = uzp[pid[ip]]; - } + amrex::ParallelFor( np, copyAndReorder<Real>( uzp, tmp, pid ) ); std::swap(uzp, tmp); - } + // Make sure that the temporary arrays are not destroyed before + // the GPU kernels finish running + Gpu::streamSynchronize(); + } + // Make sure that the temporary arrays are not destroyed before + // the GPU kernels finish running + Gpu::streamSynchronize(); } |