aboutsummaryrefslogtreecommitdiff
path: root/Source/Particles/Sorting
diff options
context:
space:
mode:
Diffstat (limited to 'Source/Particles/Sorting')
-rw-r--r--Source/Particles/Sorting/Make.package1
-rw-r--r--Source/Particles/Sorting/Partition.cpp108
-rw-r--r--Source/Particles/Sorting/SortingUtils.H222
3 files changed, 281 insertions, 50 deletions
diff --git a/Source/Particles/Sorting/Make.package b/Source/Particles/Sorting/Make.package
index 750d2607e..f9c708e5b 100644
--- a/Source/Particles/Sorting/Make.package
+++ b/Source/Particles/Sorting/Make.package
@@ -1,3 +1,4 @@
+CEXE_headers += SortingUtils.H
CEXE_sources += Partition.cpp
INCLUDE_LOCATIONS += $(WARPX_HOME)/Source/Particles/Sorting
VPATH_LOCATIONS += $(WARPX_HOME)/Source/Particles/Sorting
diff --git a/Source/Particles/Sorting/Partition.cpp b/Source/Particles/Sorting/Partition.cpp
index 683dbbd04..e88af017f 100644
--- a/Source/Particles/Sorting/Partition.cpp
+++ b/Source/Particles/Sorting/Partition.cpp
@@ -1,5 +1,6 @@
-#include <WarpX.H>
+#include <SortingUtils.H>
#include <PhysicalParticleContainer.H>
+#include <WarpX.H>
#include <AMReX_Particles.H>
using namespace amrex;
@@ -41,56 +42,64 @@ PhysicalParticleContainer::PartitionParticlesInBuffers(
{
BL_PROFILE("PPC::Evolve::partition");
- std::vector<bool> inexflag;
+ // Initialize temporary arrays
+ Gpu::DeviceVector<int> inexflag;
inexflag.resize(np);
+ Gpu::DeviceVector<long> pid;
+ pid.resize(np);
- auto& aos = pti.GetArrayOfStructs();
+ // First, partition particles into the larger buffer
- // We need to partition the large buffer first
+ // - Select the larger buffer
iMultiFab const* bmasks =
(WarpX::n_field_gather_buffer >= WarpX::n_current_deposition_buffer) ?
gather_masks : current_masks;
- int i = 0;
- const auto& msk = (*bmasks)[pti];
- for (const auto& p : aos) {
- const IntVect& iv = Index(p, lev);
- inexflag[i++] = msk(iv);
- }
-
- Vector<long> pid;
- pid.resize(np);
- std::iota(pid.begin(), pid.end(), 0L);
- auto sep = std::stable_partition(pid.begin(), pid.end(),
- [&inexflag](long id) { return inexflag[id]; });
+ // - For each particle, find whether it is in the larger buffer,
+ // by looking up the mask. Store the answer in `inexflag`.
+ amrex::ParallelFor( np, fillBufferFlag(pti, bmasks, inexflag, Geom(lev)) );
+ // - Find the indices that reorder particles so that the last particles
+ // are in the larger buffer
+ fillWithConsecutiveIntegers( pid );
+ auto const sep = stablePartition( pid.begin(), pid.end(), inexflag );
+ // At the end of this step, `pid` contains the indices that should be used to
+ // reorder the particles, and `sep` is the position in the array that
+ // separates the particles that deposit/gather on the fine patch (first part)
+ // and the particles that deposit/gather in the buffers (last part)
+ long const n_fine = iteratorDistance(pid.begin(), sep);
+ // Number of particles on fine patch, i.e. outside of the larger buffer
+
+ // Second, among particles that are in the larger buffer, partition
+ // particles into the smaller buffer
if (WarpX::n_current_deposition_buffer == WarpX::n_field_gather_buffer) {
- nfine_current = nfine_gather = std::distance(pid.begin(), sep);
- } else if (sep != pid.end()) {
+ // No need to do anything if the buffers have the same size
+ nfine_current = nfine_gather = iteratorDistance(pid.begin(), sep);
+ } else if (sep == pid.end()) {
+ // No need to do anything if there are no particles in the larger buffer
+ nfine_current = nfine_gather = np;
+ } else {
int n_buf;
if (bmasks == gather_masks) {
- nfine_gather = std::distance(pid.begin(), sep);
+ nfine_gather = n_fine;
bmasks = current_masks;
n_buf = WarpX::n_current_deposition_buffer;
} else {
- nfine_current = std::distance(pid.begin(), sep);
+ nfine_current = n_fine;
bmasks = gather_masks;
n_buf = WarpX::n_field_gather_buffer;
}
if (n_buf > 0)
{
- const auto& msk2 = (*bmasks)[pti];
- for (auto it = sep; it != pid.end(); ++it) {
- const long id = *it;
- const IntVect& iv = Index(aos[id], lev);
- inexflag[id] = msk2(iv);
- }
+ // - For each particle in the large buffer, find whether it is in
+ // the smaller buffer, by looking up the mask. Store the answer in `inexflag`.
+ amrex::ParallelFor( np - n_fine,
+ fillBufferFlagRemainingParticles(pti, bmasks, inexflag, Geom(lev), pid, n_fine) );
+ auto const sep2 = stablePartition( sep, pid.end(), inexflag );
- auto sep2 = std::stable_partition(sep, pid.end(),
- [&inexflag](long id) {return inexflag[id];});
if (bmasks == gather_masks) {
- nfine_gather = std::distance(pid.begin(), sep2);
+ nfine_gather = iteratorDistance(pid.begin(), sep2);
} else {
- nfine_current = std::distance(pid.begin(), sep2);
+ nfine_current = iteratorDistance(pid.begin(), sep2);
}
}
}
@@ -103,39 +112,38 @@ PhysicalParticleContainer::PartitionParticlesInBuffers(
nfine_gather = 0;
}
+ // Reorder the actual particle array, using the `pid` indices
if (nfine_current != np || nfine_gather != np)
{
-
+ // Temporary array for particle AoS
ParticleVector particle_tmp;
particle_tmp.resize(np);
- for (long ip = 0; ip < np; ++ip) {
- particle_tmp[ip] = aos[pid[ip]];
- }
+ // Copy particle AoS
+ auto& aos = pti.GetArrayOfStructs();
+ amrex::ParallelFor( np,
+ copyAndReorder<ParticleType>( aos(), particle_tmp, pid ) );
std::swap(aos(), particle_tmp);
+ // Temporary array for particle individual attributes
RealVector tmp;
tmp.resize(np);
- for (long ip = 0; ip < np; ++ip) {
- tmp[ip] = wp[pid[ip]];
- }
+ // Copy individual attributes
+ amrex::ParallelFor( np, copyAndReorder<Real>( wp, tmp, pid ) );
std::swap(wp, tmp);
-
- for (long ip = 0; ip < np; ++ip) {
- tmp[ip] = uxp[pid[ip]];
- }
+ amrex::ParallelFor( np, copyAndReorder<Real>( uxp, tmp, pid ) );
std::swap(uxp, tmp);
-
- for (long ip = 0; ip < np; ++ip) {
- tmp[ip] = uyp[pid[ip]];
- }
+ amrex::ParallelFor( np, copyAndReorder<Real>( uyp, tmp, pid ) );
std::swap(uyp, tmp);
-
- for (long ip = 0; ip < np; ++ip) {
- tmp[ip] = uzp[pid[ip]];
- }
+ amrex::ParallelFor( np, copyAndReorder<Real>( uzp, tmp, pid ) );
std::swap(uzp, tmp);
- }
+ // Make sure that the temporary arrays are not destroyed before
+ // the GPU kernels finish running
+ Gpu::streamSynchronize();
+ }
+ // Make sure that the temporary arrays are not destroyed before
+ // the GPU kernels finish running
+ Gpu::streamSynchronize();
}
diff --git a/Source/Particles/Sorting/SortingUtils.H b/Source/Particles/Sorting/SortingUtils.H
new file mode 100644
index 000000000..28a1b73b7
--- /dev/null
+++ b/Source/Particles/Sorting/SortingUtils.H
@@ -0,0 +1,222 @@
+#ifndef WARPX_PARTICLES_SORTING_SORTINGUTILS_H_
+#define WARPX_PARTICLES_SORTING_SORTINGUTILS_H_
+
+#include <WarpXParticleContainer.H>
+#include <AMReX_CudaContainers.H>
+#include <AMReX_Gpu.H>
+#ifdef AMREX_USE_GPU
+ #include <thrust/partition.h>
+ #include <thrust/distance.h>
+#endif
+
+/* \brief Fill the elements of the input vector with consecutive integer,
+ * starting from 0
+ *
+ * \param[inout] v Vector of integers, to be filled by this routine
+ */
+void fillWithConsecutiveIntegers( amrex::Gpu::DeviceVector<long>& v )
+{
+#ifdef AMREX_USE_GPU
+ // On GPU: Use thrust
+ thrust::sequence( v.begin(), v.end() );
+#else
+ // On CPU: Use std library
+ std::iota( v.begin(), v.end(), 0L );
+#endif
+}
+
+/* \brief Find the indices that would reorder the elements of `predicate`
+ * so that the elements with non-zero value precede the other elements
+ *
+ * \param[in, out] index_begin Point to the beginning of the vector which is
+ * to be filled with these indices
+ * \param[in, out] index_begin Point to the end of the vector which is
+ * to be filled with these indices
+ * \param[in] Vector that indicates the elements that need to be reordered first
+ */
+template< typename ForwardIterator >
+ForwardIterator stablePartition(ForwardIterator const index_begin,
+ ForwardIterator const index_end,
+ amrex::Gpu::DeviceVector<int> const& predicate)
+{
+#ifdef AMREX_USE_GPU
+ // On GPU: Use thrust
+ int const* AMREX_RESTRICT predicate_ptr = predicate.dataPtr();
+ ForwardIterator const sep = thrust::stable_partition(
+ thrust::cuda::par(amrex::Cuda::The_ThrustCachedAllocator()),
+ index_begin, index_end,
+ [predicate_ptr] AMREX_GPU_DEVICE (long i) { return predicate_ptr[i]; }
+ );
+#else
+ // On CPU: Use std library
+ ForwardIterator const sep = std::stable_partition(
+ index_begin, index_end,
+ [&predicate](long i) { return predicate[i]; }
+ );
+#endif
+ return sep;
+}
+
+/* \brief Return the number of elements between `first` and `last`
+ *
+ * \param[in] fist Points to a position in a vector
+ * \param[in] last Points to another position in a vector
+ * \return The number of elements between `first` and `last`
+ */
+template< typename ForwardIterator >
+int iteratorDistance(ForwardIterator const first,
+ ForwardIterator const last)
+{
+#ifdef AMREX_USE_GPU
+ // On GPU: Use thrust
+ return thrust::distance( first, last );
+#else
+ return std::distance( first, last );
+#endif
+}
+
+/* \brief Functor that fills the elements of the particle array `inexflag`
+ * with the value of the spatial array `bmasks`, at the corresponding particle position.
+ *
+ * \param[in] pti Contains information on the particle positions
+ * \param[in] bmasks Spatial array, that contains a flag indicating
+ * whether each cell is part of the gathering/deposition buffers
+ * \param[out] inexflag Vector to be filled with the value of `bmasks`
+ * \param[in] geom Geometry object, necessary to locate particles within the array `bmasks`
+ *
+ */
+class fillBufferFlag
+{
+ public:
+ fillBufferFlag( WarpXParIter const& pti, amrex::iMultiFab const* bmasks,
+ amrex::Gpu::DeviceVector<int>& inexflag,
+ amrex::Geometry const& geom ) {
+
+ // Extract simple structure that can be used directly on the GPU
+ m_particles = &(pti.GetArrayOfStructs()[0]);
+ m_buffer_mask = (*bmasks)[pti].array();
+ m_inexflag_ptr = inexflag.dataPtr();
+ m_domain = geom.Domain();
+ for (int idim=0; idim<AMREX_SPACEDIM; idim++) {
+ m_prob_lo[idim] = geom.ProbLo(idim);
+ m_inv_cell_size[idim] = geom.InvCellSize(idim);
+ }
+ };
+
+
+ AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
+ void operator()( const long i ) const {
+ // Select a particle
+ auto const& p = m_particles[i];
+ // Find the index of the cell where this particle is located
+ amrex::IntVect const iv = amrex::getParticleCell( p,
+ m_prob_lo, m_inv_cell_size, m_domain );
+ // Find the value of the buffer flag in this cell and
+ // store it at the corresponding particle position in the array `inexflag`
+ m_inexflag_ptr[i] = m_buffer_mask(iv);
+ };
+
+ private:
+ amrex::GpuArray<amrex::Real,AMREX_SPACEDIM> m_prob_lo;
+ amrex::GpuArray<amrex::Real,AMREX_SPACEDIM> m_inv_cell_size;
+ amrex::Box m_domain;
+ int* m_inexflag_ptr;
+ WarpXParticleContainer::ParticleType const* m_particles;
+ amrex::Array4<int const> m_buffer_mask;
+};
+
+/* \brief Functor that fills the elements of the particle array `inexflag`
+ * with the value of the spatial array `bmasks`, at the corresponding particle position.
+ *
+ * Contrary to `fillBufferFlag`, here this is done only for the particles that
+ * the last elements of `particle_indices` point to (from the element at
+ * index `start_index` in `particle_indices`, to the last element of `particle_indices`)
+ *
+ * \param[in] pti Contains information on the particle positions
+ * \param[in] bmasks Spatial array, that contains a flag indicating
+ * whether each cell is part of the gathering/deposition buffers
+ * \param[out] inexflag Vector to be filled with the value of `bmasks`
+ * \param[in] geom Geometry object, necessary to locate particles within the array `bmasks`
+ * \param[in] start_index Index that which elements start to be modified
+ */
+class fillBufferFlagRemainingParticles
+{
+ public:
+ fillBufferFlagRemainingParticles(
+ WarpXParIter const& pti,
+ amrex::iMultiFab const* bmasks,
+ amrex::Gpu::DeviceVector<int>& inexflag,
+ amrex::Geometry const& geom,
+ amrex::Gpu::DeviceVector<long> const& particle_indices,
+ long const start_index ) :
+ m_start_index(start_index) {
+
+ // Extract simple structure that can be used directly on the GPU
+ m_particles = &(pti.GetArrayOfStructs()[0]);
+ m_buffer_mask = (*bmasks)[pti].array();
+ m_inexflag_ptr = inexflag.dataPtr();
+ m_indices_ptr = particle_indices.dataPtr();
+ m_domain = geom.Domain();
+ for (int idim=0; idim<AMREX_SPACEDIM; idim++) {
+ m_prob_lo[idim] = geom.ProbLo(idim);
+ m_inv_cell_size[idim] = geom.InvCellSize(idim);
+ }
+ };
+
+
+ AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
+ void operator()( const long i ) const {
+ // Select a particle
+ auto const& p = m_particles[m_indices_ptr[i+m_start_index]];
+ // Find the index of the cell where this particle is located
+ amrex::IntVect const iv = amrex::getParticleCell( p,
+ m_prob_lo, m_inv_cell_size, m_domain );
+ // Find the value of the buffer flag in this cell and
+ // store it at the corresponding particle position in the array `inexflag`
+ m_inexflag_ptr[m_indices_ptr[i+m_start_index]] = m_buffer_mask(iv);
+ };
+
+ private:
+ amrex::GpuArray<amrex::Real,AMREX_SPACEDIM> m_prob_lo;
+ amrex::GpuArray<amrex::Real,AMREX_SPACEDIM> m_inv_cell_size;
+ amrex::Box m_domain;
+ int* m_inexflag_ptr;
+ WarpXParticleContainer::ParticleType const* m_particles;
+ amrex::Array4<int const> m_buffer_mask;
+ long const m_start_index;
+ long const* m_indices_ptr;
+};
+
+/* \brief Functor that copies the elements of `src` into `dst`,
+ * while reordering them according to `indices`
+ *
+ * \param[in] src Source vector
+ * \param[out] dst Destination vector
+ * \param[in] indices Array of indices that indicate how to reorder elements
+ */
+template <typename T>
+class copyAndReorder
+{
+ public:
+ copyAndReorder(
+ amrex::Gpu::ManagedDeviceVector<T> const& src,
+ amrex::Gpu::ManagedDeviceVector<T>& dst,
+ amrex::Gpu::DeviceVector<long> const& indices ) {
+ // Extract simple structure that can be used directly on the GPU
+ m_src_ptr = src.dataPtr();
+ m_dst_ptr = dst.dataPtr();
+ m_indices_ptr = indices.dataPtr();
+ };
+
+ AMREX_GPU_DEVICE AMREX_FORCE_INLINE
+ void operator()( const long ip ) const {
+ m_dst_ptr[ip] = m_src_ptr[ m_indices_ptr[ip] ];
+ };
+
+ private:
+ T const* m_src_ptr;
+ T* m_dst_ptr;
+ long const* m_indices_ptr;
+};
+
+#endif // WARPX_PARTICLES_SORTING_SORTINGUTILS_H_