diff options
author | 2019-11-01 17:20:55 -0700 | |
---|---|---|
committer | 2019-11-01 17:20:55 -0700 | |
commit | 947211be5797f83c7b11e2f626b46576d933d259 (patch) | |
tree | 411e6df9231f4019e7ed326055a391ef16ca1557 /Source/Particles/Sorting | |
parent | 2b1c5a17e0afdf0aebd008a652540956174eb286 (diff) | |
parent | f42fc99efc105dcb99213d02b31c6bf5183d18a7 (diff) | |
download | WarpX-947211be5797f83c7b11e2f626b46576d933d259.tar.gz WarpX-947211be5797f83c7b11e2f626b46576d933d259.tar.zst WarpX-947211be5797f83c7b11e2f626b46576d933d259.zip |
Merge branch 'dev' into generalize_nodal_deposition
Diffstat (limited to 'Source/Particles/Sorting')
-rw-r--r-- | Source/Particles/Sorting/Make.package | 1 | ||||
-rw-r--r-- | Source/Particles/Sorting/Partition.cpp | 108 | ||||
-rw-r--r-- | Source/Particles/Sorting/SortingUtils.H | 222 |
3 files changed, 281 insertions, 50 deletions
diff --git a/Source/Particles/Sorting/Make.package b/Source/Particles/Sorting/Make.package index 750d2607e..f9c708e5b 100644 --- a/Source/Particles/Sorting/Make.package +++ b/Source/Particles/Sorting/Make.package @@ -1,3 +1,4 @@ +CEXE_headers += SortingUtils.H CEXE_sources += Partition.cpp INCLUDE_LOCATIONS += $(WARPX_HOME)/Source/Particles/Sorting VPATH_LOCATIONS += $(WARPX_HOME)/Source/Particles/Sorting diff --git a/Source/Particles/Sorting/Partition.cpp b/Source/Particles/Sorting/Partition.cpp index 683dbbd04..e88af017f 100644 --- a/Source/Particles/Sorting/Partition.cpp +++ b/Source/Particles/Sorting/Partition.cpp @@ -1,5 +1,6 @@ -#include <WarpX.H> +#include <SortingUtils.H> #include <PhysicalParticleContainer.H> +#include <WarpX.H> #include <AMReX_Particles.H> using namespace amrex; @@ -41,56 +42,64 @@ PhysicalParticleContainer::PartitionParticlesInBuffers( { BL_PROFILE("PPC::Evolve::partition"); - std::vector<bool> inexflag; + // Initialize temporary arrays + Gpu::DeviceVector<int> inexflag; inexflag.resize(np); + Gpu::DeviceVector<long> pid; + pid.resize(np); - auto& aos = pti.GetArrayOfStructs(); + // First, partition particles into the larger buffer - // We need to partition the large buffer first + // - Select the larger buffer iMultiFab const* bmasks = (WarpX::n_field_gather_buffer >= WarpX::n_current_deposition_buffer) ? gather_masks : current_masks; - int i = 0; - const auto& msk = (*bmasks)[pti]; - for (const auto& p : aos) { - const IntVect& iv = Index(p, lev); - inexflag[i++] = msk(iv); - } - - Vector<long> pid; - pid.resize(np); - std::iota(pid.begin(), pid.end(), 0L); - auto sep = std::stable_partition(pid.begin(), pid.end(), - [&inexflag](long id) { return inexflag[id]; }); + // - For each particle, find whether it is in the larger buffer, + // by looking up the mask. Store the answer in `inexflag`. + amrex::ParallelFor( np, fillBufferFlag(pti, bmasks, inexflag, Geom(lev)) ); + // - Find the indices that reorder particles so that the last particles + // are in the larger buffer + fillWithConsecutiveIntegers( pid ); + auto const sep = stablePartition( pid.begin(), pid.end(), inexflag ); + // At the end of this step, `pid` contains the indices that should be used to + // reorder the particles, and `sep` is the position in the array that + // separates the particles that deposit/gather on the fine patch (first part) + // and the particles that deposit/gather in the buffers (last part) + long const n_fine = iteratorDistance(pid.begin(), sep); + // Number of particles on fine patch, i.e. outside of the larger buffer + + // Second, among particles that are in the larger buffer, partition + // particles into the smaller buffer if (WarpX::n_current_deposition_buffer == WarpX::n_field_gather_buffer) { - nfine_current = nfine_gather = std::distance(pid.begin(), sep); - } else if (sep != pid.end()) { + // No need to do anything if the buffers have the same size + nfine_current = nfine_gather = iteratorDistance(pid.begin(), sep); + } else if (sep == pid.end()) { + // No need to do anything if there are no particles in the larger buffer + nfine_current = nfine_gather = np; + } else { int n_buf; if (bmasks == gather_masks) { - nfine_gather = std::distance(pid.begin(), sep); + nfine_gather = n_fine; bmasks = current_masks; n_buf = WarpX::n_current_deposition_buffer; } else { - nfine_current = std::distance(pid.begin(), sep); + nfine_current = n_fine; bmasks = gather_masks; n_buf = WarpX::n_field_gather_buffer; } if (n_buf > 0) { - const auto& msk2 = (*bmasks)[pti]; - for (auto it = sep; it != pid.end(); ++it) { - const long id = *it; - const IntVect& iv = Index(aos[id], lev); - inexflag[id] = msk2(iv); - } + // - For each particle in the large buffer, find whether it is in + // the smaller buffer, by looking up the mask. Store the answer in `inexflag`. + amrex::ParallelFor( np - n_fine, + fillBufferFlagRemainingParticles(pti, bmasks, inexflag, Geom(lev), pid, n_fine) ); + auto const sep2 = stablePartition( sep, pid.end(), inexflag ); - auto sep2 = std::stable_partition(sep, pid.end(), - [&inexflag](long id) {return inexflag[id];}); if (bmasks == gather_masks) { - nfine_gather = std::distance(pid.begin(), sep2); + nfine_gather = iteratorDistance(pid.begin(), sep2); } else { - nfine_current = std::distance(pid.begin(), sep2); + nfine_current = iteratorDistance(pid.begin(), sep2); } } } @@ -103,39 +112,38 @@ PhysicalParticleContainer::PartitionParticlesInBuffers( nfine_gather = 0; } + // Reorder the actual particle array, using the `pid` indices if (nfine_current != np || nfine_gather != np) { - + // Temporary array for particle AoS ParticleVector particle_tmp; particle_tmp.resize(np); - for (long ip = 0; ip < np; ++ip) { - particle_tmp[ip] = aos[pid[ip]]; - } + // Copy particle AoS + auto& aos = pti.GetArrayOfStructs(); + amrex::ParallelFor( np, + copyAndReorder<ParticleType>( aos(), particle_tmp, pid ) ); std::swap(aos(), particle_tmp); + // Temporary array for particle individual attributes RealVector tmp; tmp.resize(np); - for (long ip = 0; ip < np; ++ip) { - tmp[ip] = wp[pid[ip]]; - } + // Copy individual attributes + amrex::ParallelFor( np, copyAndReorder<Real>( wp, tmp, pid ) ); std::swap(wp, tmp); - - for (long ip = 0; ip < np; ++ip) { - tmp[ip] = uxp[pid[ip]]; - } + amrex::ParallelFor( np, copyAndReorder<Real>( uxp, tmp, pid ) ); std::swap(uxp, tmp); - - for (long ip = 0; ip < np; ++ip) { - tmp[ip] = uyp[pid[ip]]; - } + amrex::ParallelFor( np, copyAndReorder<Real>( uyp, tmp, pid ) ); std::swap(uyp, tmp); - - for (long ip = 0; ip < np; ++ip) { - tmp[ip] = uzp[pid[ip]]; - } + amrex::ParallelFor( np, copyAndReorder<Real>( uzp, tmp, pid ) ); std::swap(uzp, tmp); - } + // Make sure that the temporary arrays are not destroyed before + // the GPU kernels finish running + Gpu::streamSynchronize(); + } + // Make sure that the temporary arrays are not destroyed before + // the GPU kernels finish running + Gpu::streamSynchronize(); } diff --git a/Source/Particles/Sorting/SortingUtils.H b/Source/Particles/Sorting/SortingUtils.H new file mode 100644 index 000000000..28a1b73b7 --- /dev/null +++ b/Source/Particles/Sorting/SortingUtils.H @@ -0,0 +1,222 @@ +#ifndef WARPX_PARTICLES_SORTING_SORTINGUTILS_H_ +#define WARPX_PARTICLES_SORTING_SORTINGUTILS_H_ + +#include <WarpXParticleContainer.H> +#include <AMReX_CudaContainers.H> +#include <AMReX_Gpu.H> +#ifdef AMREX_USE_GPU + #include <thrust/partition.h> + #include <thrust/distance.h> +#endif + +/* \brief Fill the elements of the input vector with consecutive integer, + * starting from 0 + * + * \param[inout] v Vector of integers, to be filled by this routine + */ +void fillWithConsecutiveIntegers( amrex::Gpu::DeviceVector<long>& v ) +{ +#ifdef AMREX_USE_GPU + // On GPU: Use thrust + thrust::sequence( v.begin(), v.end() ); +#else + // On CPU: Use std library + std::iota( v.begin(), v.end(), 0L ); +#endif +} + +/* \brief Find the indices that would reorder the elements of `predicate` + * so that the elements with non-zero value precede the other elements + * + * \param[in, out] index_begin Point to the beginning of the vector which is + * to be filled with these indices + * \param[in, out] index_begin Point to the end of the vector which is + * to be filled with these indices + * \param[in] Vector that indicates the elements that need to be reordered first + */ +template< typename ForwardIterator > +ForwardIterator stablePartition(ForwardIterator const index_begin, + ForwardIterator const index_end, + amrex::Gpu::DeviceVector<int> const& predicate) +{ +#ifdef AMREX_USE_GPU + // On GPU: Use thrust + int const* AMREX_RESTRICT predicate_ptr = predicate.dataPtr(); + ForwardIterator const sep = thrust::stable_partition( + thrust::cuda::par(amrex::Cuda::The_ThrustCachedAllocator()), + index_begin, index_end, + [predicate_ptr] AMREX_GPU_DEVICE (long i) { return predicate_ptr[i]; } + ); +#else + // On CPU: Use std library + ForwardIterator const sep = std::stable_partition( + index_begin, index_end, + [&predicate](long i) { return predicate[i]; } + ); +#endif + return sep; +} + +/* \brief Return the number of elements between `first` and `last` + * + * \param[in] fist Points to a position in a vector + * \param[in] last Points to another position in a vector + * \return The number of elements between `first` and `last` + */ +template< typename ForwardIterator > +int iteratorDistance(ForwardIterator const first, + ForwardIterator const last) +{ +#ifdef AMREX_USE_GPU + // On GPU: Use thrust + return thrust::distance( first, last ); +#else + return std::distance( first, last ); +#endif +} + +/* \brief Functor that fills the elements of the particle array `inexflag` + * with the value of the spatial array `bmasks`, at the corresponding particle position. + * + * \param[in] pti Contains information on the particle positions + * \param[in] bmasks Spatial array, that contains a flag indicating + * whether each cell is part of the gathering/deposition buffers + * \param[out] inexflag Vector to be filled with the value of `bmasks` + * \param[in] geom Geometry object, necessary to locate particles within the array `bmasks` + * + */ +class fillBufferFlag +{ + public: + fillBufferFlag( WarpXParIter const& pti, amrex::iMultiFab const* bmasks, + amrex::Gpu::DeviceVector<int>& inexflag, + amrex::Geometry const& geom ) { + + // Extract simple structure that can be used directly on the GPU + m_particles = &(pti.GetArrayOfStructs()[0]); + m_buffer_mask = (*bmasks)[pti].array(); + m_inexflag_ptr = inexflag.dataPtr(); + m_domain = geom.Domain(); + for (int idim=0; idim<AMREX_SPACEDIM; idim++) { + m_prob_lo[idim] = geom.ProbLo(idim); + m_inv_cell_size[idim] = geom.InvCellSize(idim); + } + }; + + + AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE + void operator()( const long i ) const { + // Select a particle + auto const& p = m_particles[i]; + // Find the index of the cell where this particle is located + amrex::IntVect const iv = amrex::getParticleCell( p, + m_prob_lo, m_inv_cell_size, m_domain ); + // Find the value of the buffer flag in this cell and + // store it at the corresponding particle position in the array `inexflag` + m_inexflag_ptr[i] = m_buffer_mask(iv); + }; + + private: + amrex::GpuArray<amrex::Real,AMREX_SPACEDIM> m_prob_lo; + amrex::GpuArray<amrex::Real,AMREX_SPACEDIM> m_inv_cell_size; + amrex::Box m_domain; + int* m_inexflag_ptr; + WarpXParticleContainer::ParticleType const* m_particles; + amrex::Array4<int const> m_buffer_mask; +}; + +/* \brief Functor that fills the elements of the particle array `inexflag` + * with the value of the spatial array `bmasks`, at the corresponding particle position. + * + * Contrary to `fillBufferFlag`, here this is done only for the particles that + * the last elements of `particle_indices` point to (from the element at + * index `start_index` in `particle_indices`, to the last element of `particle_indices`) + * + * \param[in] pti Contains information on the particle positions + * \param[in] bmasks Spatial array, that contains a flag indicating + * whether each cell is part of the gathering/deposition buffers + * \param[out] inexflag Vector to be filled with the value of `bmasks` + * \param[in] geom Geometry object, necessary to locate particles within the array `bmasks` + * \param[in] start_index Index that which elements start to be modified + */ +class fillBufferFlagRemainingParticles +{ + public: + fillBufferFlagRemainingParticles( + WarpXParIter const& pti, + amrex::iMultiFab const* bmasks, + amrex::Gpu::DeviceVector<int>& inexflag, + amrex::Geometry const& geom, + amrex::Gpu::DeviceVector<long> const& particle_indices, + long const start_index ) : + m_start_index(start_index) { + + // Extract simple structure that can be used directly on the GPU + m_particles = &(pti.GetArrayOfStructs()[0]); + m_buffer_mask = (*bmasks)[pti].array(); + m_inexflag_ptr = inexflag.dataPtr(); + m_indices_ptr = particle_indices.dataPtr(); + m_domain = geom.Domain(); + for (int idim=0; idim<AMREX_SPACEDIM; idim++) { + m_prob_lo[idim] = geom.ProbLo(idim); + m_inv_cell_size[idim] = geom.InvCellSize(idim); + } + }; + + + AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE + void operator()( const long i ) const { + // Select a particle + auto const& p = m_particles[m_indices_ptr[i+m_start_index]]; + // Find the index of the cell where this particle is located + amrex::IntVect const iv = amrex::getParticleCell( p, + m_prob_lo, m_inv_cell_size, m_domain ); + // Find the value of the buffer flag in this cell and + // store it at the corresponding particle position in the array `inexflag` + m_inexflag_ptr[m_indices_ptr[i+m_start_index]] = m_buffer_mask(iv); + }; + + private: + amrex::GpuArray<amrex::Real,AMREX_SPACEDIM> m_prob_lo; + amrex::GpuArray<amrex::Real,AMREX_SPACEDIM> m_inv_cell_size; + amrex::Box m_domain; + int* m_inexflag_ptr; + WarpXParticleContainer::ParticleType const* m_particles; + amrex::Array4<int const> m_buffer_mask; + long const m_start_index; + long const* m_indices_ptr; +}; + +/* \brief Functor that copies the elements of `src` into `dst`, + * while reordering them according to `indices` + * + * \param[in] src Source vector + * \param[out] dst Destination vector + * \param[in] indices Array of indices that indicate how to reorder elements + */ +template <typename T> +class copyAndReorder +{ + public: + copyAndReorder( + amrex::Gpu::ManagedDeviceVector<T> const& src, + amrex::Gpu::ManagedDeviceVector<T>& dst, + amrex::Gpu::DeviceVector<long> const& indices ) { + // Extract simple structure that can be used directly on the GPU + m_src_ptr = src.dataPtr(); + m_dst_ptr = dst.dataPtr(); + m_indices_ptr = indices.dataPtr(); + }; + + AMREX_GPU_DEVICE AMREX_FORCE_INLINE + void operator()( const long ip ) const { + m_dst_ptr[ip] = m_src_ptr[ m_indices_ptr[ip] ]; + }; + + private: + T const* m_src_ptr; + T* m_dst_ptr; + long const* m_indices_ptr; +}; + +#endif // WARPX_PARTICLES_SORTING_SORTINGUTILS_H_ |