aboutsummaryrefslogtreecommitdiff
path: root/Source/Particles
diff options
context:
space:
mode:
authorGravatar Remi Lehe <remi.lehe@normalesup.org> 2019-09-26 10:35:48 -0700
committerGravatar Remi Lehe <remi.lehe@normalesup.org> 2019-10-01 16:32:38 -0700
commit2b08e3082ba46361b943b7ce67f8d21eb835ece9 (patch)
tree61928d4c83389e652732bb804e87aa90e29a2757 /Source/Particles
parenta95c8f5445b7e8b1fc8808ea5bc30ba3c3d02351 (diff)
downloadWarpX-2b08e3082ba46361b943b7ce67f8d21eb835ece9.tar.gz
WarpX-2b08e3082ba46361b943b7ce67f8d21eb835ece9.tar.zst
WarpX-2b08e3082ba46361b943b7ce67f8d21eb835ece9.zip
Implement particle copy
Diffstat (limited to 'Source/Particles')
-rw-r--r--Source/Particles/Sorting/Partition.cpp47
-rw-r--r--Source/Particles/Sorting/SortingUtils.H41
2 files changed, 57 insertions, 31 deletions
diff --git a/Source/Particles/Sorting/Partition.cpp b/Source/Particles/Sorting/Partition.cpp
index 138f28105..b4f96476e 100644
--- a/Source/Particles/Sorting/Partition.cpp
+++ b/Source/Particles/Sorting/Partition.cpp
@@ -42,8 +42,6 @@ PhysicalParticleContainer::PartitionParticlesInBuffers(
{
BL_PROFILE("PPC::Evolve::partition");
- auto& aos = pti.GetArrayOfStructs();
-
// Initialize temporary arrays
Gpu::ManagedDeviceVector<int> inexflag;
inexflag.resize(np);
@@ -67,10 +65,12 @@ PhysicalParticleContainer::PartitionParticlesInBuffers(
// reorder the particles, and `sep` is the position in the array that
// separates the particles that deposit/gather on the fine patch (first part)
// and the particles that deposit/gather in the buffers (last part)
- long n_fine = iteratorDistance(pid.begin(), sep); // Number of particles on fine patch
+ long n_fine = iteratorDistance(pid.begin(), sep);
+ // Number of particles on fine patch, i.e. outside of the larger buffer
// Second, among particles that are in the larger buffer, partition
// particles into the smaller buffer
+
if (WarpX::n_current_deposition_buffer == WarpX::n_field_gather_buffer) {
// No need to do anything if the buffers have the same size
nfine_current = nfine_gather = iteratorDistance(pid.begin(), sep);
@@ -95,6 +95,7 @@ PhysicalParticleContainer::PartitionParticlesInBuffers(
amrex::ParallelFor( np - n_fine,
fillBufferFlag(pti, bmasks, inexflag, Geom(lev), n_fine) );
auto sep2 = stablePartition( sep, pid.end(), inexflag );
+
if (bmasks == gather_masks) {
nfine_gather = iteratorDistance(pid.begin(), sep2);
} else {
@@ -111,39 +112,39 @@ PhysicalParticleContainer::PartitionParticlesInBuffers(
nfine_gather = 0;
}
+ // Reorder the actual particle array, using the `pid` indices
if (nfine_current != np || nfine_gather != np)
{
+ // Extract simple pointer
+ long* AMREX_RESTRICT pid_ptr = pid.dataPtr();
+ // Temporary array for particle AoS
ParticleVector particle_tmp;
particle_tmp.resize(np);
-
- for (long ip = 0; ip < np; ++ip) {
- particle_tmp[ip] = aos[pid[ip]];
- }
+ ParticleType* AMREX_RESTRICT particle_tmp_ptr = particle_tmp.dataPtr();
+
+ // Copy the particle AoS
+ auto& aos = pti.GetArrayOfStructs();
+ ParticleType* AMREX_RESTRICT aos_ptr = &(aos()[0]);
+ amrex::ParallelFor( np,
+ [=] AMREX_GPU_DEVICE (long ip) {
+ particle_tmp_ptr[ip] = aos_ptr[pid_ptr[ip]];
+ }
+ );
std::swap(aos(), particle_tmp);
+ // Temporary array for particle individual attributes
RealVector tmp;
tmp.resize(np);
- for (long ip = 0; ip < np; ++ip) {
- tmp[ip] = wp[pid[ip]];
- }
+ // Copy individual attributes
+ amrex::ParallelFor( np, copyAndReorder<Real>( wp, tmp, pid ) );
std::swap(wp, tmp);
-
- for (long ip = 0; ip < np; ++ip) {
- tmp[ip] = uxp[pid[ip]];
- }
+ amrex::ParallelFor( np, copyAndReorder<Real>( uxp, tmp, pid ) );
std::swap(uxp, tmp);
-
- for (long ip = 0; ip < np; ++ip) {
- tmp[ip] = uyp[pid[ip]];
- }
+ amrex::ParallelFor( np, copyAndReorder<Real>( uyp, tmp, pid ) );
std::swap(uyp, tmp);
-
- for (long ip = 0; ip < np; ++ip) {
- tmp[ip] = uzp[pid[ip]];
- }
+ amrex::ParallelFor( np, copyAndReorder<Real>( uzp, tmp, pid ) );
std::swap(uzp, tmp);
}
-
}
diff --git a/Source/Particles/Sorting/SortingUtils.H b/Source/Particles/Sorting/SortingUtils.H
index a17fe85db..80fc4aab6 100644
--- a/Source/Particles/Sorting/SortingUtils.H
+++ b/Source/Particles/Sorting/SortingUtils.H
@@ -27,17 +27,16 @@ ForwardIterator stablePartition(ForwardIterator index_begin,
// On GPU: Use thrust
int* AMREX_RESTRICT predicate_ptr = predicate.dataPtr();
ForwardIterator sep = thrust::stable_partition(
- thrust::cuda::par(Cuda::The_ThrustCachedAllocator()),
- index_begin, index_end,
- AMREX_GPU_HOST_DEVICE
- [predicate_ptr](long i) { return predicate_ptr[i]; }
- );
+ thrust::cuda::par(Cuda::The_ThrustCachedAllocator()),
+ index_begin, index_end,
+ [predicate_ptr] AMREX_GPU_DEVICE (long i) { return predicate_ptr[i]; }
+ );
#else
// On CPU: Use std library
ForwardIterator sep = std::stable_partition(
- index_begin, index_end,
- [&predicate](long i) { return predicate[i]; }
- );
+ index_begin, index_end,
+ [&predicate](long i) { return predicate[i]; }
+ );
#endif
return sep;
}
@@ -102,4 +101,30 @@ class fillBufferFlag
long m_start_index;
};
+// TODO: Add documentation
+template <typename T>
+class copyAndReorder
+{
+ public:
+ copyAndReorder(
+ amrex::Gpu::ManagedDeviceVector<T>& src,
+ amrex::Gpu::ManagedDeviceVector<T>& dst,
+ amrex::Gpu::ManagedDeviceVector<long>& indices ) {
+ // Extract simple structure that can be used directly on the GPU
+ m_src_ptr = src.dataPtr();
+ m_dst_ptr = dst.dataPtr();
+ m_indices_ptr = indices.dataPtr();
+ };
+
+ AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
+ void operator()( const long ip ) const {
+ m_dst_ptr[ip] = m_src_ptr[ m_indices_ptr[ip] ];
+ };
+
+ private:
+ T* m_src_ptr;
+ T* m_dst_ptr;
+ long* m_indices_ptr;
+};
+
#endif // WARPX_PARTICLES_SORTING_SORTINGUTILS_H_