1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
|
#ifndef WARPX_PARTICLES_SORTING_SORTINGUTILS_H_
#define WARPX_PARTICLES_SORTING_SORTINGUTILS_H_
#include <WarpXParticleContainer.H>
#include <AMReX_CudaContainers.H>
#include <AMReX_Gpu.H>
// TODO: Add documentation
void fillWithConsecutiveIntegers( amrex::Gpu::ManagedDeviceVector<long>& v )
{
#ifdef AMREX_USE_GPU
// On GPU: Use thrust
thrust::sequence( v.begin(), v.end() );
#else
// On CPU: Use std library
std::iota( v.begin(), v.end(), 0L );
#endif
}
// TODO: Add documentation
template< typename ForwardIterator >
ForwardIterator stablePartition(ForwardIterator index_begin,
ForwardIterator index_end,
amrex::Gpu::ManagedDeviceVector<int>& predicate)
{
#ifdef AMREX_USE_GPU
// On GPU: Use thrust
int* AMREX_RESTRICT predicate_ptr = predicate.dataPtr();
ForwardIterator sep = thrust::stable_partition(
thrust::cuda::par(amrex::Cuda::The_ThrustCachedAllocator()),
index_begin, index_end,
[predicate_ptr] AMREX_GPU_DEVICE (long i) { return predicate_ptr[i]; }
);
#else
// On CPU: Use std library
ForwardIterator sep = std::stable_partition(
index_begin, index_end,
[&predicate](long i) { return predicate[i]; }
);
#endif
return sep;
}
// TODO: Add documentation
template< typename ForwardIterator >
int iteratorDistance(ForwardIterator first,
ForwardIterator last)
{
#ifdef AMREX_USE_GPU
// On GPU: Use thrust
return thrust::distance( first, last );
#else
return std::distance( first, last );
#endif
}
// TODO: Add documentation
class fillBufferFlag
{
public:
fillBufferFlag( WarpXParIter& pti, const amrex::iMultiFab* bmasks,
amrex::Gpu::ManagedDeviceVector<int>& inexflag,
const amrex::Geometry& geom, long start_index=0 ) {
// Extract simple structure that can be used directly on the GPU
m_particles = &(pti.GetArrayOfStructs()[0]);
m_buffer_mask = (*bmasks)[pti].array();
m_inexflag_ptr = inexflag.dataPtr();
m_domain_small_end = geom.Domain().smallEnd();
for (int idim=0; idim<AMREX_SPACEDIM; idim++) {
m_prob_lo[idim] = geom.ProbLo(idim);
m_inv_cell_size[idim] = geom.InvCellSize(idim);
}
m_start_index = start_index;
};
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
void operator()( const long i ) const {
// Select a particle
auto& p = m_particles[i+m_start_index];
// Find the index of the cell where this particle is located
amrex::IntVect iv;
for (int idim=0; idim<AMREX_SPACEDIM; idim++) {
iv[idim] = static_cast<int>(floor(
(p.m_rdata.pos[idim]-m_prob_lo[idim])*m_inv_cell_size[idim]
));
}
// Find the value of the buffer flag in this cell and
// store it at the corresponding particle position in the array `inexflag`
m_inexflag_ptr[i+m_start_index] = m_buffer_mask(iv);
};
private:
amrex::Real m_prob_lo[AMREX_SPACEDIM];
amrex::Real m_inv_cell_size[AMREX_SPACEDIM];
amrex::IntVect m_domain_small_end;
int* m_inexflag_ptr;
WarpXParticleContainer::ParticleType* m_particles;
amrex::Array4<const int> m_buffer_mask;
long m_start_index;
};
// TODO: Add documentation
template <typename T>
class copyAndReorder
{
public:
copyAndReorder(
amrex::Gpu::ManagedDeviceVector<T>& src,
amrex::Gpu::ManagedDeviceVector<T>& dst,
amrex::Gpu::ManagedDeviceVector<long>& indices ) {
// Extract simple structure that can be used directly on the GPU
m_src_ptr = src.dataPtr();
m_dst_ptr = dst.dataPtr();
m_indices_ptr = indices.dataPtr();
};
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
void operator()( const long ip ) const {
m_dst_ptr[ip] = m_src_ptr[ m_indices_ptr[ip] ];
};
private:
T* m_src_ptr;
T* m_dst_ptr;
long* m_indices_ptr;
};
#endif // WARPX_PARTICLES_SORTING_SORTINGUTILS_H_
|