1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
|
/* Copyright 2020 Luca Fedeli, Neil Zaim
*
* This file is part of WarpX.
*
* License: BSD-3-Clause-LBNL
*/
#ifndef FILTER_CREATE_TRANSFORM_FROM_FAB_H_
#define FILTER_CREATE_TRANSFORM_FROM_FAB_H_
#include "WarpX.H"
#include <AMReX_REAL.H>
#include <AMReX_TypeTraits.H>
/**
* \brief Apply a filter on a list of FABs, then create and apply a transform
* operation to the particles depending on the output of the filter.
*
* This version of the function takes as inputs a mask and a FAB that can be
* used in the transform function, both of which can be obtained using another version
* of filterCreateTransformFromFAB that takes a filter function as input.
*
* \tparam N number of particles created in the dst(s) in each cell
* \tparam DstTile the dst particle tile type
* \tparam FAB the src FAB type
* \tparam Index the index type, e.g. unsigned int
* \tparam CreateFunc1 the create function type for dst1
* \tparam CreateFunc2 the create function type for dst2
* \tparam TransFunc the transform function type
*
* \param[in,out] dst1 the first destination tile
* \param[in,out] dst2 the second destination tile
* \param[in] box the box where the particles are created
* \param[in] src_FAB A FAB defined on box that is used in the transform function
* \param[in] mask pointer to the mask: 1 means create, 0 means don't create
* \param[in] dst1_index the location at which to starting writing the result to dst1
* \param[in] dst2_index the location at which to starting writing the result to dst2
* \param[in] create1 callable that defines what will be done for the create step for dst1.
* \param[in] create2 callable that defines what will be done for the create step for dst2.
* \param[in] transform callable that defines the transformation to apply on dst1 and dst2.
*
* \return num_added the number of particles that were written to dst1 and dst2.
*/
template <int N, typename DstTile, typename FAB, typename Index,
typename CreateFunc1, typename CreateFunc2, typename TransFunc,
amrex::EnableIf_t<std::is_integral<Index>::value, int> foo = 0>
Index filterCreateTransformFromFAB (DstTile& dst1, DstTile& dst2, const amrex::Box box,
const FAB *src_FAB, const Index* mask,
const Index dst1_index, const Index dst2_index,
CreateFunc1&& create1, CreateFunc2&& create2,
TransFunc&& transform) noexcept
{
using namespace amrex;
const auto ncells = box.volume();
if (ncells == 0) return 0;
auto & warpx = WarpX::GetInstance();
const int level_0 = 0;
Geometry const & geom = warpx.Geom(level_0);
constexpr int spacedim = AMREX_SPACEDIM;
#if defined(WARPX_DIM_1D_Z)
const Real zlo_global = geom.ProbLo(0);
const Real dz = geom.CellSize(0);
#elif defined(WARPX_DIM_XZ) || defined(WARPX_DIM_RZ)
const Real xlo_global = geom.ProbLo(0);
const Real dx = geom.CellSize(0);
const Real zlo_global = geom.ProbLo(1);
const Real dz = geom.CellSize(1);
#elif defined(WARPX_DIM_3D)
const Real xlo_global = geom.ProbLo(0);
const Real dx = geom.CellSize(0);
const Real ylo_global = geom.ProbLo(1);
const Real dy = geom.CellSize(1);
const Real zlo_global = geom.ProbLo(2);
const Real dz = geom.CellSize(2);
#endif
const auto arrNumPartCreation = src_FAB->array();
Gpu::DeviceVector<Index> offsets(ncells);
auto total = amrex::Scan::ExclusiveSum(ncells, mask, offsets.data());
const Index num_added = N*total;
dst1.resize(std::max(dst1_index + num_added, dst1.numParticles()));
dst2.resize(std::max(dst2_index + num_added, dst2.numParticles()));
auto p_offsets = offsets.dataPtr();
const auto dst1_data = dst1.getParticleTileData();
const auto dst2_data = dst2.getParticleTileData();
// For loop over all cells in the box. If mask is true in the given cell,
// we create the particles in the cell and apply a transform function to the
// created particles.
amrex::ParallelForRNG(ncells,
[=] AMREX_GPU_DEVICE (int i, amrex::RandomEngine const& engine) noexcept
{
if (mask[i])
{
const IntVect iv = box.atOffset(i);
const int j = iv[0];
const int k = (spacedim >= 2) ? iv[1] : 0;
const int l = (spacedim == 3) ? iv[2] : 0;
// Currently all particles are created on nodes. This makes it useless
// to use N>1 (for now).
#if defined(WARPX_DIM_1D_Z)
Real const x = 0.0;
Real const y = 0.0;
Real const z = zlo_global + j*dz;
#elif defined(WARPX_DIM_XZ) || defined(WARPX_DIM_RZ)
Real const x = xlo_global + j*dx;
Real const y = 0.0;
Real const z = zlo_global + k*dz;
#elif defined(WARPX_DIM_3D)
Real const x = xlo_global + j*dx;
Real const y = ylo_global + k*dy;
Real const z = zlo_global + l*dz;
#endif
for (int n = 0; n < N; ++n)
{
create1(dst1_data, N*p_offsets[i] + dst1_index + n, engine, x, y, z);
create2(dst2_data, N*p_offsets[i] + dst2_index + n, engine, x, y, z);
}
transform(dst1_data, dst2_data, N*p_offsets[i] + dst1_index,
N*p_offsets[i] + dst2_index, N, arrNumPartCreation(j,k,l));
}
});
Gpu::synchronize();
return num_added;
}
/**
* \brief Apply a filter on a list of FABs, then create and apply a transform
* operation to the particles depending on the output of the filter.
*
* This version of the function takes as input a filter functor (and an array of FABs that
* can be used in the filter functor), uses it to obtain a mask and a FAB and then calls another
* version of filterCreateTransformFromFAB that takes the mask and the FAB as inputs.
*
* \tparam N number of particles created in the dst(s) in each cell
* \tparam DstTile the dst particle tile type
* \tparam FABs the src array of Array4 type
* \tparam Index the index type, e.g. unsigned int
* \tparam FilterFunc the filter function type
* \tparam CreateFunc1 the create function type for dst1
* \tparam CreateFunc2 the create function type for dst2
* \tparam TransFunc the transform function type
*
* \param[in,out] dst1 the first destination tile
* \param[in,out] dst2 the second destination tile
* \param[in] box the box where the particles are created
* \param[in] src_FABs A collection of source data, e.g. a class with Array4 to the EM fields,
* defined on box on which the filter operation is applied
* \param[in] dst1_index the location at which to starting writing the result to dst1
* \param[in] dst2_index the location at which to starting writing the result to dst2
* \param[in] filter a callable returning a value > 0 if particles are to be created
* in the considered cell.
* \param[in] create1 callable that defines what will be done for the create step for dst1.
* \param[in] create2 callable that defines what will be done for the create step for dst2.
* \param[in] transform callable that defines the transformation to apply on dst1 and dst2.
*
* \return num_added the number of particles that were written to dst1 and dst2.
*/
template <int N, typename DstTile, typename FABs, typename Index,
typename FilterFunc, typename CreateFunc1, typename CreateFunc2,
typename TransFunc>
Index filterCreateTransformFromFAB (DstTile& dst1, DstTile& dst2, const amrex::Box box,
const FABs& src_FABs, const Index dst1_index,
const Index dst2_index, FilterFunc&& filter,
CreateFunc1&& create1, CreateFunc2&& create2,
TransFunc && transform) noexcept
{
using namespace amrex;
FArrayBox NumPartCreation(box, 1);
// This may be unnecessary because of the Gpu::streamSynchronize() in
// filterCreateTransformFromFAB called below, but let us keep it for safety.
const Elixir tmp_eli = NumPartCreation.elixir();
auto arrNumPartCreation = NumPartCreation.array();
const auto ncells = box.volume();
if (ncells == 0) return 0;
Gpu::DeviceVector<Index> mask(ncells);
auto p_mask = mask.dataPtr();
// for loop over all cells in the box. We apply the filter function to each cell
// and store the result in arrNumPartCreation. If the result is strictly greater than
// 0, the mask is set to true at the given cell position.
amrex::ParallelForRNG(box,
[=] AMREX_GPU_DEVICE (int i, int j, int k, amrex::RandomEngine const& engine){
arrNumPartCreation(i,j,k) = filter(src_FABs,i,j,k,engine);
const IntVect iv(AMREX_D_DECL(i,j,k));
const auto mask_position = box.index(iv);
p_mask[mask_position] = (arrNumPartCreation(i,j,k) > 0);
});
return filterCreateTransformFromFAB<N>(dst1, dst2, box, &NumPartCreation,
mask.dataPtr(), dst1_index, dst2_index,
std::forward<CreateFunc1>(create1),
std::forward<CreateFunc2>(create2),
std::forward<TransFunc>(transform));
}
#endif // FILTER_CREATE_TRANSFORM_FROM_FAB_H_
|