Source/FieldSolver/SpectralSolver/SpectralKSpace.cpp


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316

/* Copyright 2019-2020 Andrew Myers, Maxence Thevenet, Remi Lehe
 * Revathi Jambunathan
 *
 * This file is part of WarpX.
 *
 * License: BSD-3-Clause-LBNL
 */
#include "SpectralKSpace.H"

#include "Utils/WarpXConst.H"

#include <AMReX_BLassert.H>
#include <AMReX_Box.H>
#include <AMReX_BoxList.H>
#include <AMReX_GpuComplex.H>
#include <AMReX_GpuDevice.H>
#include <AMReX_GpuLaunch.H>
#include <AMReX_GpuQualifiers.H>
#include <AMReX_IndexType.H>
#include <AMReX_IntVect.H>
#include <AMReX_MFIter.H>

#include <array>
#include <cmath>
#include <vector>

using namespace amrex;

/* \brief Initialize k space object.
 *
 * \param realspace_ba Box array that corresponds to the decomposition
 * of the fields in real space (cell-centered ; includes guard cells)
 * \param dm Indicates which MPI proc owns which box, in realspace_ba.
 * \param realspace_dx Cell size of the grid in real space
 */
SpectralKSpace::SpectralKSpace( const BoxArray& realspace_ba,
                                const DistributionMapping& dm,
                                const RealVect realspace_dx )
    : dx(realspace_dx)  // Store the cell size as member `dx`
{
    AMREX_ALWAYS_ASSERT_WITH_MESSAGE(
        realspace_ba.ixType()==IndexType::TheCellType(),
        "SpectralKSpace expects a cell-centered box.");

    // Create the box array that corresponds to spectral space
    BoxList spectral_bl; // Create empty box list
    // Loop over boxes and fill the box list
    for (int i=0; i < realspace_ba.size(); i++ ) {
        // For local FFTs, boxes in spectral space start at 0 in
        // each direction and have the same number of points as the
        // (cell-centered) real space box
        Box realspace_bx = realspace_ba[i];
        IntVect fft_size = realspace_bx.length();
        // Because the spectral solver uses real-to-complex FFTs, we only
        // need the positive k values along the fastest axis
        // (first axis for AMReX Fortran-order arrays) in spectral space.
        // This effectively reduces the size of the spectral space by half
        // see e.g. the FFTW documentation for real-to-complex FFTs
        IntVect spectral_bx_size = fft_size;
        spectral_bx_size[0] = fft_size[0]/2 + 1;
        // Define the corresponding box
        Box spectral_bx = Box( IntVect::TheZeroVector(),
                               spectral_bx_size - IntVect::TheUnitVector() );
        spectral_bl.push_back( spectral_bx );
    }
    spectralspace_ba.define( spectral_bl );

    // Allocate the components of the k vector: kx, ky (only in 3D), kz
    bool only_positive_k;
    for (int i_dim=0; i_dim<AMREX_SPACEDIM; i_dim++) {
        if (i_dim==0) {
            // Real-to-complex FFTs: first axis contains only the positive k
            only_positive_k = true;
        } else {
            only_positive_k = false;
        }
        k_vec[i_dim] = getKComponent(dm, realspace_ba, i_dim, only_positive_k);
    }
}

/* For each box, in `spectralspace_ba`, which is owned by the local MPI rank
 * (as indicated by the argument `dm`), compute the values of the
 * corresponding k coordinate along the dimension specified by `i_dim`
 */
KVectorComponent
SpectralKSpace::getKComponent( const DistributionMapping& dm,
                               const BoxArray& realspace_ba,
                               const int i_dim,
                               const bool only_positive_k ) const
{
    // Initialize an empty DeviceVector in each box
    KVectorComponent k_comp(spectralspace_ba, dm);
    // Loop over boxes and allocate the corresponding DeviceVector
    // for each box owned by the local MPI proc
    for ( MFIter mfi(spectralspace_ba, dm); mfi.isValid(); ++mfi ){
        Box bx = spectralspace_ba[mfi];
        Gpu::DeviceVector<Real>& k = k_comp[mfi];

        // Allocate k to the right size
        int N = bx.length( i_dim );
        k.resize( N );
        Real* pk = k.data();

        // Fill the k vector
        IntVect fft_size = realspace_ba[mfi].length();
        const Real dk = 2*MathConst::pi/(fft_size[i_dim]*dx[i_dim]);
        AMREX_ALWAYS_ASSERT_WITH_MESSAGE( bx.smallEnd(i_dim) == 0,
            "Expected box to start at 0, in spectral space.");
        AMREX_ALWAYS_ASSERT_WITH_MESSAGE( bx.bigEnd(i_dim) == N-1,
            "Expected different box end index in spectral space.");
        if (only_positive_k){
            // Fill the full axis with positive k values
            // (typically: first axis, in a real-to-complex FFT)
            amrex::ParallelFor(N, [=] AMREX_GPU_DEVICE (int i) noexcept
            {
                pk[i] = i*dk;
            });
        } else {
            const int mid_point = (N+1)/2;
            amrex::ParallelFor(N, [=] AMREX_GPU_DEVICE (int i) noexcept
            {
                if (i < mid_point) {
                    // Fill positive values of k
                    // (FFT conventions: first half is positive)
                    pk[i] = i*dk;
                } else {
                    // Fill negative values of k
                    // (FFT conventions: second half is negative)
                    pk[i] = (i-N)*dk;
                }
            });
        }
    }
    return k_comp;
}

/* For each box, in `spectralspace_ba`, which is owned by the local MPI rank
 * (as indicated by the argument `dm`), compute the values of the
 * corresponding correcting "shift" factor, along the dimension
 * specified by `i_dim`.
 *
 * (By default, we assume the FFT is done from/to a nodal grid in real space
 * It the FFT is performed from/to a cell-centered grid in real space,
 * a correcting "shift" factor must be applied in spectral space.)
 */
SpectralShiftFactor
SpectralKSpace::getSpectralShiftFactor( const DistributionMapping& dm,
                                        const int i_dim,
                                        const int shift_type ) const
{
    // Initialize an empty DeviceVector in each box
    SpectralShiftFactor shift_factor( spectralspace_ba, dm );
   // Loop over boxes and allocate the corresponding DeviceVector
    // for each box owned by the local MPI proc
    for ( MFIter mfi(spectralspace_ba, dm); mfi.isValid(); ++mfi ){
        const Gpu::DeviceVector<Real>& k = k_vec[i_dim][mfi];
        Gpu::DeviceVector<Complex>& shift = shift_factor[mfi];

        // Allocate shift coefficients
        const int N = k.size();
        shift.resize(N);
        Real const* pk = k.data();
        Complex* pshift = shift.data();

        // Fill the shift coefficients
        Real sign = 0;
        switch (shift_type){
            case ShiftType::TransformFromCellCentered: sign = -1.; break;
            case ShiftType::TransformToCellCentered: sign = 1.;
        }
        const Complex I{0,1};
        const auto t_dx_idim = dx[i_dim];
        amrex::ParallelFor(N, [=] AMREX_GPU_DEVICE (int i) noexcept
        {
            pshift[i] = amrex::exp( I*sign*pk[i]*0.5_rt*t_dx_idim);
        });
    }
    return shift_factor;
}

/* \brief For each box, in `spectralspace_ba`, which is owned by the local MPI
 * rank (as indicated by the argument `dm`), compute the values of the
 * corresponding finite-order modified k vector, along the
 * dimension specified by `i_dim`
 *
 * The finite-order modified k vector is the spectral-space representation
 * of a finite-order stencil in real space.
 *
 * \param n_order Order of accuracy of the stencil, in discretizing
 *                a spatial derivative
 * \param nodal Whether the stencil is to be applied to a nodal or
                staggered set of fields
 */
KVectorComponent
SpectralKSpace::getModifiedKComponent( const DistributionMapping& dm,
                                       const int i_dim,
                                       const int n_order,
                                       const bool nodal ) const
{
    // Initialize an empty DeviceVector in each box
    KVectorComponent modified_k_comp(spectralspace_ba, dm);

    if (n_order == -1) { // Infinite-order case
        for ( MFIter mfi(spectralspace_ba, dm); mfi.isValid(); ++mfi ){
            const Gpu::DeviceVector<Real>& k = k_vec[i_dim][mfi];
            Gpu::DeviceVector<Real>& modified_k = modified_k_comp[mfi];

            // Allocate modified_k to the same size as k
            const int N = k.size();
            modified_k.resize(N);

            // Fill the modified k vector
            Gpu::copyAsync(Gpu::deviceToDevice, k.begin(), k.end(), modified_k.begin());
        }
    } else {

        // Compute real-space stencil coefficients
        Vector<Real> h_stencil_coef = getFornbergStencilCoefficients(n_order, nodal);
        Gpu::DeviceVector<Real> d_stencil_coef(h_stencil_coef.size());
        Gpu::copyAsync(Gpu::hostToDevice, h_stencil_coef.begin(), h_stencil_coef.end(),
                       d_stencil_coef.begin());
        Gpu::synchronize();
        const int nstencil = d_stencil_coef.size();
        Real const* p_stencil_coef = d_stencil_coef.data();

        // Loop over boxes and allocate the corresponding DeviceVector
        // for each box owned by the local MPI proc
        for ( MFIter mfi(spectralspace_ba, dm); mfi.isValid(); ++mfi ){
            Real delta_x = dx[i_dim];
            const Gpu::DeviceVector<Real>& k = k_vec[i_dim][mfi];
            Gpu::DeviceVector<Real>& modified_k = modified_k_comp[mfi];

            // Allocate modified_k to the same size as k
            const int N = k.size();
            modified_k.resize(N);
            Real const* p_k = k.data();
            Real * p_modified_k = modified_k.data();

            // Fill the modified k vector
            amrex::ParallelFor(N, [=] AMREX_GPU_DEVICE (int i) noexcept
            {
                p_modified_k[i] = 0;
                for (int n=0; n<nstencil; n++){
                    if (nodal){
                        p_modified_k[i] += p_stencil_coef[n]*
                            std::sin( p_k[i]*(n+1)*delta_x )/( (n+1)*delta_x );
                    } else {
                        p_modified_k[i] += p_stencil_coef[n]* \
                            std::sin( p_k[i]*(n+0.5)*delta_x )/( (n+0.5)*delta_x );
                    }
                }

                // By construction, at finite order and for a nodal grid,
                // the *modified* k corresponding to the Nyquist frequency
                // (i.e. highest *real* k) is 0. However, the above calculation
                // based on stencil coefficients does not give 0 to machine precision.
                // Therefore, we need to enforce the fact that the modified k be 0 here.
                if (nodal){
                    if (i_dim == 0){
                        // Because of the real-to-complex FFTs, the first axis (idim=0)
                        // contains only the positive k, and the Nyquist frequency is
                        // the last element of the array.
                        if (i == N-1) {
                            p_modified_k[i] = 0.0_rt;
                        }
                    } else {
                        // The other axes contains both positive and negative k ;
                        // the Nyquist frequency is in the middle of the array.
                        if (i == N/2) {
                            p_modified_k[i] = 0.0_rt;
                        }
                    }
                }
            });
        }
    }
    return modified_k_comp;
}

Vector<Real>
getFornbergStencilCoefficients(const int n_order, const bool nodal)
{
    AMREX_ALWAYS_ASSERT_WITH_MESSAGE(n_order % 2 == 0, "n_order must be even");

    const int m = n_order / 2;
    Vector<Real> coefs;
    coefs.resize(m);

    // There are closed-form formula for these coefficients, but they result in
    // an overflow when evaluated numerically. One way to avoid the overflow is
    // to calculate the coefficients by recurrence.

    // Coefficients for nodal (that is, centered) finite-difference approximation
    if (nodal == true) {
       // First coefficient
       coefs[0] = m * 2. / (m+1);
       // Other coefficients by recurrence
       for (int n = 1; n < m; n++) {
           coefs[n] = - (m-n) * 1. / (m+n+1) * coefs[n-1];
       }
    }
    // Coefficients for staggered finite-difference approximation
    else {
       Real prod = 1.;
       for (int k = 1; k < m+1; k++) {
           prod *= (m + k) / (4. * k);
       }
       // First coefficient
       coefs[0] = 4 * m * prod * prod;
       // Other coefficients by recurrence
       for (int n = 1; n < m; n++) {
           coefs[n] = - ((2*n-1) * (m-n)) * 1. / ((2*n+1) * (m+n)) * coefs[n-1];
       }
    }
    return coefs;
}