aboutsummaryrefslogtreecommitdiff
path: root/Source/FieldSolver/SpectralSolver
diff options
context:
space:
mode:
authorGravatar WeiqunZhang <WeiqunZhang@lbl.gov> 2020-09-01 14:11:35 -0700
committerGravatar GitHub <noreply@github.com> 2020-09-01 14:11:35 -0700
commit3c2eadfde21745eae87c7d13ebaacb1732a8619d (patch)
tree730bc2878800058a66fa308a67e4ca53c0715e71 /Source/FieldSolver/SpectralSolver
parent1652cfa796c9f258832254906ef4148a32768c2b (diff)
downloadWarpX-3c2eadfde21745eae87c7d13ebaacb1732a8619d.tar.gz
WarpX-3c2eadfde21745eae87c7d13ebaacb1732a8619d.tar.zst
WarpX-3c2eadfde21745eae87c7d13ebaacb1732a8619d.zip
Remove ManagedVector from spectral solver (#1270)
Diffstat (limited to 'Source/FieldSolver/SpectralSolver')
-rw-r--r--Source/FieldSolver/SpectralSolver/SpectralBinomialFilter.H4
-rw-r--r--Source/FieldSolver/SpectralSolver/SpectralBinomialFilter.cpp18
-rw-r--r--Source/FieldSolver/SpectralSolver/SpectralHankelTransform/BesselRoots.H2
-rw-r--r--Source/FieldSolver/SpectralSolver/SpectralHankelTransform/HankelTransform.H6
-rw-r--r--Source/FieldSolver/SpectralSolver/SpectralHankelTransform/HankelTransform.cpp51
-rw-r--r--Source/FieldSolver/SpectralSolver/SpectralHankelTransform/SpectralHankelTransformer.cpp1
-rw-r--r--Source/FieldSolver/SpectralSolver/SpectralKSpace.H6
-rw-r--r--Source/FieldSolver/SpectralSolver/SpectralKSpace.cpp147
8 files changed, 131 insertions, 104 deletions
diff --git a/Source/FieldSolver/SpectralSolver/SpectralBinomialFilter.H b/Source/FieldSolver/SpectralSolver/SpectralBinomialFilter.H
index 9d3ab70f5..e15cc38e8 100644
--- a/Source/FieldSolver/SpectralSolver/SpectralBinomialFilter.H
+++ b/Source/FieldSolver/SpectralSolver/SpectralBinomialFilter.H
@@ -19,9 +19,9 @@ class SpectralBinomialFilter
{
public:
- // `KFilterArray` holds a one 1D array ("ManagedVector") that
+ // `KFilterArray` holds a one 1D array ("DeviceVector") that
// implements the filter.
- using KFilterArray = amrex::Gpu::ManagedVector<amrex::Real>;
+ using KFilterArray = amrex::Gpu::DeviceVector<amrex::Real>;
SpectralBinomialFilter () {};
void InitFilterArray (RealKVector const & kvec,
diff --git a/Source/FieldSolver/SpectralSolver/SpectralBinomialFilter.cpp b/Source/FieldSolver/SpectralSolver/SpectralBinomialFilter.cpp
index c4ecd0969..e0ac3e886 100644
--- a/Source/FieldSolver/SpectralSolver/SpectralBinomialFilter.cpp
+++ b/Source/FieldSolver/SpectralSolver/SpectralBinomialFilter.cpp
@@ -20,19 +20,21 @@ SpectralBinomialFilter::InitFilterArray (HankelTransform::RealVector const & kve
bool const compensation,
KFilterArray & filter)
{
-
- filter.resize(kvec.size());
-
- for (int i=0 ; i < kvec.size() ; i++) {
- amrex::Real const ss = std::sin(0.5_rt*kvec[i]*dels);
+ const int N = kvec.size();
+ filter.resize(N);
+ amrex::Real* p_filter = filter.data();
+ amrex::Real const* p_kvec = kvec.data();
+
+ amrex::ParallelFor(N, [=] AMREX_GPU_DEVICE (int i) noexcept
+ {
+ amrex::Real const ss = std::sin(0.5_rt*p_kvec[i]*dels);
amrex::Real const ss2 = ss*ss;
amrex::Real filt = std::pow(1._rt - ss2, npasses);
if (compensation) {
filt *= (1._rt + npasses*ss2);
}
- filter[i] = filt;
- }
-
+ p_filter[i] = filt;
+ });
}
/* \brief Initialize the radial and longitudinal filter arrays */
diff --git a/Source/FieldSolver/SpectralSolver/SpectralHankelTransform/BesselRoots.H b/Source/FieldSolver/SpectralSolver/SpectralHankelTransform/BesselRoots.H
index c7a816c61..8f569ec4e 100644
--- a/Source/FieldSolver/SpectralSolver/SpectralHankelTransform/BesselRoots.H
+++ b/Source/FieldSolver/SpectralSolver/SpectralHankelTransform/BesselRoots.H
@@ -51,7 +51,7 @@ void SecantRootFinder(int n, int nitmx, amrex::Real tol, amrex::Real *zeroj, int
* abramowitz m. & stegun irene a.
* handbook of mathematical functions
*/
-void GetBesselRoots(int n, int nk, HankelTransform::RealVector& roots, amrex::Vector<int> &ier) {
+void GetBesselRoots(int n, int nk, amrex::Vector<amrex::Real>& roots, amrex::Vector<int> &ier) {
amrex::Real zeroj;
int ierror, ik, k;
diff --git a/Source/FieldSolver/SpectralSolver/SpectralHankelTransform/HankelTransform.H b/Source/FieldSolver/SpectralSolver/SpectralHankelTransform/HankelTransform.H
index 46fb28447..5a87031c4 100644
--- a/Source/FieldSolver/SpectralSolver/SpectralHankelTransform/HankelTransform.H
+++ b/Source/FieldSolver/SpectralSolver/SpectralHankelTransform/HankelTransform.H
@@ -21,7 +21,7 @@ class HankelTransform
{
public:
- using RealVector = amrex::Gpu::ManagedVector<amrex::Real>;
+ using RealVector = amrex::Gpu::DeviceVector<amrex::Real>;
// Constructor
HankelTransform(const int hankel_order,
@@ -43,8 +43,8 @@ class HankelTransform
RealVector m_kr;
- RealVector invM;
- RealVector M;
+ RealVector m_invM;
+ RealVector m_M;
};
#endif
diff --git a/Source/FieldSolver/SpectralSolver/SpectralHankelTransform/HankelTransform.cpp b/Source/FieldSolver/SpectralSolver/SpectralHankelTransform/HankelTransform.cpp
index c5249d54f..24e9d7f81 100644
--- a/Source/FieldSolver/SpectralSolver/SpectralHankelTransform/HankelTransform.cpp
+++ b/Source/FieldSolver/SpectralSolver/SpectralHankelTransform/HankelTransform.cpp
@@ -25,7 +25,7 @@ HankelTransform::HankelTransform (int const hankel_order,
AMREX_ALWAYS_ASSERT_WITH_MESSAGE(hankel_order-1 <= azimuthal_mode && azimuthal_mode <= hankel_order+1,
"azimuthal_mode must be either hankel_order-1, hankel_order or hankel_order+1");
- RealVector alphas;
+ amrex::Vector<amrex::Real> alphas;
amrex::Vector<int> alpha_errors;
GetBesselRoots(azimuthal_mode, m_nk, alphas, alpha_errors);
@@ -33,13 +33,13 @@ HankelTransform::HankelTransform (int const hankel_order,
AMREX_ALWAYS_ASSERT(std::all_of(alpha_errors.begin(), alpha_errors.end(), [](int i) { return i == 0; }));
// Calculate the spectral grid
- m_kr.resize(m_nk);
+ amrex::Vector<amrex::Real> kr(m_nk);
for (int ik=0 ; ik < m_nk ; ik++) {
- m_kr[ik] = alphas[ik]/rmax;
+ kr[ik] = alphas[ik]/rmax;
}
// Calculate the spatial grid (Uniform grid with a half-cell offset)
- RealVector rmesh(m_nr);
+ amrex::Vector<amrex::Real> rmesh(m_nr);
amrex::Real dr = rmax/m_nr;
for (int ir=0 ; ir < m_nr ; ir++) {
rmesh[ir] = dr*(ir + 0.5_rt);
@@ -57,22 +57,22 @@ HankelTransform::HankelTransform (int const hankel_order,
p_denom = hankel_order;
}
- RealVector denom(m_nk);
+ amrex::Vector<amrex::Real> denom(m_nk);
for (int ik=0 ; ik < m_nk ; ik++) {
const amrex::Real jna = jn(p_denom, alphas[ik]);
denom[ik] = MathConst::pi*rmax*rmax*jna*jna;
}
- RealVector num(m_nk*m_nr);
+ amrex::Vector<amrex::Real> num(m_nk*m_nr);
for (int ir=0 ; ir < m_nr ; ir++) {
for (int ik=0 ; ik < m_nk ; ik++) {
int const ii = ik + ir*m_nk;
- num[ii] = jn(hankel_order, rmesh[ir]*m_kr[ik]);
+ num[ii] = jn(hankel_order, rmesh[ir]*kr[ik]);
}
}
// Get the inverse matrix
- invM.resize(m_nk*m_nr);
+ amrex::Vector<amrex::Real> invM(m_nk*m_nr);
if (azimuthal_mode > 0) {
for (int ir=0 ; ir < m_nr ; ir++) {
for (int ik=1 ; ik < m_nk ; ik++) {
@@ -107,18 +107,20 @@ HankelTransform::HankelTransform (int const hankel_order,
}
}
+ amrex::Vector<amrex::Real> M;
+
// Calculate the matrix M by inverting invM
if (azimuthal_mode !=0 && hankel_order != azimuthal_mode-1) {
// In this case, invM is singular, thus we calculate the pseudo-inverse.
// The Moore-Penrose psuedo-inverse is calculated using the SVD method.
M.resize(m_nk*m_nr, 0.);
- RealVector invMcopy(invM);
- RealVector sdiag(m_nk-1, 0.);
- RealVector u((m_nk-1)*(m_nk-1), 0.);
- RealVector vt((m_nr)*(m_nr), 0.);
- RealVector sp((m_nr)*(m_nk-1), 0.);
- RealVector temp((m_nr)*(m_nk-1), 0.);
+ amrex::Vector<amrex::Real> invMcopy(invM);
+ amrex::Vector<amrex::Real> sdiag(m_nk-1, 0.);
+ amrex::Vector<amrex::Real> u((m_nk-1)*(m_nk-1), 0.);
+ amrex::Vector<amrex::Real> vt((m_nr)*(m_nr), 0.);
+ amrex::Vector<amrex::Real> sp((m_nr)*(m_nk-1), 0.);
+ amrex::Vector<amrex::Real> temp((m_nr)*(m_nk-1), 0.);
// Calculate the singlular-value-decomposition of invM (leaving out the first row).
// invM = u*sdiag*vt
@@ -169,6 +171,13 @@ HankelTransform::HankelTransform (int const hankel_order,
}
+ m_kr.resize(kr.size());
+ m_invM.resize(invM.size());
+ m_M.resize(M.size());
+ amrex::Gpu::copyAsync(amrex::Gpu::hostToDevice, kr.begin(), kr.end(), m_kr.begin());
+ amrex::Gpu::copyAsync(amrex::Gpu::hostToDevice, invM.begin(), invM.end(), m_invM.begin());
+ amrex::Gpu::copyAsync(amrex::Gpu::hostToDevice, M.begin(), M.end(), m_M.begin());
+ amrex::Gpu::synchronize();
}
void
@@ -193,7 +202,7 @@ HankelTransform::HankelForwardTransform (amrex::FArrayBox const& F, int const F_
// Note that M is flagged to be transposed since it has dimensions (m_nr, m_nk)
blas::gemm(blas::Layout::ColMajor, blas::Op::Trans, blas::Op::NoTrans,
m_nk, nz, m_nr, 1._rt,
- M.dataPtr(), m_nk,
+ m_M.dataPtr(), m_nk,
F.dataPtr(F_icomp)+ngr, nrF, 0._rt,
G.dataPtr(G_icomp), m_nk);
@@ -202,13 +211,13 @@ HankelTransform::HankelForwardTransform (amrex::FArrayBox const& F, int const F_
// It is not clear if the GPU gemm wasn't build properly, it is cycling data out and back
// in to the device, or if it is because gemm is launching its own threads.
- amrex::Real const * M_arr = M.dataPtr();
+ amrex::Real const * M_arr = m_M.dataPtr();
amrex::Array4<const amrex::Real> const & F_arr = F.array();
amrex::Array4< amrex::Real> const & G_arr = G.array();
int const nr = m_nr;
- ParallelFor(G_box,
+ amrex::ParallelFor(G_box,
[=] AMREX_GPU_DEVICE(int ik, int iz, int inotused) noexcept {
G_arr(ik,iz,G_icomp) = 0.;
for (int ir=0 ; ir < nr ; ir++) {
@@ -240,10 +249,10 @@ HankelTransform::HankelInverseTransform (amrex::FArrayBox const& G, int const G_
#ifndef AMREX_USE_GPU
// On CPU, the blas::gemm is significantly faster
- // Note that invM is flagged to be transposed since it has dimensions (m_nk, m_nr)
+ // Note that m_invM is flagged to be transposed since it has dimensions (m_nk, m_nr)
blas::gemm(blas::Layout::ColMajor, blas::Op::Trans, blas::Op::NoTrans,
m_nr, nz, m_nk, 1._rt,
- invM.dataPtr(), m_nr,
+ m_invM.dataPtr(), m_nr,
G.dataPtr(G_icomp), m_nk, 0._rt,
F.dataPtr(F_icomp)+ngr, nrF);
@@ -252,13 +261,13 @@ HankelTransform::HankelInverseTransform (amrex::FArrayBox const& G, int const G_
// It is not clear if the GPU gemm wasn't build properly, it is cycling data out and back
// in to the device, or if it is because gemm is launching its own threads.
- amrex::Real const * invM_arr = invM.dataPtr();
+ amrex::Real const * invM_arr = m_invM.dataPtr();
amrex::Array4<const amrex::Real> const & G_arr = G.array();
amrex::Array4< amrex::Real> const & F_arr = F.array();
int const nk = m_nk;
- ParallelFor(G_box,
+ amrex::ParallelFor(G_box,
[=] AMREX_GPU_DEVICE(int ir, int iz, int inotused) noexcept {
F_arr(ir,iz,F_icomp) = 0.;
for (int ik=0 ; ik < nk ; ik++) {
diff --git a/Source/FieldSolver/SpectralSolver/SpectralHankelTransform/SpectralHankelTransformer.cpp b/Source/FieldSolver/SpectralSolver/SpectralHankelTransform/SpectralHankelTransformer.cpp
index 7dd297418..518eea03e 100644
--- a/Source/FieldSolver/SpectralSolver/SpectralHankelTransform/SpectralHankelTransformer.cpp
+++ b/Source/FieldSolver/SpectralSolver/SpectralHankelTransform/SpectralHankelTransformer.cpp
@@ -49,6 +49,7 @@ SpectralHankelTransformer::ExtractKrArray ()
kr_array[ii] = kr_m_array[ir];
});
}
+ amrex::Gpu::synchronize();
}
/* \brief Converts a scalar field from the physical to the spectral space for all modes */
diff --git a/Source/FieldSolver/SpectralSolver/SpectralKSpace.H b/Source/FieldSolver/SpectralSolver/SpectralKSpace.H
index 01bbb4b35..5816a3477 100644
--- a/Source/FieldSolver/SpectralSolver/SpectralKSpace.H
+++ b/Source/FieldSolver/SpectralSolver/SpectralKSpace.H
@@ -15,12 +15,12 @@
// `KVectorComponent` and `SpectralShiftFactor` hold one 1D array
-// ("ManagedVector") for each box ("LayoutData"). The arrays are
+// ("DeviceVector") for each box ("LayoutData"). The arrays are
// only allocated if the corresponding box is owned by the local MPI rank.
-using RealKVector = amrex::Gpu::ManagedVector<amrex::Real>;
+using RealKVector = amrex::Gpu::DeviceVector<amrex::Real>;
using KVectorComponent = amrex::LayoutData< RealKVector >;
using SpectralShiftFactor = amrex::LayoutData<
- amrex::Gpu::ManagedVector<Complex> >;
+ amrex::Gpu::DeviceVector<Complex> >;
// Indicate the type of correction "shift" factor to apply
// when the FFT is performed from/to a cell-centered grid in real space.
diff --git a/Source/FieldSolver/SpectralSolver/SpectralKSpace.cpp b/Source/FieldSolver/SpectralSolver/SpectralKSpace.cpp
index 2c60cdc38..dd12fb2b8 100644
--- a/Source/FieldSolver/SpectralSolver/SpectralKSpace.cpp
+++ b/Source/FieldSolver/SpectralSolver/SpectralKSpace.cpp
@@ -75,17 +75,18 @@ SpectralKSpace::getKComponent( const DistributionMapping& dm,
const int i_dim,
const bool only_positive_k ) const
{
- // Initialize an empty ManagedVector in each box
+ // Initialize an empty DeviceVector in each box
KVectorComponent k_comp(spectralspace_ba, dm);
- // Loop over boxes and allocate the corresponding ManagedVector
+ // Loop over boxes and allocate the corresponding DeviceVector
// for each box owned by the local MPI proc
for ( MFIter mfi(spectralspace_ba, dm); mfi.isValid(); ++mfi ){
Box bx = spectralspace_ba[mfi];
- ManagedVector<Real>& k = k_comp[mfi];
+ DeviceVector<Real>& k = k_comp[mfi];
// Allocate k to the right size
int N = bx.length( i_dim );
k.resize( N );
+ Real* pk = k.data();
// Fill the k vector
IntVect fft_size = realspace_ba[mfi].length();
@@ -97,21 +98,24 @@ SpectralKSpace::getKComponent( const DistributionMapping& dm,
if (only_positive_k){
// Fill the full axis with positive k values
// (typically: first axis, in a real-to-complex FFT)
- for (int i=0; i<N; i++ ){
- k[i] = i*dk;
- }
+ amrex::ParallelFor(N, [=] AMREX_GPU_DEVICE (int i) noexcept
+ {
+ pk[i] = i*dk;
+ });
} else {
const int mid_point = (N+1)/2;
- // Fill positive values of k
- // (FFT conventions: first half is positive)
- for (int i=0; i<mid_point; i++ ){
- k[i] = i*dk;
- }
- // Fill negative values of k
- // (FFT conventions: second half is negative)
- for (int i=mid_point; i<N; i++){
- k[i] = (i-N)*dk;
- }
+ amrex::ParallelFor(N, [=] AMREX_GPU_DEVICE (int i) noexcept
+ {
+ if (i < mid_point) {
+ // Fill positive values of k
+ // (FFT conventions: first half is positive)
+ pk[i] = i*dk;
+ } else {
+ // Fill negative values of k
+ // (FFT conventions: second half is negative)
+ pk[i] = (i-N)*dk;
+ }
+ });
}
}
return k_comp;
@@ -131,16 +135,19 @@ SpectralKSpace::getSpectralShiftFactor( const DistributionMapping& dm,
const int i_dim,
const int shift_type ) const
{
- // Initialize an empty ManagedVector in each box
+ // Initialize an empty DeviceVector in each box
SpectralShiftFactor shift_factor( spectralspace_ba, dm );
- // Loop over boxes and allocate the corresponding ManagedVector
+ // Loop over boxes and allocate the corresponding DeviceVector
// for each box owned by the local MPI proc
for ( MFIter mfi(spectralspace_ba, dm); mfi.isValid(); ++mfi ){
- const ManagedVector<Real>& k = k_vec[i_dim][mfi];
- ManagedVector<Complex>& shift = shift_factor[mfi];
+ const DeviceVector<Real>& k = k_vec[i_dim][mfi];
+ DeviceVector<Complex>& shift = shift_factor[mfi];
// Allocate shift coefficients
- shift.resize( k.size() );
+ const int N = k.size();
+ shift.resize(N);
+ Real const* pk = k.data();
+ Complex* pshift = shift.data();
// Fill the shift coefficients
Real sign = 0;
@@ -149,11 +156,10 @@ SpectralKSpace::getSpectralShiftFactor( const DistributionMapping& dm,
case ShiftType::TransformToCellCentered: sign = 1.;
}
const Complex I{0,1};
- int i = 0;
- for (auto const& kv : k){
- shift[i] = exp( I*sign*kv*0.5_rt*dx[i_dim]);
- i++;
- }
+ amrex::ParallelFor(N, [=] AMREX_GPU_DEVICE (int i) noexcept
+ {
+ pshift[i] = amrex::exp( I*sign*pk[i]*0.5_rt*dx[i_dim]);
+ });
}
return shift_factor;
}
@@ -177,72 +183,81 @@ SpectralKSpace::getModifiedKComponent( const DistributionMapping& dm,
const int n_order,
const bool nodal ) const
{
- // Initialize an empty ManagedVector in each box
+ // Initialize an empty DeviceVector in each box
KVectorComponent modified_k_comp(spectralspace_ba, dm);
if (n_order == -1) { // Infinite-order case
for ( MFIter mfi(spectralspace_ba, dm); mfi.isValid(); ++mfi ){
- const ManagedVector<Real>& k = k_vec[i_dim][mfi];
- ManagedVector<Real>& modified_k = modified_k_comp[mfi];
+ const DeviceVector<Real>& k = k_vec[i_dim][mfi];
+ DeviceVector<Real>& modified_k = modified_k_comp[mfi];
// Allocate modified_k to the same size as k
+ const int N = k.size();
modified_k.resize( k.size() );
// Fill the modified k vector
- int i = 0;
- for (auto const& kv : k){
- modified_k[i] = k[i]; // infinite-order case.
- i++;
- }
+ Gpu::copyAsync(Gpu::deviceToDevice, k.begin(), k.end(), modified_k.begin());
}
} else {
// Compute real-space stencil coefficients
- Vector<Real> stencil_coef = getFonbergStencilCoefficients(n_order, nodal);
+ Vector<Real> h_stencil_coef = getFonbergStencilCoefficients(n_order, nodal);
+ DeviceVector<Real> d_stencil_coef(h_stencil_coef.size());
+ Gpu::copyAsync(Gpu::hostToDevice, h_stencil_coef.begin(), h_stencil_coef.end(),
+ d_stencil_coef.begin());
+ Gpu::synchronize();
+ const int nstencil = d_stencil_coef.size();
+ Real const* p_stencil_coef = d_stencil_coef.data();
- // Loop over boxes and allocate the corresponding ManagedVector
+ // Loop over boxes and allocate the corresponding DeviceVector
// for each box owned by the local MPI proc
for ( MFIter mfi(spectralspace_ba, dm); mfi.isValid(); ++mfi ){
Real delta_x = dx[i_dim];
- const ManagedVector<Real>& k = k_vec[i_dim][mfi];
- ManagedVector<Real>& modified_k = modified_k_comp[mfi];
+ const DeviceVector<Real>& k = k_vec[i_dim][mfi];
+ DeviceVector<Real>& modified_k = modified_k_comp[mfi];
// Allocate modified_k to the same size as k
- modified_k.resize( k.size() );
+ const int N = k.size();
+ modified_k.resize(N);
+ Real const* p_k = k.data();
+ Real * p_modified_k = modified_k.data();
// Fill the modified k vector
- int i = 0;
- for (auto const& kv : k){
- modified_k[i] = 0;
- for (int n=1; n<stencil_coef.size(); n++){
+ amrex::ParallelFor(N, [=] AMREX_GPU_DEVICE (int i) noexcept
+ {
+ p_modified_k[i] = 0;
+ for (int n=1; n<nstencil; n++){
if (nodal){
- modified_k[i] += stencil_coef[n]* \
- std::sin( k[i]*n*delta_x )/( n*delta_x );
+ p_modified_k[i] += p_stencil_coef[n]*
+ std::sin( p_k[i]*n*delta_x )/( n*delta_x );
} else {
- modified_k[i] += stencil_coef[n]* \
- std::sin( k[i]*(n-0.5)*delta_x )/( (n-0.5)*delta_x );
+ p_modified_k[i] += p_stencil_coef[n]* \
+ std::sin( p_k[i]*(n-0.5)*delta_x )/( (n-0.5)*delta_x );
}
}
- i++;
- }
- // By construction, at finite order and for a nodal grid,
- // the *modified* k corresponding to the Nyquist frequency
- // (i.e. highest *real* k) is 0. However, the above calculation
- // based on stencil coefficients does not give 0 to machine precision.
- // Therefore, we need to enforce the fact that the modified k be 0 here.
- if (nodal){
- if (i_dim == 0){
- // Because of the real-to-complex FFTs, the first axis (idim=0)
- // contains only the positive k, and the Nyquist frequency is
- // the last element of the array.
- modified_k[k.size()-1] = 0.0_rt;
- } else {
- // The other axes contains both positive and negative k ;
- // the Nyquist frequency is in the middle of the array.
- modified_k[k.size()/2] = 0.0_rt;
- }
- }
+ // By construction, at finite order and for a nodal grid,
+ // the *modified* k corresponding to the Nyquist frequency
+ // (i.e. highest *real* k) is 0. However, the above calculation
+ // based on stencil coefficients does not give 0 to machine precision.
+ // Therefore, we need to enforce the fact that the modified k be 0 here.
+ if (nodal){
+ if (i_dim == 0){
+ // Because of the real-to-complex FFTs, the first axis (idim=0)
+ // contains only the positive k, and the Nyquist frequency is
+ // the last element of the array.
+ if (i == N-1) {
+ p_modified_k[i] = 0.0_rt;
+ }
+ } else {
+ // The other axes contains both positive and negative k ;
+ // the Nyquist frequency is in the middle of the array.
+ if (i == N/2) {
+ p_modified_k[i] = 0.0_rt;
+ }
+ }
+ }
+ });
}
}
return modified_k_comp;