Remove ManagedVector from spectral solver (#1270)

author: WeiqunZhang <WeiqunZhang@lbl.gov> 2020-09-01 14:11:35 -0700
committer: GitHub <noreply@github.com> 2020-09-01 14:11:35 -0700
commit: 3c2eadfde21745eae87c7d13ebaacb1732a8619d (patch)
tree: 730bc2878800058a66fa308a67e4ca53c0715e71 /Source/FieldSolver/SpectralSolver
parent: 1652cfa796c9f258832254906ef4148a32768c2b (diff)
download: WarpX-3c2eadfde21745eae87c7d13ebaacb1732a8619d.tar.gz
WarpX-3c2eadfde21745eae87c7d13ebaacb1732a8619d.tar.zst
WarpX-3c2eadfde21745eae87c7d13ebaacb1732a8619d.zip
8 files changed, 131 insertions, 104 deletions
diff --git a/Source/FieldSolver/SpectralSolver/SpectralBinomialFilter.H b/Source/FieldSolver/SpectralSolver/SpectralBinomialFilter.H
index 9d3ab70f5..e15cc38e8 100644
--- a/Source/FieldSolver/SpectralSolver/SpectralBinomialFilter.H
+++ b/Source/FieldSolver/SpectralSolver/SpectralBinomialFilter.H
@@ -19,9 +19,9 @@ class SpectralBinomialFilter
 {
     public:
 
-        // `KFilterArray` holds a one 1D array ("ManagedVector") that
+        // `KFilterArray` holds a one 1D array ("DeviceVector") that
         // implements the filter.
-        using KFilterArray = amrex::Gpu::ManagedVector<amrex::Real>;
+        using KFilterArray = amrex::Gpu::DeviceVector<amrex::Real>;
 
         SpectralBinomialFilter () {};
         void InitFilterArray (RealKVector const & kvec,
diff --git a/Source/FieldSolver/SpectralSolver/SpectralBinomialFilter.cpp b/Source/FieldSolver/SpectralSolver/SpectralBinomialFilter.cpp
index c4ecd0969..e0ac3e886 100644
--- a/Source/FieldSolver/SpectralSolver/SpectralBinomialFilter.cpp
+++ b/Source/FieldSolver/SpectralSolver/SpectralBinomialFilter.cpp
@@ -20,19 +20,21 @@ SpectralBinomialFilter::InitFilterArray (HankelTransform::RealVector const & kve
                                          bool const compensation,
                                          KFilterArray & filter)
 {
-
-    filter.resize(kvec.size());
-
-    for (int i=0 ; i < kvec.size() ; i++) {
-        amrex::Real const ss = std::sin(0.5_rt*kvec[i]*dels);
+    const int N = kvec.size();
+    filter.resize(N);
+    amrex::Real* p_filter = filter.data();
+    amrex::Real const* p_kvec = kvec.data();
+
+    amrex::ParallelFor(N, [=] AMREX_GPU_DEVICE (int i) noexcept
+    {
+        amrex::Real const ss = std::sin(0.5_rt*p_kvec[i]*dels);
         amrex::Real const ss2 = ss*ss;
         amrex::Real filt = std::pow(1._rt - ss2, npasses);
         if (compensation) {
             filt *= (1._rt + npasses*ss2);
         }
-        filter[i] = filt;
-    }
-
+        p_filter[i] = filt;
+    });
 }
 
 /* \brief Initialize the radial and longitudinal filter arrays */
diff --git a/Source/FieldSolver/SpectralSolver/SpectralHankelTransform/BesselRoots.H b/Source/FieldSolver/SpectralSolver/SpectralHankelTransform/BesselRoots.H
index c7a816c61..8f569ec4e 100644
--- a/Source/FieldSolver/SpectralSolver/SpectralHankelTransform/BesselRoots.H
+++ b/Source/FieldSolver/SpectralSolver/SpectralHankelTransform/BesselRoots.H
@@ -51,7 +51,7 @@ void SecantRootFinder(int n, int nitmx, amrex::Real tol, amrex::Real *zeroj, int
  *    abramowitz m. & stegun irene a.
  *    handbook of mathematical functions
  */
-void GetBesselRoots(int n, int nk, HankelTransform::RealVector& roots, amrex::Vector<int> &ier)  {
+void GetBesselRoots(int n, int nk, amrex::Vector<amrex::Real>& roots, amrex::Vector<int> &ier)  {
     amrex::Real zeroj;
     int ierror, ik, k;
 
diff --git a/Source/FieldSolver/SpectralSolver/SpectralHankelTransform/HankelTransform.H b/Source/FieldSolver/SpectralSolver/SpectralHankelTransform/HankelTransform.H
index 46fb28447..5a87031c4 100644
--- a/Source/FieldSolver/SpectralSolver/SpectralHankelTransform/HankelTransform.H
+++ b/Source/FieldSolver/SpectralSolver/SpectralHankelTransform/HankelTransform.H
@@ -21,7 +21,7 @@ class HankelTransform
 {
     public:
 
-        using RealVector = amrex::Gpu::ManagedVector<amrex::Real>;
+        using RealVector = amrex::Gpu::DeviceVector<amrex::Real>;
 
         // Constructor
         HankelTransform(const int hankel_order,
@@ -43,8 +43,8 @@ class HankelTransform
 
         RealVector m_kr;
 
-        RealVector invM;
-        RealVector M;
+        RealVector m_invM;
+        RealVector m_M;
 };
 
 #endif
diff --git a/Source/FieldSolver/SpectralSolver/SpectralHankelTransform/HankelTransform.cpp b/Source/FieldSolver/SpectralSolver/SpectralHankelTransform/HankelTransform.cpp
index c5249d54f..24e9d7f81 100644
--- a/Source/FieldSolver/SpectralSolver/SpectralHankelTransform/HankelTransform.cpp
+++ b/Source/FieldSolver/SpectralSolver/SpectralHankelTransform/HankelTransform.cpp
@@ -25,7 +25,7 @@ HankelTransform::HankelTransform (int const hankel_order,
     AMREX_ALWAYS_ASSERT_WITH_MESSAGE(hankel_order-1 <= azimuthal_mode && azimuthal_mode <= hankel_order+1,
                                      "azimuthal_mode must be either hankel_order-1, hankel_order or hankel_order+1");
 
-    RealVector alphas;
+    amrex::Vector<amrex::Real> alphas;
     amrex::Vector<int> alpha_errors;
 
     GetBesselRoots(azimuthal_mode, m_nk, alphas, alpha_errors);
@@ -33,13 +33,13 @@ HankelTransform::HankelTransform (int const hankel_order,
     AMREX_ALWAYS_ASSERT(std::all_of(alpha_errors.begin(), alpha_errors.end(), [](int i) { return i == 0; }));
 
     // Calculate the spectral grid
-    m_kr.resize(m_nk);
+    amrex::Vector<amrex::Real> kr(m_nk);
     for (int ik=0 ; ik < m_nk ; ik++) {
-        m_kr[ik] = alphas[ik]/rmax;
+        kr[ik] = alphas[ik]/rmax;
     }
 
     // Calculate the spatial grid (Uniform grid with a half-cell offset)
-    RealVector rmesh(m_nr);
+    amrex::Vector<amrex::Real> rmesh(m_nr);
     amrex::Real dr = rmax/m_nr;
     for (int ir=0 ; ir < m_nr ; ir++) {
         rmesh[ir] = dr*(ir + 0.5_rt);
@@ -57,22 +57,22 @@ HankelTransform::HankelTransform (int const hankel_order,
         p_denom = hankel_order;
     }
 
-    RealVector denom(m_nk);
+    amrex::Vector<amrex::Real> denom(m_nk);
     for (int ik=0 ; ik < m_nk ; ik++) {
         const amrex::Real jna = jn(p_denom, alphas[ik]);
         denom[ik] = MathConst::pi*rmax*rmax*jna*jna;
     }
 
-    RealVector num(m_nk*m_nr);
+    amrex::Vector<amrex::Real> num(m_nk*m_nr);
     for (int ir=0 ; ir < m_nr ; ir++) {
         for (int ik=0 ; ik < m_nk ; ik++) {
             int const ii = ik + ir*m_nk;
-            num[ii] = jn(hankel_order, rmesh[ir]*m_kr[ik]);
+            num[ii] = jn(hankel_order, rmesh[ir]*kr[ik]);
         }
     }
 
     // Get the inverse matrix
-    invM.resize(m_nk*m_nr);
+    amrex::Vector<amrex::Real> invM(m_nk*m_nr);
     if (azimuthal_mode > 0) {
         for (int ir=0 ; ir < m_nr ; ir++) {
             for (int ik=1 ; ik < m_nk ; ik++) {
@@ -107,18 +107,20 @@ HankelTransform::HankelTransform (int const hankel_order,
         }
     }
 
+    amrex::Vector<amrex::Real> M;
+
     // Calculate the matrix M by inverting invM
     if (azimuthal_mode !=0 && hankel_order != azimuthal_mode-1) {
         // In this case, invM is singular, thus we calculate the pseudo-inverse.
         // The Moore-Penrose psuedo-inverse is calculated using the SVD method.
 
         M.resize(m_nk*m_nr, 0.);
-        RealVector invMcopy(invM);
-        RealVector sdiag(m_nk-1, 0.);
-        RealVector u((m_nk-1)*(m_nk-1), 0.);
-        RealVector vt((m_nr)*(m_nr), 0.);
-        RealVector sp((m_nr)*(m_nk-1), 0.);
-        RealVector temp((m_nr)*(m_nk-1), 0.);
+        amrex::Vector<amrex::Real> invMcopy(invM);
+        amrex::Vector<amrex::Real> sdiag(m_nk-1, 0.);
+        amrex::Vector<amrex::Real> u((m_nk-1)*(m_nk-1), 0.);
+        amrex::Vector<amrex::Real> vt((m_nr)*(m_nr), 0.);
+        amrex::Vector<amrex::Real> sp((m_nr)*(m_nk-1), 0.);
+        amrex::Vector<amrex::Real> temp((m_nr)*(m_nk-1), 0.);
 
         // Calculate the singlular-value-decomposition of invM (leaving out the first row).
         // invM = u*sdiag*vt
@@ -169,6 +171,13 @@ HankelTransform::HankelTransform (int const hankel_order,
 
     }
 
+    m_kr.resize(kr.size());
+    m_invM.resize(invM.size());
+    m_M.resize(M.size());
+    amrex::Gpu::copyAsync(amrex::Gpu::hostToDevice, kr.begin(), kr.end(), m_kr.begin());
+    amrex::Gpu::copyAsync(amrex::Gpu::hostToDevice, invM.begin(), invM.end(), m_invM.begin());
+    amrex::Gpu::copyAsync(amrex::Gpu::hostToDevice, M.begin(), M.end(), m_M.begin());
+    amrex::Gpu::synchronize();
 }
 
 void
@@ -193,7 +202,7 @@ HankelTransform::HankelForwardTransform (amrex::FArrayBox const& F, int const F_
     // Note that M is flagged to be transposed since it has dimensions (m_nr, m_nk)
     blas::gemm(blas::Layout::ColMajor, blas::Op::Trans, blas::Op::NoTrans,
                m_nk, nz, m_nr, 1._rt,
-               M.dataPtr(), m_nk,
+               m_M.dataPtr(), m_nk,
                F.dataPtr(F_icomp)+ngr, nrF, 0._rt,
                G.dataPtr(G_icomp), m_nk);
 
@@ -202,13 +211,13 @@ HankelTransform::HankelForwardTransform (amrex::FArrayBox const& F, int const F_
     // It is not clear if the GPU gemm wasn't build properly, it is cycling data out and back
     // in to the device, or if it is because gemm is launching its own threads.
 
-    amrex::Real const * M_arr = M.dataPtr();
+    amrex::Real const * M_arr = m_M.dataPtr();
     amrex::Array4<const amrex::Real> const & F_arr = F.array();
     amrex::Array4<      amrex::Real> const & G_arr = G.array();
 
     int const nr = m_nr;
 
-    ParallelFor(G_box,
+    amrex::ParallelFor(G_box,
     [=] AMREX_GPU_DEVICE(int ik, int iz, int inotused) noexcept {
         G_arr(ik,iz,G_icomp) = 0.;
         for (int ir=0 ; ir < nr ; ir++) {
@@ -240,10 +249,10 @@ HankelTransform::HankelInverseTransform (amrex::FArrayBox const& G, int const G_
 #ifndef AMREX_USE_GPU
     // On CPU, the blas::gemm is significantly faster
 
-    // Note that invM is flagged to be transposed since it has dimensions (m_nk, m_nr)
+    // Note that m_invM is flagged to be transposed since it has dimensions (m_nk, m_nr)
     blas::gemm(blas::Layout::ColMajor, blas::Op::Trans, blas::Op::NoTrans,
                m_nr, nz, m_nk, 1._rt,
-               invM.dataPtr(), m_nr,
+               m_invM.dataPtr(), m_nr,
                G.dataPtr(G_icomp), m_nk, 0._rt,
                F.dataPtr(F_icomp)+ngr, nrF);
 
@@ -252,13 +261,13 @@ HankelTransform::HankelInverseTransform (amrex::FArrayBox const& G, int const G_
     // It is not clear if the GPU gemm wasn't build properly, it is cycling data out and back
     // in to the device, or if it is because gemm is launching its own threads.
 
-    amrex::Real const * invM_arr = invM.dataPtr();
+    amrex::Real const * invM_arr = m_invM.dataPtr();
     amrex::Array4<const amrex::Real> const & G_arr = G.array();
     amrex::Array4<      amrex::Real> const & F_arr = F.array();
 
     int const nk = m_nk;
 
-    ParallelFor(G_box,
+    amrex::ParallelFor(G_box,
     [=] AMREX_GPU_DEVICE(int ir, int iz, int inotused) noexcept {
         F_arr(ir,iz,F_icomp) = 0.;
         for (int ik=0 ; ik < nk ; ik++) {
diff --git a/Source/FieldSolver/SpectralSolver/SpectralHankelTransform/SpectralHankelTransformer.cpp b/Source/FieldSolver/SpectralSolver/SpectralHankelTransform/SpectralHankelTransformer.cpp
index 7dd297418..518eea03e 100644
--- a/Source/FieldSolver/SpectralSolver/SpectralHankelTransform/SpectralHankelTransformer.cpp
+++ b/Source/FieldSolver/SpectralSolver/SpectralHankelTransform/SpectralHankelTransformer.cpp
@@ -49,6 +49,7 @@ SpectralHankelTransformer::ExtractKrArray ()
             kr_array[ii] = kr_m_array[ir];
         });
     }
+    amrex::Gpu::synchronize();
 }
 
 /* \brief Converts a scalar field from the physical to the spectral space for all modes */
diff --git a/Source/FieldSolver/SpectralSolver/SpectralKSpace.H b/Source/FieldSolver/SpectralSolver/SpectralKSpace.H
index 01bbb4b35..5816a3477 100644
--- a/Source/FieldSolver/SpectralSolver/SpectralKSpace.H
+++ b/Source/FieldSolver/SpectralSolver/SpectralKSpace.H
@@ -15,12 +15,12 @@
 
 
 // `KVectorComponent` and `SpectralShiftFactor` hold one 1D array
-// ("ManagedVector") for each box ("LayoutData"). The arrays are
+// ("DeviceVector") for each box ("LayoutData"). The arrays are
 // only allocated if the corresponding box is owned by the local MPI rank.
-using RealKVector = amrex::Gpu::ManagedVector<amrex::Real>;
+using RealKVector = amrex::Gpu::DeviceVector<amrex::Real>;
 using KVectorComponent = amrex::LayoutData< RealKVector >;
 using SpectralShiftFactor = amrex::LayoutData<
-                           amrex::Gpu::ManagedVector<Complex> >;
+                           amrex::Gpu::DeviceVector<Complex> >;
 
 // Indicate the type of correction "shift" factor to apply
 // when the FFT is performed from/to a cell-centered grid in real space.
diff --git a/Source/FieldSolver/SpectralSolver/SpectralKSpace.cpp b/Source/FieldSolver/SpectralSolver/SpectralKSpace.cpp
index 2c60cdc38..dd12fb2b8 100644
--- a/Source/FieldSolver/SpectralSolver/SpectralKSpace.cpp
+++ b/Source/FieldSolver/SpectralSolver/SpectralKSpace.cpp
@@ -75,17 +75,18 @@ SpectralKSpace::getKComponent( const DistributionMapping& dm,
                                const int i_dim,
                                const bool only_positive_k ) const
 {
-    // Initialize an empty ManagedVector in each box
+    // Initialize an empty DeviceVector in each box
     KVectorComponent k_comp(spectralspace_ba, dm);
-    // Loop over boxes and allocate the corresponding ManagedVector
+    // Loop over boxes and allocate the corresponding DeviceVector
     // for each box owned by the local MPI proc
     for ( MFIter mfi(spectralspace_ba, dm); mfi.isValid(); ++mfi ){
         Box bx = spectralspace_ba[mfi];
-        ManagedVector<Real>& k = k_comp[mfi];
+        DeviceVector<Real>& k = k_comp[mfi];
 
         // Allocate k to the right size
         int N = bx.length( i_dim );
         k.resize( N );
+        Real* pk = k.data();
 
         // Fill the k vector
         IntVect fft_size = realspace_ba[mfi].length();
@@ -97,21 +98,24 @@ SpectralKSpace::getKComponent( const DistributionMapping& dm,
         if (only_positive_k){
             // Fill the full axis with positive k values
             // (typically: first axis, in a real-to-complex FFT)
-            for (int i=0; i<N; i++ ){
-                k[i] = i*dk;
-            }
+            amrex::ParallelFor(N, [=] AMREX_GPU_DEVICE (int i) noexcept
+            {
+                pk[i] = i*dk;
+            });
         } else {
             const int mid_point = (N+1)/2;
-            // Fill positive values of k
-            // (FFT conventions: first half is positive)
-            for (int i=0; i<mid_point; i++ ){
-                k[i] = i*dk;
-            }
-            // Fill negative values of k
-            // (FFT conventions: second half is negative)
-            for (int i=mid_point; i<N; i++){
-                k[i] = (i-N)*dk;
-            }
+            amrex::ParallelFor(N, [=] AMREX_GPU_DEVICE (int i) noexcept
+            {
+                if (i < mid_point) {
+                    // Fill positive values of k
+                    // (FFT conventions: first half is positive)
+                    pk[i] = i*dk;
+                } else {
+                    // Fill negative values of k
+                    // (FFT conventions: second half is negative)
+                    pk[i] = (i-N)*dk;
+                }
+            });
         }
     }
     return k_comp;
@@ -131,16 +135,19 @@ SpectralKSpace::getSpectralShiftFactor( const DistributionMapping& dm,
                                         const int i_dim,
                                         const int shift_type ) const
 {
-    // Initialize an empty ManagedVector in each box
+    // Initialize an empty DeviceVector in each box
     SpectralShiftFactor shift_factor( spectralspace_ba, dm );
-    // Loop over boxes and allocate the corresponding ManagedVector
+   // Loop over boxes and allocate the corresponding DeviceVector
     // for each box owned by the local MPI proc
     for ( MFIter mfi(spectralspace_ba, dm); mfi.isValid(); ++mfi ){
-        const ManagedVector<Real>& k = k_vec[i_dim][mfi];
-        ManagedVector<Complex>& shift = shift_factor[mfi];
+        const DeviceVector<Real>& k = k_vec[i_dim][mfi];
+        DeviceVector<Complex>& shift = shift_factor[mfi];
 
         // Allocate shift coefficients
-        shift.resize( k.size() );
+        const int N = k.size();
+        shift.resize(N);
+        Real const* pk = k.data();
+        Complex* pshift = shift.data();
 
         // Fill the shift coefficients
         Real sign = 0;
@@ -149,11 +156,10 @@ SpectralKSpace::getSpectralShiftFactor( const DistributionMapping& dm,
             case ShiftType::TransformToCellCentered: sign = 1.;
         }
         const Complex I{0,1};
-        int i = 0;
-        for (auto const& kv : k){
-            shift[i] = exp( I*sign*kv*0.5_rt*dx[i_dim]);
-            i++;
-        }
+        amrex::ParallelFor(N, [=] AMREX_GPU_DEVICE (int i) noexcept
+        {
+            pshift[i] = amrex::exp( I*sign*pk[i]*0.5_rt*dx[i_dim]);
+        });
     }
     return shift_factor;
 }
@@ -177,72 +183,81 @@ SpectralKSpace::getModifiedKComponent( const DistributionMapping& dm,
                                        const int n_order,
                                        const bool nodal ) const
 {
-    // Initialize an empty ManagedVector in each box
+    // Initialize an empty DeviceVector in each box
     KVectorComponent modified_k_comp(spectralspace_ba, dm);
 
     if (n_order == -1) { // Infinite-order case
         for ( MFIter mfi(spectralspace_ba, dm); mfi.isValid(); ++mfi ){
-            const ManagedVector<Real>& k = k_vec[i_dim][mfi];
-            ManagedVector<Real>& modified_k = modified_k_comp[mfi];
+            const DeviceVector<Real>& k = k_vec[i_dim][mfi];
+            DeviceVector<Real>& modified_k = modified_k_comp[mfi];
 
             // Allocate modified_k to the same size as k
+            const int N = k.size();
             modified_k.resize( k.size() );
 
             // Fill the modified k vector
-            int i = 0;
-            for (auto const& kv : k){
-                modified_k[i] = k[i]; // infinite-order case.
-                i++;
-            }
+            Gpu::copyAsync(Gpu::deviceToDevice, k.begin(), k.end(), modified_k.begin());
         }
     } else {
 
         // Compute real-space stencil coefficients
-        Vector<Real> stencil_coef = getFonbergStencilCoefficients(n_order, nodal);
+        Vector<Real> h_stencil_coef = getFonbergStencilCoefficients(n_order, nodal);
+        DeviceVector<Real> d_stencil_coef(h_stencil_coef.size());
+        Gpu::copyAsync(Gpu::hostToDevice, h_stencil_coef.begin(), h_stencil_coef.end(),
+                       d_stencil_coef.begin());
+        Gpu::synchronize();
+        const int nstencil = d_stencil_coef.size();
+        Real const* p_stencil_coef = d_stencil_coef.data();
 
-        // Loop over boxes and allocate the corresponding ManagedVector
+        // Loop over boxes and allocate the corresponding DeviceVector
         // for each box owned by the local MPI proc
         for ( MFIter mfi(spectralspace_ba, dm); mfi.isValid(); ++mfi ){
             Real delta_x = dx[i_dim];
-            const ManagedVector<Real>& k = k_vec[i_dim][mfi];
-            ManagedVector<Real>& modified_k = modified_k_comp[mfi];
+            const DeviceVector<Real>& k = k_vec[i_dim][mfi];
+            DeviceVector<Real>& modified_k = modified_k_comp[mfi];
 
             // Allocate modified_k to the same size as k
-            modified_k.resize( k.size() );
+            const int N = k.size();
+            modified_k.resize(N);
+            Real const* p_k = k.data();
+            Real * p_modified_k = modified_k.data();
 
             // Fill the modified k vector
-            int i = 0;
-            for (auto const& kv : k){
-                modified_k[i] = 0;
-                for (int n=1; n<stencil_coef.size(); n++){
+            amrex::ParallelFor(N, [=] AMREX_GPU_DEVICE (int i) noexcept
+            {
+                p_modified_k[i] = 0;
+                for (int n=1; n<nstencil; n++){
                     if (nodal){
-                        modified_k[i] += stencil_coef[n]* \
-                            std::sin( k[i]*n*delta_x )/( n*delta_x );
+                        p_modified_k[i] += p_stencil_coef[n]*
+                            std::sin( p_k[i]*n*delta_x )/( n*delta_x );
                     } else {
-                        modified_k[i] += stencil_coef[n]* \
-                            std::sin( k[i]*(n-0.5)*delta_x )/( (n-0.5)*delta_x );
+                        p_modified_k[i] += p_stencil_coef[n]* \
+                            std::sin( p_k[i]*(n-0.5)*delta_x )/( (n-0.5)*delta_x );
                     }
                 }
-                i++;
-            }
 
-            // By construction, at finite order and for a nodal grid,
-            // the *modified* k corresponding to the Nyquist frequency
-            // (i.e. highest *real* k) is 0. However, the above calculation
-            // based on stencil coefficients does not give 0 to machine precision.
-            // Therefore, we need to enforce the fact that the modified k be 0 here.
-            if (nodal){
-                if (i_dim == 0){
-                    // Because of the real-to-complex FFTs, the first axis (idim=0)
-                    // contains only the positive k, and the Nyquist frequency is
-                    // the last element of the array.
-                    modified_k[k.size()-1] = 0.0_rt;
-              }   else {
-                    // The other axes contains both positive and negative k ;
-                    // the Nyquist frequency is in the middle of the array.
-                    modified_k[k.size()/2] = 0.0_rt;
-              }
-            }
+                // By construction, at finite order and for a nodal grid,
+                // the *modified* k corresponding to the Nyquist frequency
+                // (i.e. highest *real* k) is 0. However, the above calculation
+                // based on stencil coefficients does not give 0 to machine precision.
+                // Therefore, we need to enforce the fact that the modified k be 0 here.
+                if (nodal){
+                    if (i_dim == 0){
+                        // Because of the real-to-complex FFTs, the first axis (idim=0)
+                        // contains only the positive k, and the Nyquist frequency is
+                        // the last element of the array.
+                        if (i == N-1) {
+                            p_modified_k[i] = 0.0_rt;
+                        }
+                    } else {
+                        // The other axes contains both positive and negative k ;
+                        // the Nyquist frequency is in the middle of the array.
+                        if (i == N/2) {
+                            p_modified_k[i] = 0.0_rt;
+                        }
+                    }
+                }
+            });
         }
     }
     return modified_k_comp;
author	WeiqunZhang <WeiqunZhang@lbl.gov>	2020-09-01 14:11:35 -0700
committer	GitHub <noreply@github.com>	2020-09-01 14:11:35 -0700
commit	3c2eadfde21745eae87c7d13ebaacb1732a8619d (patch)
tree	730bc2878800058a66fa308a67e4ca53c0715e71 /Source/FieldSolver/SpectralSolver
parent	1652cfa796c9f258832254906ef4148a32768c2b (diff)
download	WarpX-3c2eadfde21745eae87c7d13ebaacb1732a8619d.tar.gz WarpX-3c2eadfde21745eae87c7d13ebaacb1732a8619d.tar.zst WarpX-3c2eadfde21745eae87c7d13ebaacb1732a8619d.zip