Added cuFFT kernels -- debugging error in rho values before forward transform

author: Revathi Jambunathan <revanathan@login5.summit.olcf.ornl.gov> 2019-05-06 18:13:13 -0400
committer: Revathi Jambunathan <revanathan@login5.summit.olcf.ornl.gov> 2019-05-06 18:13:13 -0400
commit: 4a34f2ea9ab825a0af92fc0c03017043951032e7 (patch)
tree: 534ce2941909557c9611990acc5ce0294307cca0 /Source/FieldSolver/SpectralSolver
parent: df73577bc750d6ca49458c2365e761ab7067aa7b (diff)
download: WarpX-4a34f2ea9ab825a0af92fc0c03017043951032e7.tar.gz
WarpX-4a34f2ea9ab825a0af92fc0c03017043951032e7.tar.zst
WarpX-4a34f2ea9ab825a0af92fc0c03017043951032e7.zip
5 files changed, 203 insertions, 55 deletions
diff --git a/Source/FieldSolver/SpectralSolver/PsatdAlgorithm.H b/Source/FieldSolver/SpectralSolver/PsatdAlgorithm.H
index acefcc466..7cf31b97b 100644
--- a/Source/FieldSolver/SpectralSolver/PsatdAlgorithm.H
+++ b/Source/FieldSolver/SpectralSolver/PsatdAlgorithm.H
@@ -19,6 +19,9 @@ class PsatdAlgorithm
         PsatdAlgorithm() = default; // Default constructor
         PsatdAlgorithm& operator=(PsatdAlgorithm&& algorithm) = default;
         void pushSpectralFields(SpectralFieldData& f) const;
+        void InitializeCoefficience(const SpectralKSpace& spectral_kspace,
+                                    const amrex::DistributionMapping& dm, 
+                                    const amrex::Real dt);
 
     private:
         // Modified finite-order vectors
diff --git a/Source/FieldSolver/SpectralSolver/PsatdAlgorithm.cpp b/Source/FieldSolver/SpectralSolver/PsatdAlgorithm.cpp
index ada7506c3..9e0bbfe06 100644
--- a/Source/FieldSolver/SpectralSolver/PsatdAlgorithm.cpp
+++ b/Source/FieldSolver/SpectralSolver/PsatdAlgorithm.cpp
@@ -27,58 +27,60 @@ PsatdAlgorithm::PsatdAlgorithm(const SpectralKSpace& spectral_kspace,
     X2_coef = SpectralCoefficients(ba, dm, 1, 0);
     X3_coef = SpectralCoefficients(ba, dm, 1, 0);
 
-    // Fill them with the right values:
-    // Loop over boxes and allocate the corresponding coefficients
-    // for each box owned by the local MPI proc
-    for (MFIter mfi(ba, dm); mfi.isValid(); ++mfi){
-
-        const Box& bx = ba[mfi];
-
-        // Extract pointers for the k vectors
-        const Real* modified_kx = modified_kx_vec[mfi].dataPtr();
-#if (AMREX_SPACEDIM==3)
-        const Real* modified_ky = modified_ky_vec[mfi].dataPtr();
-#endif
-        const Real* modified_kz = modified_kz_vec[mfi].dataPtr();
-        // Extract arrays for the coefficients
-        Array4<Real> C = C_coef[mfi].array();
-        Array4<Real> S_ck = S_ck_coef[mfi].array();
-        Array4<Real> X1 = X1_coef[mfi].array();
-        Array4<Real> X2 = X2_coef[mfi].array();
-        Array4<Real> X3 = X3_coef[mfi].array();
-
-        // Loop over indices within one box
-        ParallelFor(bx,
-        [=] AMREX_GPU_DEVICE(int i, int j, int k) noexcept
-        {
-            // Calculate norm of vector
-            const Real k_norm = std::sqrt(
-                std::pow(modified_kx[i], 2) +
-#if (AMREX_SPACEDIM==3)
-                std::pow(modified_ky[j], 2) +
-                std::pow(modified_kz[k], 2));
-#else
-                std::pow(modified_kz[j], 2));
-#endif
-
-            // Calculate coefficients
-            constexpr Real c = PhysConst::c;
-            constexpr Real ep0 = PhysConst::ep0;
-            if (k_norm != 0){
-                C(i,j,k) = std::cos(c*k_norm*dt);
-                S_ck(i,j,k) = std::sin(c*k_norm*dt)/(c*k_norm);
-                X1(i,j,k) = (1. - C(i,j,k))/(ep0 * c*c * k_norm*k_norm);
-                X2(i,j,k) = (1. - S_ck(i,j,k)/dt)/(ep0 * k_norm*k_norm);
-                X3(i,j,k) = (C(i,j,k) - S_ck(i,j,k)/dt)/(ep0 * k_norm*k_norm);
-            } else { // Handle k_norm = 0, by using the analytical limit
-                C(i,j,k) = 1.;
-                S_ck(i,j,k) = dt;
-                X1(i,j,k) = 0.5 * dt*dt / ep0;
-                X2(i,j,k) = c*c * dt*dt / (6.*ep0);
-                X3(i,j,k) = - c*c * dt*dt / (3.*ep0);
-            }
-        });
-    }
+    InitializeCoefficience(spectral_kspace, dm, dt);
+//    // Fill them with the right values:
+//    // Loop over boxes and allocate the corresponding coefficients
+//    // for each box owned by the local MPI proc
+//    for (MFIter mfi(ba, dm); mfi.isValid(); ++mfi){
+//
+//        //const Box& bx = ba[mfi];
+//        const Box bx = ba[mfi];
+//
+//        // Extract pointers for the k vectors
+//        const Real* modified_kx = modified_kx_vec[mfi].dataPtr();
+//#if (AMREX_SPACEDIM==3)
+//        const Real* modified_ky = modified_ky_vec[mfi].dataPtr();
+//#endif
+//        const Real* modified_kz = modified_kz_vec[mfi].dataPtr();
+//        // Extract arrays for the coefficients
+//        Array4<Real> C = C_coef[mfi].array();
+//        Array4<Real> S_ck = S_ck_coef[mfi].array();
+//        Array4<Real> X1 = X1_coef[mfi].array();
+//        Array4<Real> X2 = X2_coef[mfi].array();
+//        Array4<Real> X3 = X3_coef[mfi].array();
+//
+//        // Loop over indices within one box
+//        ParallelFor(bx,
+//        [=] AMREX_GPU_DEVICE(int i, int j, int k) noexcept
+//        {
+//            // Calculate norm of vector
+//            const Real k_norm = std::sqrt(
+//                std::pow(modified_kx[i], 2) +
+//#if (AMREX_SPACEDIM==3)
+//                std::pow(modified_ky[j], 2) +
+//                std::pow(modified_kz[k], 2));
+//#else
+//                std::pow(modified_kz[j], 2));
+//#endif
+//
+//            // Calculate coefficients
+//            constexpr Real c = PhysConst::c;
+//            constexpr Real ep0 = PhysConst::ep0;
+//            if (k_norm != 0){
+//                C(i,j,k) = std::cos(c*k_norm*dt);
+//                S_ck(i,j,k) = std::sin(c*k_norm*dt)/(c*k_norm);
+//                X1(i,j,k) = (1. - C(i,j,k))/(ep0 * c*c * k_norm*k_norm);
+//                X2(i,j,k) = (1. - S_ck(i,j,k)/dt)/(ep0 * k_norm*k_norm);
+//                X3(i,j,k) = (C(i,j,k) - S_ck(i,j,k)/dt)/(ep0 * k_norm*k_norm);
+//            } else { // Handle k_norm = 0, by using the analytical limit
+//                C(i,j,k) = 1.;
+//                S_ck(i,j,k) = dt;
+//                X1(i,j,k) = 0.5 * dt*dt / ep0;
+//                X2(i,j,k) = c*c * dt*dt / (6.*ep0);
+//                X3(i,j,k) = - c*c * dt*dt / (3.*ep0);
+//            }
+//        });
+//    }
 };
 
 /* Advance the E and B field in spectral space (stored in `f`)
@@ -135,7 +137,7 @@ PsatdAlgorithm::pushSpectralFields(SpectralFieldData& f) const{
 #endif
             constexpr Real c2 = PhysConst::c*PhysConst::c;
             constexpr Real inv_ep0 = 1./PhysConst::ep0;
-            constexpr Complex I = Complex{0,1};
+            const Complex I = Complex{0,1};
             const Real C = C_arr(i,j,k);
             const Real S_ck = S_ck_arr(i,j,k);
             const Real X1 = X1_arr(i,j,k);
@@ -165,3 +167,64 @@ PsatdAlgorithm::pushSpectralFields(SpectralFieldData& f) const{
         });
     }
 };
+
+void PsatdAlgorithm::InitializeCoefficience(const SpectralKSpace& spectral_kspace,
+                                    const amrex::DistributionMapping& dm,
+                                    const amrex::Real dt)
+{
+    const BoxArray& ba = spectral_kspace.spectralspace_ba;
+    // Fill them with the right values:
+    // Loop over boxes and allocate the corresponding coefficients
+    // for each box owned by the local MPI proc
+    for (MFIter mfi(ba, dm); mfi.isValid(); ++mfi){
+
+        //const Box& bx = ba[mfi];
+        const Box bx = ba[mfi];
+
+        // Extract pointers for the k vectors
+        const Real* modified_kx = modified_kx_vec[mfi].dataPtr();
+#if (AMREX_SPACEDIM==3)
+        const Real* modified_ky = modified_ky_vec[mfi].dataPtr();
+#endif
+        const Real* modified_kz = modified_kz_vec[mfi].dataPtr();
+        // Extract arrays for the coefficients
+        Array4<Real> C = C_coef[mfi].array();
+        Array4<Real> S_ck = S_ck_coef[mfi].array();
+        Array4<Real> X1 = X1_coef[mfi].array();
+        Array4<Real> X2 = X2_coef[mfi].array();
+        Array4<Real> X3 = X3_coef[mfi].array();
+
+        // Loop over indices within one box
+        ParallelFor(bx,
+        [=] AMREX_GPU_DEVICE(int i, int j, int k) noexcept
+        {
+            // Calculate norm of vector
+            const Real k_norm = std::sqrt(
+                std::pow(modified_kx[i], 2) +
+#if (AMREX_SPACEDIM==3)
+                std::pow(modified_ky[j], 2) +
+                std::pow(modified_kz[k], 2));
+#else
+                std::pow(modified_kz[j], 2));
+#endif
+
+
+            // Calculate coefficients
+            constexpr Real c = PhysConst::c;
+            constexpr Real ep0 = PhysConst::ep0;
+            if (k_norm != 0){
+                C(i,j,k) = std::cos(c*k_norm*dt);
+                S_ck(i,j,k) = std::sin(c*k_norm*dt)/(c*k_norm);
+                X1(i,j,k) = (1. - C(i,j,k))/(ep0 * c*c * k_norm*k_norm);
+                X2(i,j,k) = (1. - S_ck(i,j,k)/dt)/(ep0 * k_norm*k_norm);
+                X3(i,j,k) = (C(i,j,k) - S_ck(i,j,k)/dt)/(ep0 * k_norm*k_norm);
+            } else { // Handle k_norm = 0, by using the analytical limit
+                C(i,j,k) = 1.;
+                S_ck(i,j,k) = dt;
+                X1(i,j,k) = 0.5 * dt*dt / ep0;
+                X2(i,j,k) = c*c * dt*dt / (6.*ep0);
+                X3(i,j,k) = - c*c * dt*dt / (3.*ep0);
+            }
+        });
+     }
+}
diff --git a/Source/FieldSolver/SpectralSolver/SpectralFieldData.H b/Source/FieldSolver/SpectralSolver/SpectralFieldData.H
index 8e58aa1d8..1d64817ef 100644
--- a/Source/FieldSolver/SpectralSolver/SpectralFieldData.H
+++ b/Source/FieldSolver/SpectralSolver/SpectralFieldData.H
@@ -26,6 +26,7 @@ class SpectralFieldData
     // the local MPI rank)
 #ifdef AMREX_USE_GPU
     // Add cuFFT-specific code
+    using FFTplans = amrex::LayoutData<cufftHandle>;
 #else
     using FFTplans = amrex::LayoutData<fftw_plan>;
 #endif
diff --git a/Source/FieldSolver/SpectralSolver/SpectralFieldData.cpp b/Source/FieldSolver/SpectralSolver/SpectralFieldData.cpp
index 02fa2015f..7c2061f8d 100644
--- a/Source/FieldSolver/SpectralSolver/SpectralFieldData.cpp
+++ b/Source/FieldSolver/SpectralSolver/SpectralFieldData.cpp
@@ -54,6 +54,29 @@ SpectralFieldData::SpectralFieldData( const BoxArray& realspace_ba,
         IntVect fft_size = realspace_ba[mfi].length();
 #ifdef AMREX_USE_GPU
         // Add cuFFT-specific code
+        // Creating 3D plan for real to complex -- double precision
+        cufftResult result;
+        result = cufftPlan3d( &forward_plan[mfi], fft_size[2], 
+                              fft_size[1],fft_size[0], CUFFT_D2Z);
+        if ( result != CUFFT_SUCCESS ) {
+           amrex::Print() << " cufftplan3d forward failed! \n";
+        }
+        if ( result == CUFFT_SUCCESS ) {
+           amrex::Print() << " created cufft forward plan\n";
+        }
+        // Add 2D cuFFT-spacific code for D2Z
+        // Note that D2Z is inherently forward plan 
+
+        result = cufftPlan3d( &backward_plan[mfi], fft_size[2], 
+                              fft_size[1], fft_size[0], CUFFT_Z2D);
+        // Add 2D cuFFT-specific code for Z2D
+        if ( result != CUFFT_SUCCESS ) {
+           amrex::Print() << " cufftplan3d backward failed! \n";
+        }
+        if ( result == CUFFT_SUCCESS ) {
+           amrex::Print() << " created cufft backward plan\n";
+        }
+
 #else
         // Create FFTW plans
         forward_plan[mfi] =
@@ -87,6 +110,8 @@ SpectralFieldData::~SpectralFieldData()
         for ( MFIter mfi(tmpRealField); mfi.isValid(); ++mfi ){
 #ifdef AMREX_USE_GPU
             // Add cuFFT-specific code
+            cufftDestroy( forward_plan[mfi] );
+            cufftDestroy( backward_plan[mfi] );
 #else
             // Destroy FFTW plans
             fftw_destroy_plan( forward_plan[mfi] );
@@ -129,16 +154,41 @@ SpectralFieldData::ForwardTransform( const MultiFab& mf,
             Array4<Real> tmp_arr = tmpRealField[mfi].array();
             ParallelFor( realspace_bx,
             [=] AMREX_GPU_DEVICE(int i, int j, int k) noexcept {
-                tmp_arr(i,j,k) = mf_arr(i,j,k,i_comp);
+                tmp_arr(i,j,k) = mf_arr(i,j,k,i_comp);                
             });
+#ifdef AMREX_USE_GPU
+            cudaDeviceSynchronize();
+#endif
+            amrex::Print() << " in forward trans icomp " << i_comp << " "  << tmp_arr(0,0,0) << " mf arr " ;
+            amrex::Print() << " " << mf_arr(0,0,0,0);
+            amrex::Print() << " " << mf_arr(15,15,15,0);
+            amrex::Print() << " " << mf_arr(0,0,0,1);
+            amrex::Print() << " " << mf_arr(15,15,15,1);
+            amrex::Print() << "\n";
         }
 
         // Perform Fourier transform from `tmpRealField` to `tmpSpectralField`
 #ifdef AMREX_USE_GPU
         // Add cuFFT-specific code ; make sure that this is done on the same
         // GPU stream as the above copy
+        cudaDeviceSynchronize();
+        cufftResult result;
+        //cudaStream_t stream = amrex::Cuda::Device::cudaStream(); 
+        //cufftSetStream ( forward_plan[mfi], stream);
+        result = cufftExecD2Z( forward_plan[mfi], 
+                               tmpRealField[mfi].dataPtr(), 
+                               reinterpret_cast<cuDoubleComplex*>(
+                               tmpSpectralField[mfi].dataPtr()) );
+        if ( result != CUFFT_SUCCESS ) {
+           amrex::Print() << " cufftplan3d execute failed ! \n";
+        }
+        if ( result == CUFFT_SUCCESS ) {
+           amrex::Print() << " created cufft forward transform\n";
+        }
+        cudaDeviceSynchronize();
 #else
         fftw_execute( forward_plan[mfi] );
+        amrex::Print() << " forward fft on cpu\n";
 #endif
 
         // Copy the spectral-space field `tmpSpectralField` to the appropriate
@@ -169,6 +219,8 @@ SpectralFieldData::ForwardTransform( const MultiFab& mf,
                 // Copy field into the right index
                 fields_arr(i,j,k,field_index) = spectral_field_value;
             });
+//            amrex::Print() << " in forward trans after D2Z" << fields_arr(0,0,0,0) ;
+            amrex::Print() << "\n";
         }
     }
 }
@@ -227,8 +279,24 @@ SpectralFieldData::BackwardTransform( MultiFab& mf,
 #ifdef AMREX_USE_GPU
         // Add cuFFT-specific code ; make sure that this is done on the same
         // GPU stream as the above copy
+        cudaDeviceSynchronize(); 
+        cufftResult result;
+        //cudaStream_t stream = amrex::Cuda::Device::cudaStream(); 
+        //cufftSetStream ( backward_plan[mfi], stream);
+        result = cufftExecZ2D( backward_plan[mfi], 
+                               reinterpret_cast<cuDoubleComplex*>(
+                               tmpSpectralField[mfi].dataPtr()),
+                               tmpRealField[mfi].dataPtr() );
+        if ( result != CUFFT_SUCCESS ) {
+           amrex::Print() << " cufftplan3d execute inverse failed ! \n";
+        }
+        if ( result == CUFFT_SUCCESS ) {
+           amrex::Print() << " created cufft inverse transform\n";
+        }
+        cudaDeviceSynchronize(); 
 #else
         fftw_execute( backward_plan[mfi] );
+        amrex::Print() << " cpu inverse done\n";
 #endif
 
         // Copy the temporary field `tmpRealField` to the real-space field `mf`
@@ -245,6 +313,15 @@ SpectralFieldData::BackwardTransform( MultiFab& mf,
                 // Copy and normalize field
                 mf_arr(i,j,k,i_comp) = inv_N*tmp_arr(i,j,k);
             });
+#ifdef AMREX_USE_GPU
+            cudaDeviceSynchronize();
+#endif
+            amrex::Print() << " after backward plan in real space 0,0,0 " << mf_arr(0,0,0,0) << " tmp " << tmp_arr(0,0,0) << "\n";
+            amrex::Print() << " after backward plan in real space 15, 15, 15 " << mf_arr(15,15,15,0) << " tmp " << tmp_arr(0,0,0) << "\n";
+            amrex::Print() << "\n";
+#ifdef AMREX_USE_GPU
+            cudaDeviceSynchronize();
+#endif
         }
     }
 }
diff --git a/Source/FieldSolver/SpectralSolver/SpectralKSpace.cpp b/Source/FieldSolver/SpectralSolver/SpectralKSpace.cpp
index 2fe78cedd..6a88a52a3 100644
--- a/Source/FieldSolver/SpectralSolver/SpectralKSpace.cpp
+++ b/Source/FieldSolver/SpectralSolver/SpectralKSpace.cpp
@@ -142,9 +142,13 @@ SpectralKSpace::getSpectralShiftFactor( const DistributionMapping& dm,
             case ShiftType::TransformFromCellCentered: sign = -1.; break;
             case ShiftType::TransformToCellCentered: sign = 1.;
         }
-        constexpr Complex I{0,1};
+        const Complex I{0,1};
         for (int i=0; i<k.size(); i++ ){
+#ifdef AMREX_USE_GPU
+            shift[i] = thrust::exp( I*sign*k[i]*0.5*dx[i_dim] );
+#else
             shift[i] = std::exp( I*sign*k[i]*0.5*dx[i_dim] );
+#endif
         }
     }
     return shift_factor;
author	Revathi Jambunathan <revanathan@login5.summit.olcf.ornl.gov>	2019-05-06 18:13:13 -0400
committer	Revathi Jambunathan <revanathan@login5.summit.olcf.ornl.gov>	2019-05-06 18:13:13 -0400
commit	4a34f2ea9ab825a0af92fc0c03017043951032e7 (patch)
tree	534ce2941909557c9611990acc5ce0294307cca0 /Source/FieldSolver/SpectralSolver
parent	df73577bc750d6ca49458c2365e761ab7067aa7b (diff)
download	WarpX-4a34f2ea9ab825a0af92fc0c03017043951032e7.tar.gz WarpX-4a34f2ea9ab825a0af92fc0c03017043951032e7.tar.zst WarpX-4a34f2ea9ab825a0af92fc0c03017043951032e7.zip