diff options
Diffstat (limited to 'Source/FieldSolver/SpectralSolver/SpectralFieldData.cpp')
-rw-r--r-- | Source/FieldSolver/SpectralSolver/SpectralFieldData.cpp | 73 |
1 files changed, 66 insertions, 7 deletions
diff --git a/Source/FieldSolver/SpectralSolver/SpectralFieldData.cpp b/Source/FieldSolver/SpectralSolver/SpectralFieldData.cpp index c45809dd5..8f0853484 100644 --- a/Source/FieldSolver/SpectralSolver/SpectralFieldData.cpp +++ b/Source/FieldSolver/SpectralSolver/SpectralFieldData.cpp @@ -53,7 +53,38 @@ SpectralFieldData::SpectralFieldData( const BoxArray& realspace_ba, // the FFT plan, the valid dimensions are those of the real-space box. IntVect fft_size = realspace_ba[mfi].length(); #ifdef AMREX_USE_GPU - // Add cuFFT-specific code + // Create cuFFT plans + // Creating 3D plan for real to complex -- double precision + // Assuming CUDA is used for programming GPU + // Note that D2Z is inherently forward plan + // and Z2D is inherently backward plan + cufftResult result; +#if (AMREX_SPACEDIM == 3) + result = cufftPlan3d( &forward_plan[mfi], fft_size[2], + fft_size[1],fft_size[0], CUFFT_D2Z); + if ( result != CUFFT_SUCCESS ) { + amrex::Print() << " cufftplan3d forward failed! \n"; + } + + result = cufftPlan3d( &backward_plan[mfi], fft_size[2], + fft_size[1], fft_size[0], CUFFT_Z2D); + if ( result != CUFFT_SUCCESS ) { + amrex::Print() << " cufftplan3d backward failed! \n"; + } +#else + result = cufftPlan2d( &forward_plan[mfi], fft_size[1], + fft_size[0], CUFFT_D2Z ); + if ( result != CUFFT_SUCCESS ) { + amrex::Print() << " cufftplan2d forward failed! \n"; + } + + result = cufftPlan2d( &backward_plan[mfi], fft_size[1], + fft_size[0], CUFFT_Z2D ); + if ( result != CUFFT_SUCCESS ) { + amrex::Print() << " cufftplan2d backward failed! \n"; + } +#endif + #else // Create FFTW plans forward_plan[mfi] = @@ -86,7 +117,9 @@ SpectralFieldData::~SpectralFieldData() if (tmpRealField.size() > 0){ for ( MFIter mfi(tmpRealField); mfi.isValid(); ++mfi ){ #ifdef AMREX_USE_GPU - // Add cuFFT-specific code + // Destroy cuFFT plans + cufftDestroy( forward_plan[mfi] ); + cufftDestroy( backward_plan[mfi] ); #else // Destroy FFTW plans fftw_destroy_plan( forward_plan[mfi] ); @@ -135,8 +168,19 @@ SpectralFieldData::ForwardTransform( const MultiFab& mf, // Perform Fourier transform from `tmpRealField` to `tmpSpectralField` #ifdef AMREX_USE_GPU - // Add cuFFT-specific code ; make sure that this is done on the same + // Perform Fast Fourier Transform on GPU using cuFFT + // make sure that this is done on the same // GPU stream as the above copy + cufftResult result; + cudaStream_t stream = amrex::Gpu::Device::cudaStream(); + cufftSetStream ( forward_plan[mfi], stream); + result = cufftExecD2Z( forward_plan[mfi], + tmpRealField[mfi].dataPtr(), + reinterpret_cast<cuDoubleComplex*>( + tmpSpectralField[mfi].dataPtr()) ); + if ( result != CUFFT_SUCCESS ) { + amrex::Print() << " forward transform using cufftExecD2Z failed ! \n"; + } #else fftw_execute( forward_plan[mfi] ); #endif @@ -155,6 +199,7 @@ SpectralFieldData::ForwardTransform( const MultiFab& mf, const Complex* zshift_arr = zshift_FFTfromCell[mfi].dataPtr(); // Loop over indices within one box const Box spectralspace_bx = tmpSpectralField[mfi].box(); + ParallelFor( spectralspace_bx, [=] AMREX_GPU_DEVICE(int i, int j, int k) noexcept { Complex spectral_field_value = tmp_arr(i,j,k); @@ -207,6 +252,7 @@ SpectralFieldData::BackwardTransform( MultiFab& mf, const Complex* zshift_arr = zshift_FFTtoCell[mfi].dataPtr(); // Loop over indices within one box const Box spectralspace_bx = tmpSpectralField[mfi].box(); + ParallelFor( spectralspace_bx, [=] AMREX_GPU_DEVICE(int i, int j, int k) noexcept { Complex spectral_field_value = field_arr(i,j,k,field_index); @@ -225,22 +271,35 @@ SpectralFieldData::BackwardTransform( MultiFab& mf, // Perform Fourier transform from `tmpSpectralField` to `tmpRealField` #ifdef AMREX_USE_GPU - // Add cuFFT-specific code ; make sure that this is done on the same + // Perform Fast Fourier Transform on GPU using cuFFT. + // make sure that this is done on the same // GPU stream as the above copy + cufftResult result; + cudaStream_t stream = amrex::Gpu::Device::cudaStream(); + cufftSetStream ( backward_plan[mfi], stream); + result = cufftExecZ2D( backward_plan[mfi], + reinterpret_cast<cuDoubleComplex*>( + tmpSpectralField[mfi].dataPtr()), + tmpRealField[mfi].dataPtr() ); + if ( result != CUFFT_SUCCESS ) { + amrex::Print() << " Backward transform using cufftexecZ2D failed! \n"; + } #else fftw_execute( backward_plan[mfi] ); #endif // Copy the temporary field `tmpRealField` to the real-space field `mf` - + // (only in the valid cells ; not in the guard cells) // Normalize (divide by 1/N) since the FFT+IFFT results in a factor N { - const Box realspace_bx = tmpRealField[mfi].box(); Array4<Real> mf_arr = mf[mfi].array(); Array4<const Real> tmp_arr = tmpRealField[mfi].array(); // Normalization: divide by the number of points in realspace + // (includes the guard cells) + const Box realspace_bx = tmpRealField[mfi].box(); const Real inv_N = 1./realspace_bx.numPts(); - ParallelFor( realspace_bx, + + ParallelFor( mfi.validbox(), [=] AMREX_GPU_DEVICE(int i, int j, int k) noexcept { // Copy and normalize field mf_arr(i,j,k,i_comp) = inv_N*tmp_arr(i,j,k); |