diff options
5 files changed, 35 insertions, 107 deletions
diff --git a/Source/FieldSolver/SpectralSolver/SpectralAlgorithms/PsatdAlgorithm.H b/Source/FieldSolver/SpectralSolver/SpectralAlgorithms/PsatdAlgorithm.H index 36d5782e8..0a8614e54 100644 --- a/Source/FieldSolver/SpectralSolver/SpectralAlgorithms/PsatdAlgorithm.H +++ b/Source/FieldSolver/SpectralSolver/SpectralAlgorithms/PsatdAlgorithm.H @@ -17,19 +17,15 @@ class PsatdAlgorithm : public SpectralBaseAlgorithm const amrex::DistributionMapping& dm, const int norder_x, const int norder_y, const int norder_z, const bool nodal, const amrex::Real dt); - //PsatdAlgorithm() = default; // Default constructor - //PsatdAlgorithm& operator=(PsatdAlgorithm&& algorithm) = default; - void InitializeCoefficience(const SpectralKSpace& spectral_kspace, + + void InitializeSpectralCoefficients(const SpectralKSpace& spectral_kspace, const amrex::DistributionMapping& dm, const amrex::Real dt); + void pushSpectralFields(SpectralFieldData& f) const override final; private: // Modified finite-order vectors -// KVectorComponent modified_kx_vec, modified_kz_vec; -//#if (AMREX_SPACEDIM==3) -// KVectorComponent modified_ky_vec; -//#endif SpectralCoefficients C_coef, S_ck_coef, X1_coef, X2_coef, X3_coef; }; diff --git a/Source/FieldSolver/SpectralSolver/SpectralAlgorithms/PsatdAlgorithm.cpp b/Source/FieldSolver/SpectralSolver/SpectralAlgorithms/PsatdAlgorithm.cpp index 3da0ef453..d45b01bda 100644 --- a/Source/FieldSolver/SpectralSolver/SpectralAlgorithms/PsatdAlgorithm.cpp +++ b/Source/FieldSolver/SpectralSolver/SpectralAlgorithms/PsatdAlgorithm.cpp @@ -22,61 +22,8 @@ PsatdAlgorithm::PsatdAlgorithm(const SpectralKSpace& spectral_kspace, X2_coef = SpectralCoefficients(ba, dm, 1, 0); X3_coef = SpectralCoefficients(ba, dm, 1, 0); - InitializeCoefficience(spectral_kspace, dm, dt); + InitializeSpectralCoefficients(spectral_kspace, dm, dt); } -// // Fill them with the right values: -// // Loop over boxes and allocate the corresponding coefficients -// // for each box owned by the local MPI proc -// for (MFIter mfi(ba, dm); mfi.isValid(); ++mfi){ -// -// //const Box& bx = ba[mfi]; -// const Box bx = ba[mfi]; -// -// // Extract pointers for the k vectors -// const Real* modified_kx = modified_kx_vec[mfi].dataPtr(); -//#if (AMREX_SPACEDIM==3) -// const Real* modified_ky = modified_ky_vec[mfi].dataPtr(); -//#endif -// const Real* modified_kz = modified_kz_vec[mfi].dataPtr(); -// // Extract arrays for the coefficients -// Array4<Real> C = C_coef[mfi].array(); -// Array4<Real> S_ck = S_ck_coef[mfi].array(); -// Array4<Real> X1 = X1_coef[mfi].array(); -// Array4<Real> X2 = X2_coef[mfi].array(); -// Array4<Real> X3 = X3_coef[mfi].array(); -// -// // Loop over indices within one box -// ParallelFor(bx, -// [=] AMREX_GPU_DEVICE(int i, int j, int k) noexcept -// { -// // Calculate norm of vector -// const Real k_norm = std::sqrt( -// std::pow(modified_kx[i], 2) + -//#if (AMREX_SPACEDIM==3) -// std::pow(modified_ky[j], 2) + -// std::pow(modified_kz[k], 2)); -//#else -// std::pow(modified_kz[j], 2)); -//#endif -// -// // Calculate coefficients -// constexpr Real c = PhysConst::c; -// constexpr Real ep0 = PhysConst::ep0; -// if (k_norm != 0){ -// C(i,j,k) = std::cos(c*k_norm*dt); -// S_ck(i,j,k) = std::sin(c*k_norm*dt)/(c*k_norm); -// X1(i,j,k) = (1. - C(i,j,k))/(ep0 * c*c * k_norm*k_norm); -// X2(i,j,k) = (1. - S_ck(i,j,k)/dt)/(ep0 * k_norm*k_norm); -// X3(i,j,k) = (C(i,j,k) - S_ck(i,j,k)/dt)/(ep0 * k_norm*k_norm); -// } else { // Handle k_norm = 0, by using the analytical limit -// C(i,j,k) = 1.; -// S_ck(i,j,k) = dt; -// X1(i,j,k) = 0.5 * dt*dt / ep0; -// X2(i,j,k) = c*c * dt*dt / (6.*ep0); -// X3(i,j,k) = - c*c * dt*dt / (3.*ep0); -// } -// }); -// } /* Advance the E and B field in spectral space (stored in `f`) * over one time step */ @@ -139,6 +86,7 @@ PsatdAlgorithm::pushSpectralFields(SpectralFieldData& f) const{ const Real X2 = X2_arr(i,j,k); const Real X3 = X3_arr(i,j,k); + // Update E (see WarpX online documentation: theory section) fields(i,j,k,Idx::Ex) = C*Ex_old + S_ck*(c2*I*(ky*Bz_old - kz*By_old) - inv_ep0*Jx) @@ -163,7 +111,7 @@ PsatdAlgorithm::pushSpectralFields(SpectralFieldData& f) const{ } }; -void PsatdAlgorithm::InitializeCoefficience(const SpectralKSpace& spectral_kspace, +void PsatdAlgorithm::InitializeSpectralCoefficients(const SpectralKSpace& spectral_kspace, const amrex::DistributionMapping& dm, const amrex::Real dt) { diff --git a/Source/FieldSolver/SpectralSolver/SpectralFieldData.cpp b/Source/FieldSolver/SpectralSolver/SpectralFieldData.cpp index ca9e87f0f..cb26b19b7 100644 --- a/Source/FieldSolver/SpectralSolver/SpectralFieldData.cpp +++ b/Source/FieldSolver/SpectralSolver/SpectralFieldData.cpp @@ -55,23 +55,30 @@ SpectralFieldData::SpectralFieldData( const BoxArray& realspace_ba, #ifdef AMREX_USE_GPU // Add cuFFT-specific code // Creating 3D plan for real to complex -- double precision - cudaDeviceSynchronize(); + cufftResult result; +#if (AMREX_SPACEDIM == 3) result = cufftPlan3d( &forward_plan[mfi], fft_size[2], fft_size[1],fft_size[0], CUFFT_D2Z); if ( result != CUFFT_SUCCESS ) { amrex::Print() << " cufftplan3d forward failed! \n"; } +#else // Add 2D cuFFT-spacific code for D2Z // Note that D2Z is inherently forward plan +#endif +#if (AMREX_SPACEDIM == 3) result = cufftPlan3d( &backward_plan[mfi], fft_size[2], fft_size[1], fft_size[0], CUFFT_Z2D); - // Add 2D cuFFT-specific code for Z2D if ( result != CUFFT_SUCCESS ) { amrex::Print() << " cufftplan3d backward failed! \n"; } - cudaDeviceSynchronize(); +#else + // Add 2D cuFFT-specific code for Z2D + // Note that Z2D is inherently backward plan + +#endif #else // Create FFTW plans @@ -152,29 +159,22 @@ SpectralFieldData::ForwardTransform( const MultiFab& mf, [=] AMREX_GPU_DEVICE(int i, int j, int k) noexcept { tmp_arr(i,j,k) = mf_arr(i,j,k,i_comp); }); -#ifdef AMREX_USE_GPU - cudaDeviceSynchronize(); -#endif - amrex::Print() << " before forward transform " << mf_arr(15,15,15,0) << " " << mf_arr(15,15,15,1); } // Perform Fourier transform from `tmpRealField` to `tmpSpectralField` #ifdef AMREX_USE_GPU // Add cuFFT-specific code ; make sure that this is done on the same // GPU stream as the above copy - cudaDeviceSynchronize(); cufftResult result; - cudaStream_t stream = amrex::Cuda::Device::cudaStream(); - amrex::Print() << " stream is " << stream << "\n"; -// cufftSetStream ( forward_plan[mfi], stream); + cudaStream_t stream = amrex::Gpu::Device::cudaStream(); + cufftSetStream ( forward_plan[mfi], stream); result = cufftExecD2Z( forward_plan[mfi], tmpRealField[mfi].dataPtr(), reinterpret_cast<cuDoubleComplex*>( tmpSpectralField[mfi].dataPtr()) ); if ( result != CUFFT_SUCCESS ) { - amrex::Print() << " cufftplan3d execute failed ! \n"; + amrex::Print() << " forward transform using cufftExecD2Z failed ! \n"; } - cudaDeviceSynchronize(); #else fftw_execute( forward_plan[mfi] ); #endif @@ -193,6 +193,7 @@ SpectralFieldData::ForwardTransform( const MultiFab& mf, const Complex* zshift_arr = zshift_FFTfromCell[mfi].dataPtr(); // Loop over indices within one box const Box spectralspace_bx = tmpSpectralField[mfi].box(); + ParallelFor( spectralspace_bx, [=] AMREX_GPU_DEVICE(int i, int j, int k) noexcept { Complex spectral_field_value = tmp_arr(i,j,k); @@ -245,6 +246,7 @@ SpectralFieldData::BackwardTransform( MultiFab& mf, const Complex* zshift_arr = zshift_FFTtoCell[mfi].dataPtr(); // Loop over indices within one box const Box spectralspace_bx = tmpSpectralField[mfi].box(); + ParallelFor( spectralspace_bx, [=] AMREX_GPU_DEVICE(int i, int j, int k) noexcept { Complex spectral_field_value = field_arr(i,j,k,field_index); @@ -265,22 +267,16 @@ SpectralFieldData::BackwardTransform( MultiFab& mf, #ifdef AMREX_USE_GPU // Add cuFFT-specific code ; make sure that this is done on the same // GPU stream as the above copy - cudaDeviceSynchronize(); cufftResult result; - cudaStream_t stream = amrex::Cuda::Device::cudaStream(); - amrex::Print() << " stream is " << stream << "\n"; -// cufftSetStream ( backward_plan[mfi], stream); + cudaStream_t stream = amrex::Gpu::Device::cudaStream(); + cufftSetStream ( backward_plan[mfi], stream); result = cufftExecZ2D( backward_plan[mfi], reinterpret_cast<cuDoubleComplex*>( tmpSpectralField[mfi].dataPtr()), tmpRealField[mfi].dataPtr() ); if ( result != CUFFT_SUCCESS ) { - amrex::Print() << " cufftplan3d execute inverse failed ! \n"; + amrex::Print() << " Backward transform using cufftexecZ2D failed! \n"; } - if ( result == CUFFT_SUCCESS ) { - amrex::Print() << " created cufft inverse transform\n"; - } - cudaDeviceSynchronize(); #else fftw_execute( backward_plan[mfi] ); #endif @@ -294,20 +290,12 @@ SpectralFieldData::BackwardTransform( MultiFab& mf, Array4<const Real> tmp_arr = tmpRealField[mfi].array(); // Normalization: divide by the number of points in realspace const Real inv_N = 1./realspace_bx.numPts(); + ParallelFor( realspace_bx, [=] AMREX_GPU_DEVICE(int i, int j, int k) noexcept { // Copy and normalize field mf_arr(i,j,k,i_comp) = inv_N*tmp_arr(i,j,k); }); -#ifdef AMREX_USE_GPU - cudaDeviceSynchronize(); -#endif - amrex::Print() << " mf_arr after BT " << mf_arr(15,15,15,0) << " " << mf_arr(15,15,15,1) << "\n"; - -#ifdef AMREX_USE_GPU - cudaDeviceSynchronize(); -#endif - amrex::Print() << " divided by 1/N \n"; } } } diff --git a/Source/FieldSolver/SpectralSolver/SpectralKSpace.cpp b/Source/FieldSolver/SpectralSolver/SpectralKSpace.cpp index 6a88a52a3..6fe5e3939 100644 --- a/Source/FieldSolver/SpectralSolver/SpectralKSpace.cpp +++ b/Source/FieldSolver/SpectralSolver/SpectralKSpace.cpp @@ -149,6 +149,7 @@ SpectralKSpace::getSpectralShiftFactor( const DistributionMapping& dm, #else shift[i] = std::exp( I*sign*k[i]*0.5*dx[i_dim] ); #endif + } } return shift_factor; diff --git a/Source/FieldSolver/WarpXFFT.cpp b/Source/FieldSolver/WarpXFFT.cpp index d2cb83e60..13d92f6f3 100644 --- a/Source/FieldSolver/WarpXFFT.cpp +++ b/Source/FieldSolver/WarpXFFT.cpp @@ -56,6 +56,7 @@ BuildFFTOwnerMask (const MultiFab& mf, const Geometry& geom) for (const auto& b : bl) { fab.setVal(nonowner, b, 0, 1); } + } return mask; @@ -89,7 +90,7 @@ CopyDataFromFFTToValid (MultiFab& mf, const MultiFab& mf_fft, const BoxArray& ba const FArrayBox& srcfab = mf_fft[mfi]; const Box& srcbox = srcfab.box(); - + if (srcbox.contains(bx)) { // Copy the interior region (without guard cells) @@ -107,6 +108,8 @@ CopyDataFromFFTToValid (MultiFab& mf, const MultiFab& mf_fft, const BoxArray& ba // the cell that has non-zero mask is the one which is retained. mf.setVal(0.0, 0); mf.ParallelAdd(mftmp); + + } } @@ -449,34 +452,23 @@ WarpX::PushPSATD (int lev, amrex::Real /* dt */) amrex::Abort("WarpX::PushPSATD: TODO"); } #else // AMREX_USE_CUDA is defined ; running on GPU - amrex::Abort("The option `psatd.fft_hybrid_mpi_decomposition` does not work on GPU.") + amrex::Abort("The option `psatd.fft_hybrid_mpi_decomposition` does not work on GPU."); #endif } else { // Not using the hybrid decomposition auto& solver = *spectral_solver_fp[lev]; // Perform forward Fourier transform - amrex::Print() << " FT Ex \n"; solver.ForwardTransform(*Efield_fp_fft[lev][0], SpectralFieldIndex::Ex); - amrex::Print() << " FT Ey \n"; solver.ForwardTransform(*Efield_fp_fft[lev][1], SpectralFieldIndex::Ey); - amrex::Print() << " FT Ez \n"; solver.ForwardTransform(*Efield_fp_fft[lev][2], SpectralFieldIndex::Ez); - amrex::Print() << " FT Bx \n"; solver.ForwardTransform(*Bfield_fp_fft[lev][0], SpectralFieldIndex::Bx); - amrex::Print() << " FT By \n"; solver.ForwardTransform(*Bfield_fp_fft[lev][1], SpectralFieldIndex::By); - amrex::Print() << " FT Bz \n"; solver.ForwardTransform(*Bfield_fp_fft[lev][2], SpectralFieldIndex::Bz); - amrex::Print() << " FT jx \n"; solver.ForwardTransform(*current_fp_fft[lev][0], SpectralFieldIndex::Jx); - amrex::Print() << " FT jy \n"; solver.ForwardTransform(*current_fp_fft[lev][1], SpectralFieldIndex::Jy); - amrex::Print() << " FT jz \n"; solver.ForwardTransform(*current_fp_fft[lev][2], SpectralFieldIndex::Jz); - amrex::Print() << " FT rho old \n"; solver.ForwardTransform(*rho_fp_fft[lev], SpectralFieldIndex::rho_old, 0); - amrex::Print() << " FT rho new \n"; solver.ForwardTransform(*rho_fp_fft[lev], SpectralFieldIndex::rho_new, 1); // Advance fields in spectral space @@ -489,6 +481,7 @@ WarpX::PushPSATD (int lev, amrex::Real /* dt */) solver.BackwardTransform(*Bfield_fp_fft[lev][0], SpectralFieldIndex::Bx); solver.BackwardTransform(*Bfield_fp_fft[lev][1], SpectralFieldIndex::By); solver.BackwardTransform(*Bfield_fp_fft[lev][2], SpectralFieldIndex::Bz); + } BL_PROFILE_VAR_STOP(blp_push_eb); @@ -505,5 +498,7 @@ WarpX::PushPSATD (int lev, amrex::Real /* dt */) { amrex::Abort("WarpX::PushPSATD: TODO"); } - amrex::Print() << " coped data from fft to valid \n"; + } + + |