diff options
-rw-r--r-- | Source/Evolve/WarpXEvolveEM.cpp | 3 | ||||
-rw-r--r-- | Source/FieldSolver/SpectralSolver/SpectralAlgorithms/PsatdAlgorithm.H | 12 | ||||
-rw-r--r-- | Source/FieldSolver/SpectralSolver/SpectralAlgorithms/PsatdAlgorithm.cpp | 118 | ||||
-rw-r--r-- | Source/FieldSolver/SpectralSolver/SpectralFieldData.H | 2 | ||||
-rw-r--r-- | Source/FieldSolver/SpectralSolver/SpectralFieldData.cpp | 70 | ||||
-rw-r--r-- | Source/FieldSolver/SpectralSolver/SpectralKSpace.cpp | 7 | ||||
-rw-r--r-- | Source/FieldSolver/WarpXFFT.cpp | 13 | ||||
-rw-r--r-- | Source/Initialization/WarpXInitData.cpp | 2 | ||||
-rw-r--r-- | Source/Parallelization/WarpXComm.cpp | 2 | ||||
-rw-r--r-- | Source/Particles/WarpXParticleContainer.cpp | 2 |
10 files changed, 162 insertions, 69 deletions
diff --git a/Source/Evolve/WarpXEvolveEM.cpp b/Source/Evolve/WarpXEvolveEM.cpp index a5d68e4f9..4f33694cd 100644 --- a/Source/Evolve/WarpXEvolveEM.cpp +++ b/Source/Evolve/WarpXEvolveEM.cpp @@ -84,12 +84,14 @@ WarpX::EvolveEM (int numsteps) *Bfield_aux[lev][0],*Bfield_aux[lev][1],*Bfield_aux[lev][2]); } is_synchronized = false; + } else { // Beyond one step, we have E^{n} and B^{n}. // Particles have p^{n-1/2} and x^{n}. FillBoundaryE(); FillBoundaryB(); UpdateAuxilaryData(); + } if (do_subcycling == 0 || finest_level == 0) { @@ -283,6 +285,7 @@ WarpX::OneStep_nosub (Real cur_time) if (warpx_py_beforedeposition) warpx_py_beforedeposition(); #endif PushParticlesandDepose(cur_time); + #ifdef WARPX_USE_PY if (warpx_py_afterdeposition) warpx_py_afterdeposition(); #endif diff --git a/Source/FieldSolver/SpectralSolver/SpectralAlgorithms/PsatdAlgorithm.H b/Source/FieldSolver/SpectralSolver/SpectralAlgorithms/PsatdAlgorithm.H index 0487e5226..12718e38b 100644 --- a/Source/FieldSolver/SpectralSolver/SpectralAlgorithms/PsatdAlgorithm.H +++ b/Source/FieldSolver/SpectralSolver/SpectralAlgorithms/PsatdAlgorithm.H @@ -8,14 +8,18 @@ */ class PsatdAlgorithm : public SpectralBaseAlgorithm { + public: PsatdAlgorithm(const SpectralKSpace& spectral_kspace, const amrex::DistributionMapping& dm, const int norder_x, const int norder_y, - const int norder_z, const bool nodal, - const amrex::Real dt); - // Redefine update equation from base class - virtual void pushSpectralFields(SpectralFieldData& f) const override final; + const int norder_z, const bool nodal, const amrex::Real dt); + + void InitializeSpectralCoefficients(const SpectralKSpace& spectral_kspace, + const amrex::DistributionMapping& dm, + const amrex::Real dt); + + void pushSpectralFields(SpectralFieldData& f) const override final; private: SpectralCoefficients C_coef, S_ck_coef, X1_coef, X2_coef, X3_coef; diff --git a/Source/FieldSolver/SpectralSolver/SpectralAlgorithms/PsatdAlgorithm.cpp b/Source/FieldSolver/SpectralSolver/SpectralAlgorithms/PsatdAlgorithm.cpp index 37892d35a..d45b01bda 100644 --- a/Source/FieldSolver/SpectralSolver/SpectralAlgorithms/PsatdAlgorithm.cpp +++ b/Source/FieldSolver/SpectralSolver/SpectralAlgorithms/PsatdAlgorithm.cpp @@ -22,59 +22,8 @@ PsatdAlgorithm::PsatdAlgorithm(const SpectralKSpace& spectral_kspace, X2_coef = SpectralCoefficients(ba, dm, 1, 0); X3_coef = SpectralCoefficients(ba, dm, 1, 0); - // Fill them with the right values: - // Loop over boxes and allocate the corresponding coefficients - // for each box owned by the local MPI proc - for (MFIter mfi(ba, dm); mfi.isValid(); ++mfi){ - - const Box& bx = ba[mfi]; - - // Extract pointers for the k vectors - const Real* modified_kx = modified_kx_vec[mfi].dataPtr(); -#if (AMREX_SPACEDIM==3) - const Real* modified_ky = modified_ky_vec[mfi].dataPtr(); -#endif - const Real* modified_kz = modified_kz_vec[mfi].dataPtr(); - // Extract arrays for the coefficients - Array4<Real> C = C_coef[mfi].array(); - Array4<Real> S_ck = S_ck_coef[mfi].array(); - Array4<Real> X1 = X1_coef[mfi].array(); - Array4<Real> X2 = X2_coef[mfi].array(); - Array4<Real> X3 = X3_coef[mfi].array(); - - // Loop over indices within one box - ParallelFor(bx, - [=] AMREX_GPU_DEVICE(int i, int j, int k) noexcept - { - // Calculate norm of vector - const Real k_norm = std::sqrt( - std::pow(modified_kx[i], 2) + -#if (AMREX_SPACEDIM==3) - std::pow(modified_ky[j], 2) + - std::pow(modified_kz[k], 2)); -#else - std::pow(modified_kz[j], 2)); -#endif - - // Calculate coefficients - constexpr Real c = PhysConst::c; - constexpr Real ep0 = PhysConst::ep0; - if (k_norm != 0){ - C(i,j,k) = std::cos(c*k_norm*dt); - S_ck(i,j,k) = std::sin(c*k_norm*dt)/(c*k_norm); - X1(i,j,k) = (1. - C(i,j,k))/(ep0 * c*c * k_norm*k_norm); - X2(i,j,k) = (1. - S_ck(i,j,k)/dt)/(ep0 * k_norm*k_norm); - X3(i,j,k) = (C(i,j,k) - S_ck(i,j,k)/dt)/(ep0 * k_norm*k_norm); - } else { // Handle k_norm = 0, by using the analytical limit - C(i,j,k) = 1.; - S_ck(i,j,k) = dt; - X1(i,j,k) = 0.5 * dt*dt / ep0; - X2(i,j,k) = c*c * dt*dt / (6.*ep0); - X3(i,j,k) = - c*c * dt*dt / (3.*ep0); - } - }); - } -}; + InitializeSpectralCoefficients(spectral_kspace, dm, dt); +} /* Advance the E and B field in spectral space (stored in `f`) * over one time step */ @@ -130,13 +79,14 @@ PsatdAlgorithm::pushSpectralFields(SpectralFieldData& f) const{ #endif constexpr Real c2 = PhysConst::c*PhysConst::c; constexpr Real inv_ep0 = 1./PhysConst::ep0; - constexpr Complex I = Complex{0,1}; + const Complex I = Complex{0,1}; const Real C = C_arr(i,j,k); const Real S_ck = S_ck_arr(i,j,k); const Real X1 = X1_arr(i,j,k); const Real X2 = X2_arr(i,j,k); const Real X3 = X3_arr(i,j,k); + // Update E (see WarpX online documentation: theory section) fields(i,j,k,Idx::Ex) = C*Ex_old + S_ck*(c2*I*(ky*Bz_old - kz*By_old) - inv_ep0*Jx) @@ -160,3 +110,63 @@ PsatdAlgorithm::pushSpectralFields(SpectralFieldData& f) const{ }); } }; + +void PsatdAlgorithm::InitializeSpectralCoefficients(const SpectralKSpace& spectral_kspace, + const amrex::DistributionMapping& dm, + const amrex::Real dt) +{ + const BoxArray& ba = spectral_kspace.spectralspace_ba; + // Fill them with the right values: + // Loop over boxes and allocate the corresponding coefficients + // for each box owned by the local MPI proc + for (MFIter mfi(ba, dm); mfi.isValid(); ++mfi){ + + const Box& bx = ba[mfi]; + + // Extract pointers for the k vectors + const Real* modified_kx = modified_kx_vec[mfi].dataPtr(); +#if (AMREX_SPACEDIM==3) + const Real* modified_ky = modified_ky_vec[mfi].dataPtr(); +#endif + const Real* modified_kz = modified_kz_vec[mfi].dataPtr(); + // Extract arrays for the coefficients + Array4<Real> C = C_coef[mfi].array(); + Array4<Real> S_ck = S_ck_coef[mfi].array(); + Array4<Real> X1 = X1_coef[mfi].array(); + Array4<Real> X2 = X2_coef[mfi].array(); + Array4<Real> X3 = X3_coef[mfi].array(); + + // Loop over indices within one box + ParallelFor(bx, + [=] AMREX_GPU_DEVICE(int i, int j, int k) noexcept + { + // Calculate norm of vector + const Real k_norm = std::sqrt( + std::pow(modified_kx[i], 2) + +#if (AMREX_SPACEDIM==3) + std::pow(modified_ky[j], 2) + + std::pow(modified_kz[k], 2)); +#else + std::pow(modified_kz[j], 2)); +#endif + + + // Calculate coefficients + constexpr Real c = PhysConst::c; + constexpr Real ep0 = PhysConst::ep0; + if (k_norm != 0){ + C(i,j,k) = std::cos(c*k_norm*dt); + S_ck(i,j,k) = std::sin(c*k_norm*dt)/(c*k_norm); + X1(i,j,k) = (1. - C(i,j,k))/(ep0 * c*c * k_norm*k_norm); + X2(i,j,k) = (1. - S_ck(i,j,k)/dt)/(ep0 * k_norm*k_norm); + X3(i,j,k) = (C(i,j,k) - S_ck(i,j,k)/dt)/(ep0 * k_norm*k_norm); + } else { // Handle k_norm = 0, by using the analytical limit + C(i,j,k) = 1.; + S_ck(i,j,k) = dt; + X1(i,j,k) = 0.5 * dt*dt / ep0; + X2(i,j,k) = c*c * dt*dt / (6.*ep0); + X3(i,j,k) = - c*c * dt*dt / (3.*ep0); + } + }); + } +} diff --git a/Source/FieldSolver/SpectralSolver/SpectralFieldData.H b/Source/FieldSolver/SpectralSolver/SpectralFieldData.H index 8e58aa1d8..7954414b8 100644 --- a/Source/FieldSolver/SpectralSolver/SpectralFieldData.H +++ b/Source/FieldSolver/SpectralSolver/SpectralFieldData.H @@ -25,7 +25,7 @@ class SpectralFieldData // (plans are only initialized for the boxes that are owned by // the local MPI rank) #ifdef AMREX_USE_GPU - // Add cuFFT-specific code + using FFTplans = amrex::LayoutData<cufftHandle>; #else using FFTplans = amrex::LayoutData<fftw_plan>; #endif diff --git a/Source/FieldSolver/SpectralSolver/SpectralFieldData.cpp b/Source/FieldSolver/SpectralSolver/SpectralFieldData.cpp index 02fa2015f..a2b695568 100644 --- a/Source/FieldSolver/SpectralSolver/SpectralFieldData.cpp +++ b/Source/FieldSolver/SpectralSolver/SpectralFieldData.cpp @@ -53,7 +53,38 @@ SpectralFieldData::SpectralFieldData( const BoxArray& realspace_ba, // the FFT plan, the valid dimensions are those of the real-space box. IntVect fft_size = realspace_ba[mfi].length(); #ifdef AMREX_USE_GPU - // Add cuFFT-specific code + // Create cuFFT plans + // Creating 3D plan for real to complex -- double precision + // Assuming CUDA is used for programming GPU + // Note that D2Z is inherently forward plan + // and Z2D is inherently backward plan + cufftResult result; +#if (AMREX_SPACEDIM == 3) + result = cufftPlan3d( &forward_plan[mfi], fft_size[2], + fft_size[1],fft_size[0], CUFFT_D2Z); + if ( result != CUFFT_SUCCESS ) { + amrex::Print() << " cufftplan3d forward failed! \n"; + } + + result = cufftPlan3d( &backward_plan[mfi], fft_size[2], + fft_size[1], fft_size[0], CUFFT_Z2D); + if ( result != CUFFT_SUCCESS ) { + amrex::Print() << " cufftplan3d backward failed! \n"; + } +#else + result = cufftPlan2d( &forward_plan[mfi], fft_size[1], + fft_size[0], CUFFT_D2Z ); + if ( result != CUFFT_SUCCESS ) { + amrex::Print() << " cufftplan2d forward failed! \n"; + } + + result = cufftPlan2d( &backward_plan[mfi], fft_size[1], + fft_size[0], CUFFT_Z2D ); + if ( result != CUFFT_SUCCESS ) { + amrex::Print() << " cufftplan2d backward failed! \n"; + } +#endif + #else // Create FFTW plans forward_plan[mfi] = @@ -86,7 +117,9 @@ SpectralFieldData::~SpectralFieldData() if (tmpRealField.size() > 0){ for ( MFIter mfi(tmpRealField); mfi.isValid(); ++mfi ){ #ifdef AMREX_USE_GPU - // Add cuFFT-specific code + // Destroy cuFFT plans + cufftDestroy( forward_plan[mfi] ); + cufftDestroy( backward_plan[mfi] ); #else // Destroy FFTW plans fftw_destroy_plan( forward_plan[mfi] ); @@ -129,14 +162,25 @@ SpectralFieldData::ForwardTransform( const MultiFab& mf, Array4<Real> tmp_arr = tmpRealField[mfi].array(); ParallelFor( realspace_bx, [=] AMREX_GPU_DEVICE(int i, int j, int k) noexcept { - tmp_arr(i,j,k) = mf_arr(i,j,k,i_comp); + tmp_arr(i,j,k) = mf_arr(i,j,k,i_comp); }); } // Perform Fourier transform from `tmpRealField` to `tmpSpectralField` #ifdef AMREX_USE_GPU - // Add cuFFT-specific code ; make sure that this is done on the same - // GPU stream as the above copy + // Perform Fast Fourier Transform on GPU using cuFFT + // make sure that this is done on the same + // GPU stream as the above copy + cufftResult result; + cudaStream_t stream = amrex::Gpu::Device::cudaStream(); + cufftSetStream ( forward_plan[mfi], stream); + result = cufftExecD2Z( forward_plan[mfi], + tmpRealField[mfi].dataPtr(), + reinterpret_cast<cuDoubleComplex*>( + tmpSpectralField[mfi].dataPtr()) ); + if ( result != CUFFT_SUCCESS ) { + amrex::Print() << " forward transform using cufftExecD2Z failed ! \n"; + } #else fftw_execute( forward_plan[mfi] ); #endif @@ -155,6 +199,7 @@ SpectralFieldData::ForwardTransform( const MultiFab& mf, const Complex* zshift_arr = zshift_FFTfromCell[mfi].dataPtr(); // Loop over indices within one box const Box spectralspace_bx = tmpSpectralField[mfi].box(); + ParallelFor( spectralspace_bx, [=] AMREX_GPU_DEVICE(int i, int j, int k) noexcept { Complex spectral_field_value = tmp_arr(i,j,k); @@ -207,6 +252,7 @@ SpectralFieldData::BackwardTransform( MultiFab& mf, const Complex* zshift_arr = zshift_FFTtoCell[mfi].dataPtr(); // Loop over indices within one box const Box spectralspace_bx = tmpSpectralField[mfi].box(); + ParallelFor( spectralspace_bx, [=] AMREX_GPU_DEVICE(int i, int j, int k) noexcept { Complex spectral_field_value = field_arr(i,j,k,field_index); @@ -225,8 +271,19 @@ SpectralFieldData::BackwardTransform( MultiFab& mf, // Perform Fourier transform from `tmpSpectralField` to `tmpRealField` #ifdef AMREX_USE_GPU - // Add cuFFT-specific code ; make sure that this is done on the same + // Perform Fast Fourier Transform on GPU using cuFFT. + // make sure that this is done on the same // GPU stream as the above copy + cufftResult result; + cudaStream_t stream = amrex::Gpu::Device::cudaStream(); + cufftSetStream ( backward_plan[mfi], stream); + result = cufftExecZ2D( backward_plan[mfi], + reinterpret_cast<cuDoubleComplex*>( + tmpSpectralField[mfi].dataPtr()), + tmpRealField[mfi].dataPtr() ); + if ( result != CUFFT_SUCCESS ) { + amrex::Print() << " Backward transform using cufftexecZ2D failed! \n"; + } #else fftw_execute( backward_plan[mfi] ); #endif @@ -240,6 +297,7 @@ SpectralFieldData::BackwardTransform( MultiFab& mf, Array4<const Real> tmp_arr = tmpRealField[mfi].array(); // Normalization: divide by the number of points in realspace const Real inv_N = 1./realspace_bx.numPts(); + ParallelFor( realspace_bx, [=] AMREX_GPU_DEVICE(int i, int j, int k) noexcept { // Copy and normalize field diff --git a/Source/FieldSolver/SpectralSolver/SpectralKSpace.cpp b/Source/FieldSolver/SpectralSolver/SpectralKSpace.cpp index 2fe78cedd..6fe5e3939 100644 --- a/Source/FieldSolver/SpectralSolver/SpectralKSpace.cpp +++ b/Source/FieldSolver/SpectralSolver/SpectralKSpace.cpp @@ -142,9 +142,14 @@ SpectralKSpace::getSpectralShiftFactor( const DistributionMapping& dm, case ShiftType::TransformFromCellCentered: sign = -1.; break; case ShiftType::TransformToCellCentered: sign = 1.; } - constexpr Complex I{0,1}; + const Complex I{0,1}; for (int i=0; i<k.size(); i++ ){ +#ifdef AMREX_USE_GPU + shift[i] = thrust::exp( I*sign*k[i]*0.5*dx[i_dim] ); +#else shift[i] = std::exp( I*sign*k[i]*0.5*dx[i_dim] ); +#endif + } } return shift_factor; diff --git a/Source/FieldSolver/WarpXFFT.cpp b/Source/FieldSolver/WarpXFFT.cpp index 1cf5460f2..13d92f6f3 100644 --- a/Source/FieldSolver/WarpXFFT.cpp +++ b/Source/FieldSolver/WarpXFFT.cpp @@ -56,6 +56,7 @@ BuildFFTOwnerMask (const MultiFab& mf, const Geometry& geom) for (const auto& b : bl) { fab.setVal(nonowner, b, 0, 1); } + } return mask; @@ -89,7 +90,7 @@ CopyDataFromFFTToValid (MultiFab& mf, const MultiFab& mf_fft, const BoxArray& ba const FArrayBox& srcfab = mf_fft[mfi]; const Box& srcbox = srcfab.box(); - + if (srcbox.contains(bx)) { // Copy the interior region (without guard cells) @@ -107,6 +108,8 @@ CopyDataFromFFTToValid (MultiFab& mf, const MultiFab& mf_fft, const BoxArray& ba // the cell that has non-zero mask is the one which is retained. mf.setVal(0.0, 0); mf.ParallelAdd(mftmp); + + } } @@ -407,6 +410,7 @@ WarpX::PushPSATD (int lev, amrex::Real /* dt */) BL_PROFILE_VAR_START(blp_push_eb); if (fft_hybrid_mpi_decomposition){ +#ifndef AMREX_USE_CUDA // When running on CPU: use PICSAR code if (Efield_fp_fft[lev][0]->local_size() == 1) //Only one FFT patch on this MPI { @@ -447,6 +451,9 @@ WarpX::PushPSATD (int lev, amrex::Real /* dt */) { amrex::Abort("WarpX::PushPSATD: TODO"); } +#else // AMREX_USE_CUDA is defined ; running on GPU + amrex::Abort("The option `psatd.fft_hybrid_mpi_decomposition` does not work on GPU."); +#endif } else { // Not using the hybrid decomposition auto& solver = *spectral_solver_fp[lev]; @@ -474,6 +481,7 @@ WarpX::PushPSATD (int lev, amrex::Real /* dt */) solver.BackwardTransform(*Bfield_fp_fft[lev][0], SpectralFieldIndex::Bx); solver.BackwardTransform(*Bfield_fp_fft[lev][1], SpectralFieldIndex::By); solver.BackwardTransform(*Bfield_fp_fft[lev][2], SpectralFieldIndex::Bz); + } BL_PROFILE_VAR_STOP(blp_push_eb); @@ -490,4 +498,7 @@ WarpX::PushPSATD (int lev, amrex::Real /* dt */) { amrex::Abort("WarpX::PushPSATD: TODO"); } + } + + diff --git a/Source/Initialization/WarpXInitData.cpp b/Source/Initialization/WarpXInitData.cpp index 0f33d1a0f..d583b2b0f 100644 --- a/Source/Initialization/WarpXInitData.cpp +++ b/Source/Initialization/WarpXInitData.cpp @@ -322,6 +322,7 @@ WarpX::InitLevelData (int lev, Real time) void WarpX::InitLevelDataFFT (int lev, Real time) { + Efield_fp_fft[lev][0]->setVal(0.0); Efield_fp_fft[lev][1]->setVal(0.0); Efield_fp_fft[lev][2]->setVal(0.0); @@ -346,6 +347,7 @@ WarpX::InitLevelDataFFT (int lev, Real time) current_cp_fft[lev][2]->setVal(0.0); rho_cp_fft[lev]->setVal(0.0); } + } #endif diff --git a/Source/Parallelization/WarpXComm.cpp b/Source/Parallelization/WarpXComm.cpp index 5c9fa144f..00dcb85d0 100644 --- a/Source/Parallelization/WarpXComm.cpp +++ b/Source/Parallelization/WarpXComm.cpp @@ -246,7 +246,7 @@ void WarpX::FillBoundaryE (int lev, PatchType patch_type) { if (patch_type == PatchType::fine) - { + { if (do_pml && pml[lev]->ok()) { pml[lev]->ExchangeE(patch_type, diff --git a/Source/Particles/WarpXParticleContainer.cpp b/Source/Particles/WarpXParticleContainer.cpp index 9791eee80..47d57294d 100644 --- a/Source/Particles/WarpXParticleContainer.cpp +++ b/Source/Particles/WarpXParticleContainer.cpp @@ -669,7 +669,7 @@ WarpXParticleContainer::DepositCharge ( WarpXParIter& pti, RealVector& wp, const std::array<Real,3>& cxyzmin_tile = WarpX::LowerCorner(ctilebox, lev-1); #ifdef AMREX_USE_GPU - data_ptr = (*crhomf)[pti].dataPtr(); + data_ptr = (*crhomf)[pti].dataPtr(icomp); auto rholen = (*crhomf)[pti].length(); #else tile_box = amrex::convert(ctilebox, IntVect::TheUnitVector()); |