diff options
Diffstat (limited to 'Source/FieldSolver')
-rw-r--r-- | Source/FieldSolver/SpectralSolver/SpectralFieldData.cpp | 43 | ||||
-rw-r--r-- | Source/FieldSolver/WarpXFFT.cpp | 173 |
2 files changed, 103 insertions, 113 deletions
diff --git a/Source/FieldSolver/SpectralSolver/SpectralFieldData.cpp b/Source/FieldSolver/SpectralSolver/SpectralFieldData.cpp index a2b695568..948baf0a6 100644 --- a/Source/FieldSolver/SpectralSolver/SpectralFieldData.cpp +++ b/Source/FieldSolver/SpectralSolver/SpectralFieldData.cpp @@ -55,30 +55,30 @@ SpectralFieldData::SpectralFieldData( const BoxArray& realspace_ba, #ifdef AMREX_USE_GPU // Create cuFFT plans // Creating 3D plan for real to complex -- double precision - // Assuming CUDA is used for programming GPU - // Note that D2Z is inherently forward plan - // and Z2D is inherently backward plan + // Assuming CUDA is used for programming GPU + // Note that D2Z is inherently forward plan + // and Z2D is inherently backward plan cufftResult result; #if (AMREX_SPACEDIM == 3) - result = cufftPlan3d( &forward_plan[mfi], fft_size[2], + result = cufftPlan3d( &forward_plan[mfi], fft_size[2], fft_size[1],fft_size[0], CUFFT_D2Z); if ( result != CUFFT_SUCCESS ) { amrex::Print() << " cufftplan3d forward failed! \n"; } - result = cufftPlan3d( &backward_plan[mfi], fft_size[2], + result = cufftPlan3d( &backward_plan[mfi], fft_size[2], fft_size[1], fft_size[0], CUFFT_Z2D); if ( result != CUFFT_SUCCESS ) { amrex::Print() << " cufftplan3d backward failed! \n"; } #else - result = cufftPlan2d( &forward_plan[mfi], fft_size[1], + result = cufftPlan2d( &forward_plan[mfi], fft_size[1], fft_size[0], CUFFT_D2Z ); if ( result != CUFFT_SUCCESS ) { amrex::Print() << " cufftplan2d forward failed! \n"; } - result = cufftPlan2d( &backward_plan[mfi], fft_size[1], + result = cufftPlan2d( &backward_plan[mfi], fft_size[1], fft_size[0], CUFFT_Z2D ); if ( result != CUFFT_SUCCESS ) { amrex::Print() << " cufftplan2d backward failed! \n"; @@ -162,20 +162,20 @@ SpectralFieldData::ForwardTransform( const MultiFab& mf, Array4<Real> tmp_arr = tmpRealField[mfi].array(); ParallelFor( realspace_bx, [=] AMREX_GPU_DEVICE(int i, int j, int k) noexcept { - tmp_arr(i,j,k) = mf_arr(i,j,k,i_comp); + tmp_arr(i,j,k) = mf_arr(i,j,k,i_comp); }); } // Perform Fourier transform from `tmpRealField` to `tmpSpectralField` #ifdef AMREX_USE_GPU - // Perform Fast Fourier Transform on GPU using cuFFT - // make sure that this is done on the same - // GPU stream as the above copy + // Perform Fast Fourier Transform on GPU using cuFFT + // make sure that this is done on the same + // GPU stream as the above copy cufftResult result; - cudaStream_t stream = amrex::Gpu::Device::cudaStream(); + cudaStream_t stream = amrex::Gpu::Device::cudaStream(); cufftSetStream ( forward_plan[mfi], stream); - result = cufftExecD2Z( forward_plan[mfi], - tmpRealField[mfi].dataPtr(), + result = cufftExecD2Z( forward_plan[mfi], + tmpRealField[mfi].dataPtr(), reinterpret_cast<cuDoubleComplex*>( tmpSpectralField[mfi].dataPtr()) ); if ( result != CUFFT_SUCCESS ) { @@ -271,13 +271,13 @@ SpectralFieldData::BackwardTransform( MultiFab& mf, // Perform Fourier transform from `tmpSpectralField` to `tmpRealField` #ifdef AMREX_USE_GPU - // Perform Fast Fourier Transform on GPU using cuFFT. - // make sure that this is done on the same + // Perform Fast Fourier Transform on GPU using cuFFT. + // make sure that this is done on the same // GPU stream as the above copy cufftResult result; - cudaStream_t stream = amrex::Gpu::Device::cudaStream(); + cudaStream_t stream = amrex::Gpu::Device::cudaStream(); cufftSetStream ( backward_plan[mfi], stream); - result = cufftExecZ2D( backward_plan[mfi], + result = cufftExecZ2D( backward_plan[mfi], reinterpret_cast<cuDoubleComplex*>( tmpSpectralField[mfi].dataPtr()), tmpRealField[mfi].dataPtr() ); @@ -289,16 +289,17 @@ SpectralFieldData::BackwardTransform( MultiFab& mf, #endif // Copy the temporary field `tmpRealField` to the real-space field `mf` - + // (only in the valid cells ; not in the guard cells) // Normalize (divide by 1/N) since the FFT+IFFT results in a factor N { - const Box realspace_bx = tmpRealField[mfi].box(); Array4<Real> mf_arr = mf[mfi].array(); Array4<const Real> tmp_arr = tmpRealField[mfi].array(); // Normalization: divide by the number of points in realspace + // (includes the guard cells) + const Box realspace_bx = tmpRealField[mfi].box(); const Real inv_N = 1./realspace_bx.numPts(); - ParallelFor( realspace_bx, + ParallelFor( mfi.validbox(), [=] AMREX_GPU_DEVICE(int i, int j, int k) noexcept { // Copy and normalize field mf_arr(i,j,k,i_comp) = inv_N*tmp_arr(i,j,k); diff --git a/Source/FieldSolver/WarpXFFT.cpp b/Source/FieldSolver/WarpXFFT.cpp index c7f03b5eb..f9cd7570a 100644 --- a/Source/FieldSolver/WarpXFFT.cpp +++ b/Source/FieldSolver/WarpXFFT.cpp @@ -90,7 +90,7 @@ CopyDataFromFFTToValid (MultiFab& mf, const MultiFab& mf_fft, const BoxArray& ba const FArrayBox& srcfab = mf_fft[mfi]; const Box& srcbox = srcfab.box(); - + if (srcbox.contains(bx)) { // Copy the interior region (without guard cells) @@ -109,7 +109,7 @@ CopyDataFromFFTToValid (MultiFab& mf, const MultiFab& mf_fft, const BoxArray& ba mf.setVal(0.0, 0); mf.ParallelAdd(mftmp); - + } } @@ -129,19 +129,6 @@ WarpX::AllocLevelDataFFT (int lev) FFTDomainDecomposition(lev, ba_fp_fft, dm_fp_fft, ba_valid_fp_fft[lev], domain_fp_fft[lev], geom[lev].Domain()); - if (fft_hybrid_mpi_decomposition == false){ - // Allocate and initialize objects for the spectral solver - // (all use the same distribution mapping) - std::array<Real,3> dx = CellSize(lev); -#if (AMREX_SPACEDIM == 3) - RealVect dx_vect(dx[0], dx[1], dx[2]); -#elif (AMREX_SPACEDIM == 2) - RealVect dx_vect(dx[0], dx[2]); -#endif - spectral_solver_fp[lev].reset( new SpectralSolver( ba_fp_fft, dm_fp_fft, - nox_fft, noy_fft, noz_fft, do_nodal, dx_vect, dt[lev] ) ); - } - // rho2 has one extra ghost cell, so that it's safe to deposit charge density after // pushing particle. @@ -383,13 +370,48 @@ WarpX::PushPSATD (amrex::Real a_dt) { for (int lev = 0; lev <= finest_level; ++lev) { AMREX_ALWAYS_ASSERT_WITH_MESSAGE(dt[lev] == a_dt, "dt must be consistent"); - PushPSATD(lev, a_dt); + if (fft_hybrid_mpi_decomposition){ + PushPSATD_hybridFFT(lev, a_dt); + } else { + PushPSATD_localFFT(lev, a_dt); + } } } +void WarpX::PushPSATD_localFFT (int lev, amrex::Real /* dt */) +{ + auto& solver = *spectral_solver_fp[lev]; + + // Perform forward Fourier transform + solver.ForwardTransform(*Efield_fp[lev][0], SpectralFieldIndex::Ex); + solver.ForwardTransform(*Efield_fp[lev][1], SpectralFieldIndex::Ey); + solver.ForwardTransform(*Efield_fp[lev][2], SpectralFieldIndex::Ez); + solver.ForwardTransform(*Bfield_fp[lev][0], SpectralFieldIndex::Bx); + solver.ForwardTransform(*Bfield_fp[lev][1], SpectralFieldIndex::By); + solver.ForwardTransform(*Bfield_fp[lev][2], SpectralFieldIndex::Bz); + solver.ForwardTransform(*current_fp[lev][0], SpectralFieldIndex::Jx); + solver.ForwardTransform(*current_fp[lev][1], SpectralFieldIndex::Jy); + solver.ForwardTransform(*current_fp[lev][2], SpectralFieldIndex::Jz); + solver.ForwardTransform(*rho_fp[lev], SpectralFieldIndex::rho_old, 0); + solver.ForwardTransform(*rho_fp[lev], SpectralFieldIndex::rho_new, 1); + + // Advance fields in spectral space + solver.pushSpectralFields(); + + // Perform backward Fourier Transform + solver.BackwardTransform(*Efield_fp[lev][0], SpectralFieldIndex::Ex); + solver.BackwardTransform(*Efield_fp[lev][1], SpectralFieldIndex::Ey); + solver.BackwardTransform(*Efield_fp[lev][2], SpectralFieldIndex::Ez); + solver.BackwardTransform(*Bfield_fp[lev][0], SpectralFieldIndex::Bx); + solver.BackwardTransform(*Bfield_fp[lev][1], SpectralFieldIndex::By); + solver.BackwardTransform(*Bfield_fp[lev][2], SpectralFieldIndex::Bz); +} + void -WarpX::PushPSATD (int lev, amrex::Real /* dt */) +WarpX::PushPSATD_hybridFFT (int lev, amrex::Real /* dt */) { +#ifndef AMREX_USE_CUDA // Running on CPU ; use PICSAR code for the hybrid FFT + BL_PROFILE_VAR_NS("WarpXFFT::CopyDualGrid", blp_copy); BL_PROFILE_VAR_NS("PICSAR::FftPushEB", blp_push_eb); @@ -409,79 +431,45 @@ WarpX::PushPSATD (int lev, amrex::Real /* dt */) BL_PROFILE_VAR_STOP(blp_copy); BL_PROFILE_VAR_START(blp_push_eb); - if (fft_hybrid_mpi_decomposition){ -#ifndef AMREX_USE_CUDA // When running on CPU: use PICSAR code - if (Efield_fp_fft[lev][0]->local_size() == 1) - //Only one FFT patch on this MPI - { - for (MFIter mfi(*Efield_fp_fft[lev][0]); mfi.isValid(); ++mfi) - { - warpx_fft_push_eb(WARPX_TO_FORTRAN_ANYD((*Efield_fp_fft[lev][0])[mfi]), - WARPX_TO_FORTRAN_ANYD((*Efield_fp_fft[lev][1])[mfi]), - WARPX_TO_FORTRAN_ANYD((*Efield_fp_fft[lev][2])[mfi]), - WARPX_TO_FORTRAN_ANYD((*Bfield_fp_fft[lev][0])[mfi]), - WARPX_TO_FORTRAN_ANYD((*Bfield_fp_fft[lev][1])[mfi]), - WARPX_TO_FORTRAN_ANYD((*Bfield_fp_fft[lev][2])[mfi]), - WARPX_TO_FORTRAN_ANYD((*current_fp_fft[lev][0])[mfi]), - WARPX_TO_FORTRAN_ANYD((*current_fp_fft[lev][1])[mfi]), - WARPX_TO_FORTRAN_ANYD((*current_fp_fft[lev][2])[mfi]), - WARPX_TO_FORTRAN_N_ANYD((*rho_fp_fft[lev])[mfi],0), - WARPX_TO_FORTRAN_N_ANYD((*rho_fp_fft[lev])[mfi],1)); - } - } - else if (Efield_fp_fft[lev][0]->local_size() == 0) - // No FFT patch on this MPI rank - // Still need to call the MPI-FFT routine. - { - FArrayBox fab(Box(IntVect::TheZeroVector(), IntVect::TheUnitVector())); - warpx_fft_push_eb(WARPX_TO_FORTRAN_ANYD(fab), - WARPX_TO_FORTRAN_ANYD(fab), - WARPX_TO_FORTRAN_ANYD(fab), - WARPX_TO_FORTRAN_ANYD(fab), - WARPX_TO_FORTRAN_ANYD(fab), - WARPX_TO_FORTRAN_ANYD(fab), - WARPX_TO_FORTRAN_ANYD(fab), - WARPX_TO_FORTRAN_ANYD(fab), - WARPX_TO_FORTRAN_ANYD(fab), - WARPX_TO_FORTRAN_ANYD(fab), - WARPX_TO_FORTRAN_ANYD(fab)); - } - else - // Multiple FFT patches on this MPI rank - { - amrex::Abort("WarpX::PushPSATD: TODO"); - } -#else // AMREX_USE_CUDA is defined ; running on GPU - amrex::Abort("The option `psatd.fft_hybrid_mpi_decomposition` does not work on GPU."); -#endif - } else { - // Not using the hybrid decomposition - auto& solver = *spectral_solver_fp[lev]; - - // Perform forward Fourier transform - solver.ForwardTransform(*Efield_fp_fft[lev][0], SpectralFieldIndex::Ex); - solver.ForwardTransform(*Efield_fp_fft[lev][1], SpectralFieldIndex::Ey); - solver.ForwardTransform(*Efield_fp_fft[lev][2], SpectralFieldIndex::Ez); - solver.ForwardTransform(*Bfield_fp_fft[lev][0], SpectralFieldIndex::Bx); - solver.ForwardTransform(*Bfield_fp_fft[lev][1], SpectralFieldIndex::By); - solver.ForwardTransform(*Bfield_fp_fft[lev][2], SpectralFieldIndex::Bz); - solver.ForwardTransform(*current_fp_fft[lev][0], SpectralFieldIndex::Jx); - solver.ForwardTransform(*current_fp_fft[lev][1], SpectralFieldIndex::Jy); - solver.ForwardTransform(*current_fp_fft[lev][2], SpectralFieldIndex::Jz); - solver.ForwardTransform(*rho_fp_fft[lev], SpectralFieldIndex::rho_old, 0); - solver.ForwardTransform(*rho_fp_fft[lev], SpectralFieldIndex::rho_new, 1); - - // Advance fields in spectral space - solver.pushSpectralFields(); - - // Perform backward Fourier Transform - solver.BackwardTransform(*Efield_fp_fft[lev][0], SpectralFieldIndex::Ex); - solver.BackwardTransform(*Efield_fp_fft[lev][1], SpectralFieldIndex::Ey); - solver.BackwardTransform(*Efield_fp_fft[lev][2], SpectralFieldIndex::Ez); - solver.BackwardTransform(*Bfield_fp_fft[lev][0], SpectralFieldIndex::Bx); - solver.BackwardTransform(*Bfield_fp_fft[lev][1], SpectralFieldIndex::By); - solver.BackwardTransform(*Bfield_fp_fft[lev][2], SpectralFieldIndex::Bz); - + if (Efield_fp_fft[lev][0]->local_size() == 1) + //Only one FFT patch on this MPI + { + for (MFIter mfi(*Efield_fp_fft[lev][0]); mfi.isValid(); ++mfi) + { + warpx_fft_push_eb(WARPX_TO_FORTRAN_ANYD((*Efield_fp_fft[lev][0])[mfi]), + WARPX_TO_FORTRAN_ANYD((*Efield_fp_fft[lev][1])[mfi]), + WARPX_TO_FORTRAN_ANYD((*Efield_fp_fft[lev][2])[mfi]), + WARPX_TO_FORTRAN_ANYD((*Bfield_fp_fft[lev][0])[mfi]), + WARPX_TO_FORTRAN_ANYD((*Bfield_fp_fft[lev][1])[mfi]), + WARPX_TO_FORTRAN_ANYD((*Bfield_fp_fft[lev][2])[mfi]), + WARPX_TO_FORTRAN_ANYD((*current_fp_fft[lev][0])[mfi]), + WARPX_TO_FORTRAN_ANYD((*current_fp_fft[lev][1])[mfi]), + WARPX_TO_FORTRAN_ANYD((*current_fp_fft[lev][2])[mfi]), + WARPX_TO_FORTRAN_N_ANYD((*rho_fp_fft[lev])[mfi],0), + WARPX_TO_FORTRAN_N_ANYD((*rho_fp_fft[lev])[mfi],1)); + } + } + else if (Efield_fp_fft[lev][0]->local_size() == 0) + // No FFT patch on this MPI rank + // Still need to call the MPI-FFT routine. + { + FArrayBox fab(Box(IntVect::TheZeroVector(), IntVect::TheUnitVector())); + warpx_fft_push_eb(WARPX_TO_FORTRAN_ANYD(fab), + WARPX_TO_FORTRAN_ANYD(fab), + WARPX_TO_FORTRAN_ANYD(fab), + WARPX_TO_FORTRAN_ANYD(fab), + WARPX_TO_FORTRAN_ANYD(fab), + WARPX_TO_FORTRAN_ANYD(fab), + WARPX_TO_FORTRAN_ANYD(fab), + WARPX_TO_FORTRAN_ANYD(fab), + WARPX_TO_FORTRAN_ANYD(fab), + WARPX_TO_FORTRAN_ANYD(fab), + WARPX_TO_FORTRAN_ANYD(fab)); + } + else + // Multiple FFT patches on this MPI rank + { + amrex::Abort("WarpX::PushPSATD: TODO"); } BL_PROFILE_VAR_STOP(blp_push_eb); @@ -498,7 +486,8 @@ WarpX::PushPSATD (int lev, amrex::Real /* dt */) { amrex::Abort("WarpX::PushPSATD: TODO"); } +#else // AMREX_USE_CUDA is defined ; running on GPU + amrex::Abort("The option `psatd.fft_hybrid_mpi_decomposition` does not work on GPU."); +#endif } - - |