aboutsummaryrefslogtreecommitdiff
path: root/Source/FieldSolver
diff options
context:
space:
mode:
Diffstat (limited to 'Source/FieldSolver')
-rw-r--r--Source/FieldSolver/SpectralSolver/SpectralFieldData.cpp43
-rw-r--r--Source/FieldSolver/WarpXFFT.cpp173
2 files changed, 103 insertions, 113 deletions
diff --git a/Source/FieldSolver/SpectralSolver/SpectralFieldData.cpp b/Source/FieldSolver/SpectralSolver/SpectralFieldData.cpp
index a2b695568..948baf0a6 100644
--- a/Source/FieldSolver/SpectralSolver/SpectralFieldData.cpp
+++ b/Source/FieldSolver/SpectralSolver/SpectralFieldData.cpp
@@ -55,30 +55,30 @@ SpectralFieldData::SpectralFieldData( const BoxArray& realspace_ba,
#ifdef AMREX_USE_GPU
// Create cuFFT plans
// Creating 3D plan for real to complex -- double precision
- // Assuming CUDA is used for programming GPU
- // Note that D2Z is inherently forward plan
- // and Z2D is inherently backward plan
+ // Assuming CUDA is used for programming GPU
+ // Note that D2Z is inherently forward plan
+ // and Z2D is inherently backward plan
cufftResult result;
#if (AMREX_SPACEDIM == 3)
- result = cufftPlan3d( &forward_plan[mfi], fft_size[2],
+ result = cufftPlan3d( &forward_plan[mfi], fft_size[2],
fft_size[1],fft_size[0], CUFFT_D2Z);
if ( result != CUFFT_SUCCESS ) {
amrex::Print() << " cufftplan3d forward failed! \n";
}
- result = cufftPlan3d( &backward_plan[mfi], fft_size[2],
+ result = cufftPlan3d( &backward_plan[mfi], fft_size[2],
fft_size[1], fft_size[0], CUFFT_Z2D);
if ( result != CUFFT_SUCCESS ) {
amrex::Print() << " cufftplan3d backward failed! \n";
}
#else
- result = cufftPlan2d( &forward_plan[mfi], fft_size[1],
+ result = cufftPlan2d( &forward_plan[mfi], fft_size[1],
fft_size[0], CUFFT_D2Z );
if ( result != CUFFT_SUCCESS ) {
amrex::Print() << " cufftplan2d forward failed! \n";
}
- result = cufftPlan2d( &backward_plan[mfi], fft_size[1],
+ result = cufftPlan2d( &backward_plan[mfi], fft_size[1],
fft_size[0], CUFFT_Z2D );
if ( result != CUFFT_SUCCESS ) {
amrex::Print() << " cufftplan2d backward failed! \n";
@@ -162,20 +162,20 @@ SpectralFieldData::ForwardTransform( const MultiFab& mf,
Array4<Real> tmp_arr = tmpRealField[mfi].array();
ParallelFor( realspace_bx,
[=] AMREX_GPU_DEVICE(int i, int j, int k) noexcept {
- tmp_arr(i,j,k) = mf_arr(i,j,k,i_comp);
+ tmp_arr(i,j,k) = mf_arr(i,j,k,i_comp);
});
}
// Perform Fourier transform from `tmpRealField` to `tmpSpectralField`
#ifdef AMREX_USE_GPU
- // Perform Fast Fourier Transform on GPU using cuFFT
- // make sure that this is done on the same
- // GPU stream as the above copy
+ // Perform Fast Fourier Transform on GPU using cuFFT
+ // make sure that this is done on the same
+ // GPU stream as the above copy
cufftResult result;
- cudaStream_t stream = amrex::Gpu::Device::cudaStream();
+ cudaStream_t stream = amrex::Gpu::Device::cudaStream();
cufftSetStream ( forward_plan[mfi], stream);
- result = cufftExecD2Z( forward_plan[mfi],
- tmpRealField[mfi].dataPtr(),
+ result = cufftExecD2Z( forward_plan[mfi],
+ tmpRealField[mfi].dataPtr(),
reinterpret_cast<cuDoubleComplex*>(
tmpSpectralField[mfi].dataPtr()) );
if ( result != CUFFT_SUCCESS ) {
@@ -271,13 +271,13 @@ SpectralFieldData::BackwardTransform( MultiFab& mf,
// Perform Fourier transform from `tmpSpectralField` to `tmpRealField`
#ifdef AMREX_USE_GPU
- // Perform Fast Fourier Transform on GPU using cuFFT.
- // make sure that this is done on the same
+ // Perform Fast Fourier Transform on GPU using cuFFT.
+ // make sure that this is done on the same
// GPU stream as the above copy
cufftResult result;
- cudaStream_t stream = amrex::Gpu::Device::cudaStream();
+ cudaStream_t stream = amrex::Gpu::Device::cudaStream();
cufftSetStream ( backward_plan[mfi], stream);
- result = cufftExecZ2D( backward_plan[mfi],
+ result = cufftExecZ2D( backward_plan[mfi],
reinterpret_cast<cuDoubleComplex*>(
tmpSpectralField[mfi].dataPtr()),
tmpRealField[mfi].dataPtr() );
@@ -289,16 +289,17 @@ SpectralFieldData::BackwardTransform( MultiFab& mf,
#endif
// Copy the temporary field `tmpRealField` to the real-space field `mf`
-
+ // (only in the valid cells ; not in the guard cells)
// Normalize (divide by 1/N) since the FFT+IFFT results in a factor N
{
- const Box realspace_bx = tmpRealField[mfi].box();
Array4<Real> mf_arr = mf[mfi].array();
Array4<const Real> tmp_arr = tmpRealField[mfi].array();
// Normalization: divide by the number of points in realspace
+ // (includes the guard cells)
+ const Box realspace_bx = tmpRealField[mfi].box();
const Real inv_N = 1./realspace_bx.numPts();
- ParallelFor( realspace_bx,
+ ParallelFor( mfi.validbox(),
[=] AMREX_GPU_DEVICE(int i, int j, int k) noexcept {
// Copy and normalize field
mf_arr(i,j,k,i_comp) = inv_N*tmp_arr(i,j,k);
diff --git a/Source/FieldSolver/WarpXFFT.cpp b/Source/FieldSolver/WarpXFFT.cpp
index c7f03b5eb..f9cd7570a 100644
--- a/Source/FieldSolver/WarpXFFT.cpp
+++ b/Source/FieldSolver/WarpXFFT.cpp
@@ -90,7 +90,7 @@ CopyDataFromFFTToValid (MultiFab& mf, const MultiFab& mf_fft, const BoxArray& ba
const FArrayBox& srcfab = mf_fft[mfi];
const Box& srcbox = srcfab.box();
-
+
if (srcbox.contains(bx))
{
// Copy the interior region (without guard cells)
@@ -109,7 +109,7 @@ CopyDataFromFFTToValid (MultiFab& mf, const MultiFab& mf_fft, const BoxArray& ba
mf.setVal(0.0, 0);
mf.ParallelAdd(mftmp);
-
+
}
}
@@ -129,19 +129,6 @@ WarpX::AllocLevelDataFFT (int lev)
FFTDomainDecomposition(lev, ba_fp_fft, dm_fp_fft, ba_valid_fp_fft[lev], domain_fp_fft[lev],
geom[lev].Domain());
- if (fft_hybrid_mpi_decomposition == false){
- // Allocate and initialize objects for the spectral solver
- // (all use the same distribution mapping)
- std::array<Real,3> dx = CellSize(lev);
-#if (AMREX_SPACEDIM == 3)
- RealVect dx_vect(dx[0], dx[1], dx[2]);
-#elif (AMREX_SPACEDIM == 2)
- RealVect dx_vect(dx[0], dx[2]);
-#endif
- spectral_solver_fp[lev].reset( new SpectralSolver( ba_fp_fft, dm_fp_fft,
- nox_fft, noy_fft, noz_fft, do_nodal, dx_vect, dt[lev] ) );
- }
-
// rho2 has one extra ghost cell, so that it's safe to deposit charge density after
// pushing particle.
@@ -383,13 +370,48 @@ WarpX::PushPSATD (amrex::Real a_dt)
{
for (int lev = 0; lev <= finest_level; ++lev) {
AMREX_ALWAYS_ASSERT_WITH_MESSAGE(dt[lev] == a_dt, "dt must be consistent");
- PushPSATD(lev, a_dt);
+ if (fft_hybrid_mpi_decomposition){
+ PushPSATD_hybridFFT(lev, a_dt);
+ } else {
+ PushPSATD_localFFT(lev, a_dt);
+ }
}
}
+void WarpX::PushPSATD_localFFT (int lev, amrex::Real /* dt */)
+{
+ auto& solver = *spectral_solver_fp[lev];
+
+ // Perform forward Fourier transform
+ solver.ForwardTransform(*Efield_fp[lev][0], SpectralFieldIndex::Ex);
+ solver.ForwardTransform(*Efield_fp[lev][1], SpectralFieldIndex::Ey);
+ solver.ForwardTransform(*Efield_fp[lev][2], SpectralFieldIndex::Ez);
+ solver.ForwardTransform(*Bfield_fp[lev][0], SpectralFieldIndex::Bx);
+ solver.ForwardTransform(*Bfield_fp[lev][1], SpectralFieldIndex::By);
+ solver.ForwardTransform(*Bfield_fp[lev][2], SpectralFieldIndex::Bz);
+ solver.ForwardTransform(*current_fp[lev][0], SpectralFieldIndex::Jx);
+ solver.ForwardTransform(*current_fp[lev][1], SpectralFieldIndex::Jy);
+ solver.ForwardTransform(*current_fp[lev][2], SpectralFieldIndex::Jz);
+ solver.ForwardTransform(*rho_fp[lev], SpectralFieldIndex::rho_old, 0);
+ solver.ForwardTransform(*rho_fp[lev], SpectralFieldIndex::rho_new, 1);
+
+ // Advance fields in spectral space
+ solver.pushSpectralFields();
+
+ // Perform backward Fourier Transform
+ solver.BackwardTransform(*Efield_fp[lev][0], SpectralFieldIndex::Ex);
+ solver.BackwardTransform(*Efield_fp[lev][1], SpectralFieldIndex::Ey);
+ solver.BackwardTransform(*Efield_fp[lev][2], SpectralFieldIndex::Ez);
+ solver.BackwardTransform(*Bfield_fp[lev][0], SpectralFieldIndex::Bx);
+ solver.BackwardTransform(*Bfield_fp[lev][1], SpectralFieldIndex::By);
+ solver.BackwardTransform(*Bfield_fp[lev][2], SpectralFieldIndex::Bz);
+}
+
void
-WarpX::PushPSATD (int lev, amrex::Real /* dt */)
+WarpX::PushPSATD_hybridFFT (int lev, amrex::Real /* dt */)
{
+#ifndef AMREX_USE_CUDA // Running on CPU ; use PICSAR code for the hybrid FFT
+
BL_PROFILE_VAR_NS("WarpXFFT::CopyDualGrid", blp_copy);
BL_PROFILE_VAR_NS("PICSAR::FftPushEB", blp_push_eb);
@@ -409,79 +431,45 @@ WarpX::PushPSATD (int lev, amrex::Real /* dt */)
BL_PROFILE_VAR_STOP(blp_copy);
BL_PROFILE_VAR_START(blp_push_eb);
- if (fft_hybrid_mpi_decomposition){
-#ifndef AMREX_USE_CUDA // When running on CPU: use PICSAR code
- if (Efield_fp_fft[lev][0]->local_size() == 1)
- //Only one FFT patch on this MPI
- {
- for (MFIter mfi(*Efield_fp_fft[lev][0]); mfi.isValid(); ++mfi)
- {
- warpx_fft_push_eb(WARPX_TO_FORTRAN_ANYD((*Efield_fp_fft[lev][0])[mfi]),
- WARPX_TO_FORTRAN_ANYD((*Efield_fp_fft[lev][1])[mfi]),
- WARPX_TO_FORTRAN_ANYD((*Efield_fp_fft[lev][2])[mfi]),
- WARPX_TO_FORTRAN_ANYD((*Bfield_fp_fft[lev][0])[mfi]),
- WARPX_TO_FORTRAN_ANYD((*Bfield_fp_fft[lev][1])[mfi]),
- WARPX_TO_FORTRAN_ANYD((*Bfield_fp_fft[lev][2])[mfi]),
- WARPX_TO_FORTRAN_ANYD((*current_fp_fft[lev][0])[mfi]),
- WARPX_TO_FORTRAN_ANYD((*current_fp_fft[lev][1])[mfi]),
- WARPX_TO_FORTRAN_ANYD((*current_fp_fft[lev][2])[mfi]),
- WARPX_TO_FORTRAN_N_ANYD((*rho_fp_fft[lev])[mfi],0),
- WARPX_TO_FORTRAN_N_ANYD((*rho_fp_fft[lev])[mfi],1));
- }
- }
- else if (Efield_fp_fft[lev][0]->local_size() == 0)
- // No FFT patch on this MPI rank
- // Still need to call the MPI-FFT routine.
- {
- FArrayBox fab(Box(IntVect::TheZeroVector(), IntVect::TheUnitVector()));
- warpx_fft_push_eb(WARPX_TO_FORTRAN_ANYD(fab),
- WARPX_TO_FORTRAN_ANYD(fab),
- WARPX_TO_FORTRAN_ANYD(fab),
- WARPX_TO_FORTRAN_ANYD(fab),
- WARPX_TO_FORTRAN_ANYD(fab),
- WARPX_TO_FORTRAN_ANYD(fab),
- WARPX_TO_FORTRAN_ANYD(fab),
- WARPX_TO_FORTRAN_ANYD(fab),
- WARPX_TO_FORTRAN_ANYD(fab),
- WARPX_TO_FORTRAN_ANYD(fab),
- WARPX_TO_FORTRAN_ANYD(fab));
- }
- else
- // Multiple FFT patches on this MPI rank
- {
- amrex::Abort("WarpX::PushPSATD: TODO");
- }
-#else // AMREX_USE_CUDA is defined ; running on GPU
- amrex::Abort("The option `psatd.fft_hybrid_mpi_decomposition` does not work on GPU.");
-#endif
- } else {
- // Not using the hybrid decomposition
- auto& solver = *spectral_solver_fp[lev];
-
- // Perform forward Fourier transform
- solver.ForwardTransform(*Efield_fp_fft[lev][0], SpectralFieldIndex::Ex);
- solver.ForwardTransform(*Efield_fp_fft[lev][1], SpectralFieldIndex::Ey);
- solver.ForwardTransform(*Efield_fp_fft[lev][2], SpectralFieldIndex::Ez);
- solver.ForwardTransform(*Bfield_fp_fft[lev][0], SpectralFieldIndex::Bx);
- solver.ForwardTransform(*Bfield_fp_fft[lev][1], SpectralFieldIndex::By);
- solver.ForwardTransform(*Bfield_fp_fft[lev][2], SpectralFieldIndex::Bz);
- solver.ForwardTransform(*current_fp_fft[lev][0], SpectralFieldIndex::Jx);
- solver.ForwardTransform(*current_fp_fft[lev][1], SpectralFieldIndex::Jy);
- solver.ForwardTransform(*current_fp_fft[lev][2], SpectralFieldIndex::Jz);
- solver.ForwardTransform(*rho_fp_fft[lev], SpectralFieldIndex::rho_old, 0);
- solver.ForwardTransform(*rho_fp_fft[lev], SpectralFieldIndex::rho_new, 1);
-
- // Advance fields in spectral space
- solver.pushSpectralFields();
-
- // Perform backward Fourier Transform
- solver.BackwardTransform(*Efield_fp_fft[lev][0], SpectralFieldIndex::Ex);
- solver.BackwardTransform(*Efield_fp_fft[lev][1], SpectralFieldIndex::Ey);
- solver.BackwardTransform(*Efield_fp_fft[lev][2], SpectralFieldIndex::Ez);
- solver.BackwardTransform(*Bfield_fp_fft[lev][0], SpectralFieldIndex::Bx);
- solver.BackwardTransform(*Bfield_fp_fft[lev][1], SpectralFieldIndex::By);
- solver.BackwardTransform(*Bfield_fp_fft[lev][2], SpectralFieldIndex::Bz);
-
+ if (Efield_fp_fft[lev][0]->local_size() == 1)
+ //Only one FFT patch on this MPI
+ {
+ for (MFIter mfi(*Efield_fp_fft[lev][0]); mfi.isValid(); ++mfi)
+ {
+ warpx_fft_push_eb(WARPX_TO_FORTRAN_ANYD((*Efield_fp_fft[lev][0])[mfi]),
+ WARPX_TO_FORTRAN_ANYD((*Efield_fp_fft[lev][1])[mfi]),
+ WARPX_TO_FORTRAN_ANYD((*Efield_fp_fft[lev][2])[mfi]),
+ WARPX_TO_FORTRAN_ANYD((*Bfield_fp_fft[lev][0])[mfi]),
+ WARPX_TO_FORTRAN_ANYD((*Bfield_fp_fft[lev][1])[mfi]),
+ WARPX_TO_FORTRAN_ANYD((*Bfield_fp_fft[lev][2])[mfi]),
+ WARPX_TO_FORTRAN_ANYD((*current_fp_fft[lev][0])[mfi]),
+ WARPX_TO_FORTRAN_ANYD((*current_fp_fft[lev][1])[mfi]),
+ WARPX_TO_FORTRAN_ANYD((*current_fp_fft[lev][2])[mfi]),
+ WARPX_TO_FORTRAN_N_ANYD((*rho_fp_fft[lev])[mfi],0),
+ WARPX_TO_FORTRAN_N_ANYD((*rho_fp_fft[lev])[mfi],1));
+ }
+ }
+ else if (Efield_fp_fft[lev][0]->local_size() == 0)
+ // No FFT patch on this MPI rank
+ // Still need to call the MPI-FFT routine.
+ {
+ FArrayBox fab(Box(IntVect::TheZeroVector(), IntVect::TheUnitVector()));
+ warpx_fft_push_eb(WARPX_TO_FORTRAN_ANYD(fab),
+ WARPX_TO_FORTRAN_ANYD(fab),
+ WARPX_TO_FORTRAN_ANYD(fab),
+ WARPX_TO_FORTRAN_ANYD(fab),
+ WARPX_TO_FORTRAN_ANYD(fab),
+ WARPX_TO_FORTRAN_ANYD(fab),
+ WARPX_TO_FORTRAN_ANYD(fab),
+ WARPX_TO_FORTRAN_ANYD(fab),
+ WARPX_TO_FORTRAN_ANYD(fab),
+ WARPX_TO_FORTRAN_ANYD(fab),
+ WARPX_TO_FORTRAN_ANYD(fab));
+ }
+ else
+ // Multiple FFT patches on this MPI rank
+ {
+ amrex::Abort("WarpX::PushPSATD: TODO");
}
BL_PROFILE_VAR_STOP(blp_push_eb);
@@ -498,7 +486,8 @@ WarpX::PushPSATD (int lev, amrex::Real /* dt */)
{
amrex::Abort("WarpX::PushPSATD: TODO");
}
+#else // AMREX_USE_CUDA is defined ; running on GPU
+ amrex::Abort("The option `psatd.fft_hybrid_mpi_decomposition` does not work on GPU.");
+#endif
}
-
-