diff options
Diffstat (limited to 'Source')
-rw-r--r-- | Source/.DS_Store | bin | 0 -> 6148 bytes | |||
-rw-r--r-- | Source/Diagnostics/WarpXIO.cpp | 1 | ||||
-rw-r--r-- | Source/FieldSolver/SpectralSolver/SpectralFieldData.cpp | 43 | ||||
-rw-r--r-- | Source/FieldSolver/WarpXFFT.cpp | 193 | ||||
-rw-r--r-- | Source/FortranInterface/WarpX_f.F90 | 33 | ||||
-rw-r--r-- | Source/FortranInterface/WarpX_f.H | 6 | ||||
-rw-r--r-- | Source/Laser/LaserParticleContainer.cpp | 12 | ||||
-rw-r--r-- | Source/Particles/Deposition/CurrentDeposition.H | 179 | ||||
-rw-r--r-- | Source/Particles/Deposition/Make.package | 3 | ||||
-rw-r--r-- | Source/Particles/Make.package | 1 | ||||
-rw-r--r-- | Source/Particles/PhysicalParticleContainer.H | 3 | ||||
-rw-r--r-- | Source/Particles/PhysicalParticleContainer.cpp | 658 | ||||
-rw-r--r-- | Source/Particles/RigidInjectedParticleContainer.cpp | 20 | ||||
-rw-r--r-- | Source/Particles/WarpXParticleContainer.H | 19 | ||||
-rw-r--r-- | Source/Particles/WarpXParticleContainer.cpp | 144 | ||||
-rw-r--r-- | Source/Python/WarpXWrappers.cpp | 50 | ||||
-rw-r--r-- | Source/Python/WarpXWrappers.h | 6 | ||||
-rw-r--r-- | Source/WarpX.H | 4 | ||||
-rw-r--r-- | Source/WarpX.cpp | 63 |
19 files changed, 903 insertions, 535 deletions
diff --git a/Source/.DS_Store b/Source/.DS_Store Binary files differnew file mode 100644 index 000000000..01640e062 --- /dev/null +++ b/Source/.DS_Store diff --git a/Source/Diagnostics/WarpXIO.cpp b/Source/Diagnostics/WarpXIO.cpp index 24272c588..38399bf9e 100644 --- a/Source/Diagnostics/WarpXIO.cpp +++ b/Source/Diagnostics/WarpXIO.cpp @@ -755,7 +755,6 @@ WarpX::WriteSlicePlotFile () const // writing plotfile // const std::string& slice_plotfilename = amrex::Concatenate(slice_plot_file,istep[0]); amrex::Print() << " Writing slice plotfile " << slice_plotfilename << "\n"; - const int ngrow = 0; Vector<std::string> rfs; VisMF::Header::Version current_version = VisMF::GetHeaderVersion(); diff --git a/Source/FieldSolver/SpectralSolver/SpectralFieldData.cpp b/Source/FieldSolver/SpectralSolver/SpectralFieldData.cpp index a2b695568..948baf0a6 100644 --- a/Source/FieldSolver/SpectralSolver/SpectralFieldData.cpp +++ b/Source/FieldSolver/SpectralSolver/SpectralFieldData.cpp @@ -55,30 +55,30 @@ SpectralFieldData::SpectralFieldData( const BoxArray& realspace_ba, #ifdef AMREX_USE_GPU // Create cuFFT plans // Creating 3D plan for real to complex -- double precision - // Assuming CUDA is used for programming GPU - // Note that D2Z is inherently forward plan - // and Z2D is inherently backward plan + // Assuming CUDA is used for programming GPU + // Note that D2Z is inherently forward plan + // and Z2D is inherently backward plan cufftResult result; #if (AMREX_SPACEDIM == 3) - result = cufftPlan3d( &forward_plan[mfi], fft_size[2], + result = cufftPlan3d( &forward_plan[mfi], fft_size[2], fft_size[1],fft_size[0], CUFFT_D2Z); if ( result != CUFFT_SUCCESS ) { amrex::Print() << " cufftplan3d forward failed! \n"; } - result = cufftPlan3d( &backward_plan[mfi], fft_size[2], + result = cufftPlan3d( &backward_plan[mfi], fft_size[2], fft_size[1], fft_size[0], CUFFT_Z2D); if ( result != CUFFT_SUCCESS ) { amrex::Print() << " cufftplan3d backward failed! \n"; } #else - result = cufftPlan2d( &forward_plan[mfi], fft_size[1], + result = cufftPlan2d( &forward_plan[mfi], fft_size[1], fft_size[0], CUFFT_D2Z ); if ( result != CUFFT_SUCCESS ) { amrex::Print() << " cufftplan2d forward failed! \n"; } - result = cufftPlan2d( &backward_plan[mfi], fft_size[1], + result = cufftPlan2d( &backward_plan[mfi], fft_size[1], fft_size[0], CUFFT_Z2D ); if ( result != CUFFT_SUCCESS ) { amrex::Print() << " cufftplan2d backward failed! \n"; @@ -162,20 +162,20 @@ SpectralFieldData::ForwardTransform( const MultiFab& mf, Array4<Real> tmp_arr = tmpRealField[mfi].array(); ParallelFor( realspace_bx, [=] AMREX_GPU_DEVICE(int i, int j, int k) noexcept { - tmp_arr(i,j,k) = mf_arr(i,j,k,i_comp); + tmp_arr(i,j,k) = mf_arr(i,j,k,i_comp); }); } // Perform Fourier transform from `tmpRealField` to `tmpSpectralField` #ifdef AMREX_USE_GPU - // Perform Fast Fourier Transform on GPU using cuFFT - // make sure that this is done on the same - // GPU stream as the above copy + // Perform Fast Fourier Transform on GPU using cuFFT + // make sure that this is done on the same + // GPU stream as the above copy cufftResult result; - cudaStream_t stream = amrex::Gpu::Device::cudaStream(); + cudaStream_t stream = amrex::Gpu::Device::cudaStream(); cufftSetStream ( forward_plan[mfi], stream); - result = cufftExecD2Z( forward_plan[mfi], - tmpRealField[mfi].dataPtr(), + result = cufftExecD2Z( forward_plan[mfi], + tmpRealField[mfi].dataPtr(), reinterpret_cast<cuDoubleComplex*>( tmpSpectralField[mfi].dataPtr()) ); if ( result != CUFFT_SUCCESS ) { @@ -271,13 +271,13 @@ SpectralFieldData::BackwardTransform( MultiFab& mf, // Perform Fourier transform from `tmpSpectralField` to `tmpRealField` #ifdef AMREX_USE_GPU - // Perform Fast Fourier Transform on GPU using cuFFT. - // make sure that this is done on the same + // Perform Fast Fourier Transform on GPU using cuFFT. + // make sure that this is done on the same // GPU stream as the above copy cufftResult result; - cudaStream_t stream = amrex::Gpu::Device::cudaStream(); + cudaStream_t stream = amrex::Gpu::Device::cudaStream(); cufftSetStream ( backward_plan[mfi], stream); - result = cufftExecZ2D( backward_plan[mfi], + result = cufftExecZ2D( backward_plan[mfi], reinterpret_cast<cuDoubleComplex*>( tmpSpectralField[mfi].dataPtr()), tmpRealField[mfi].dataPtr() ); @@ -289,16 +289,17 @@ SpectralFieldData::BackwardTransform( MultiFab& mf, #endif // Copy the temporary field `tmpRealField` to the real-space field `mf` - + // (only in the valid cells ; not in the guard cells) // Normalize (divide by 1/N) since the FFT+IFFT results in a factor N { - const Box realspace_bx = tmpRealField[mfi].box(); Array4<Real> mf_arr = mf[mfi].array(); Array4<const Real> tmp_arr = tmpRealField[mfi].array(); // Normalization: divide by the number of points in realspace + // (includes the guard cells) + const Box realspace_bx = tmpRealField[mfi].box(); const Real inv_N = 1./realspace_bx.numPts(); - ParallelFor( realspace_bx, + ParallelFor( mfi.validbox(), [=] AMREX_GPU_DEVICE(int i, int j, int k) noexcept { // Copy and normalize field mf_arr(i,j,k,i_comp) = inv_N*tmp_arr(i,j,k); diff --git a/Source/FieldSolver/WarpXFFT.cpp b/Source/FieldSolver/WarpXFFT.cpp index 13d92f6f3..f9cd7570a 100644 --- a/Source/FieldSolver/WarpXFFT.cpp +++ b/Source/FieldSolver/WarpXFFT.cpp @@ -90,7 +90,7 @@ CopyDataFromFFTToValid (MultiFab& mf, const MultiFab& mf_fft, const BoxArray& ba const FArrayBox& srcfab = mf_fft[mfi]; const Box& srcbox = srcfab.box(); - + if (srcbox.contains(bx)) { // Copy the interior region (without guard cells) @@ -109,7 +109,7 @@ CopyDataFromFFTToValid (MultiFab& mf, const MultiFab& mf_fft, const BoxArray& ba mf.setVal(0.0, 0); mf.ParallelAdd(mftmp); - + } } @@ -129,19 +129,6 @@ WarpX::AllocLevelDataFFT (int lev) FFTDomainDecomposition(lev, ba_fp_fft, dm_fp_fft, ba_valid_fp_fft[lev], domain_fp_fft[lev], geom[lev].Domain()); - if (fft_hybrid_mpi_decomposition == false){ - // Allocate and initialize objects for the spectral solver - // (all use the same distribution mapping) - std::array<Real,3> dx = CellSize(lev); -#if (AMREX_SPACEDIM == 3) - RealVect dx_vect(dx[0], dx[1], dx[2]); -#elif (AMREX_SPACEDIM == 2) - RealVect dx_vect(dx[0], dx[2]); -#endif - spectral_solver_fp[lev].reset( new SpectralSolver( ba_fp_fft, dm_fp_fft, - nox_fft, noy_fft, noz_fft, do_nodal, dx_vect, dt[lev] ) ); - } - // rho2 has one extra ghost cell, so that it's safe to deposit charge density after // pushing particle. @@ -383,105 +370,106 @@ WarpX::PushPSATD (amrex::Real a_dt) { for (int lev = 0; lev <= finest_level; ++lev) { AMREX_ALWAYS_ASSERT_WITH_MESSAGE(dt[lev] == a_dt, "dt must be consistent"); - PushPSATD(lev, a_dt); + if (fft_hybrid_mpi_decomposition){ + PushPSATD_hybridFFT(lev, a_dt); + } else { + PushPSATD_localFFT(lev, a_dt); + } } } +void WarpX::PushPSATD_localFFT (int lev, amrex::Real /* dt */) +{ + auto& solver = *spectral_solver_fp[lev]; + + // Perform forward Fourier transform + solver.ForwardTransform(*Efield_fp[lev][0], SpectralFieldIndex::Ex); + solver.ForwardTransform(*Efield_fp[lev][1], SpectralFieldIndex::Ey); + solver.ForwardTransform(*Efield_fp[lev][2], SpectralFieldIndex::Ez); + solver.ForwardTransform(*Bfield_fp[lev][0], SpectralFieldIndex::Bx); + solver.ForwardTransform(*Bfield_fp[lev][1], SpectralFieldIndex::By); + solver.ForwardTransform(*Bfield_fp[lev][2], SpectralFieldIndex::Bz); + solver.ForwardTransform(*current_fp[lev][0], SpectralFieldIndex::Jx); + solver.ForwardTransform(*current_fp[lev][1], SpectralFieldIndex::Jy); + solver.ForwardTransform(*current_fp[lev][2], SpectralFieldIndex::Jz); + solver.ForwardTransform(*rho_fp[lev], SpectralFieldIndex::rho_old, 0); + solver.ForwardTransform(*rho_fp[lev], SpectralFieldIndex::rho_new, 1); + + // Advance fields in spectral space + solver.pushSpectralFields(); + + // Perform backward Fourier Transform + solver.BackwardTransform(*Efield_fp[lev][0], SpectralFieldIndex::Ex); + solver.BackwardTransform(*Efield_fp[lev][1], SpectralFieldIndex::Ey); + solver.BackwardTransform(*Efield_fp[lev][2], SpectralFieldIndex::Ez); + solver.BackwardTransform(*Bfield_fp[lev][0], SpectralFieldIndex::Bx); + solver.BackwardTransform(*Bfield_fp[lev][1], SpectralFieldIndex::By); + solver.BackwardTransform(*Bfield_fp[lev][2], SpectralFieldIndex::Bz); +} + void -WarpX::PushPSATD (int lev, amrex::Real /* dt */) +WarpX::PushPSATD_hybridFFT (int lev, amrex::Real /* dt */) { +#ifndef AMREX_USE_CUDA // Running on CPU ; use PICSAR code for the hybrid FFT + BL_PROFILE_VAR_NS("WarpXFFT::CopyDualGrid", blp_copy); BL_PROFILE_VAR_NS("PICSAR::FftPushEB", blp_push_eb); auto period_fp = geom[lev].periodicity(); BL_PROFILE_VAR_START(blp_copy); - Efield_fp_fft[lev][0]->ParallelCopy(*Efield_fp[lev][0], 0, 0, 1, 0, 0, period_fp); - Efield_fp_fft[lev][1]->ParallelCopy(*Efield_fp[lev][1], 0, 0, 1, 0, 0, period_fp); - Efield_fp_fft[lev][2]->ParallelCopy(*Efield_fp[lev][2], 0, 0, 1, 0, 0, period_fp); - Bfield_fp_fft[lev][0]->ParallelCopy(*Bfield_fp[lev][0], 0, 0, 1, 0, 0, period_fp); - Bfield_fp_fft[lev][1]->ParallelCopy(*Bfield_fp[lev][1], 0, 0, 1, 0, 0, period_fp); - Bfield_fp_fft[lev][2]->ParallelCopy(*Bfield_fp[lev][2], 0, 0, 1, 0, 0, period_fp); - current_fp_fft[lev][0]->ParallelCopy(*current_fp[lev][0], 0, 0, 1, 0, 0, period_fp); - current_fp_fft[lev][1]->ParallelCopy(*current_fp[lev][1], 0, 0, 1, 0, 0, period_fp); - current_fp_fft[lev][2]->ParallelCopy(*current_fp[lev][2], 0, 0, 1, 0, 0, period_fp); - rho_fp_fft[lev]->ParallelCopy(*rho_fp[lev], 0, 0, 2, 0, 0, period_fp); + Efield_fp_fft[lev][0]->ParallelCopy(*Efield_fp[lev][0], 0, 0, 1, Efield_fp[lev][0]->nGrow(), 0, period_fp); + Efield_fp_fft[lev][1]->ParallelCopy(*Efield_fp[lev][1], 0, 0, 1, Efield_fp[lev][1]->nGrow(), 0, period_fp); + Efield_fp_fft[lev][2]->ParallelCopy(*Efield_fp[lev][2], 0, 0, 1, Efield_fp[lev][2]->nGrow(), 0, period_fp); + Bfield_fp_fft[lev][0]->ParallelCopy(*Bfield_fp[lev][0], 0, 0, 1, Bfield_fp[lev][0]->nGrow(), 0, period_fp); + Bfield_fp_fft[lev][1]->ParallelCopy(*Bfield_fp[lev][1], 0, 0, 1, Bfield_fp[lev][1]->nGrow(), 0, period_fp); + Bfield_fp_fft[lev][2]->ParallelCopy(*Bfield_fp[lev][2], 0, 0, 1, Bfield_fp[lev][2]->nGrow(), 0, period_fp); + current_fp_fft[lev][0]->ParallelCopy(*current_fp[lev][0], 0, 0, 1, current_fp[lev][0]->nGrow(), 0, period_fp); + current_fp_fft[lev][1]->ParallelCopy(*current_fp[lev][1], 0, 0, 1, current_fp[lev][1]->nGrow(), 0, period_fp); + current_fp_fft[lev][2]->ParallelCopy(*current_fp[lev][2], 0, 0, 1, current_fp[lev][2]->nGrow(), 0, period_fp); + rho_fp_fft[lev]->ParallelCopy(*rho_fp[lev], 0, 0, 2, rho_fp[lev]->nGrow(), 0, period_fp); BL_PROFILE_VAR_STOP(blp_copy); BL_PROFILE_VAR_START(blp_push_eb); - if (fft_hybrid_mpi_decomposition){ -#ifndef AMREX_USE_CUDA // When running on CPU: use PICSAR code - if (Efield_fp_fft[lev][0]->local_size() == 1) - //Only one FFT patch on this MPI - { - for (MFIter mfi(*Efield_fp_fft[lev][0]); mfi.isValid(); ++mfi) - { - warpx_fft_push_eb(WARPX_TO_FORTRAN_ANYD((*Efield_fp_fft[lev][0])[mfi]), - WARPX_TO_FORTRAN_ANYD((*Efield_fp_fft[lev][1])[mfi]), - WARPX_TO_FORTRAN_ANYD((*Efield_fp_fft[lev][2])[mfi]), - WARPX_TO_FORTRAN_ANYD((*Bfield_fp_fft[lev][0])[mfi]), - WARPX_TO_FORTRAN_ANYD((*Bfield_fp_fft[lev][1])[mfi]), - WARPX_TO_FORTRAN_ANYD((*Bfield_fp_fft[lev][2])[mfi]), - WARPX_TO_FORTRAN_ANYD((*current_fp_fft[lev][0])[mfi]), - WARPX_TO_FORTRAN_ANYD((*current_fp_fft[lev][1])[mfi]), - WARPX_TO_FORTRAN_ANYD((*current_fp_fft[lev][2])[mfi]), - WARPX_TO_FORTRAN_N_ANYD((*rho_fp_fft[lev])[mfi],0), - WARPX_TO_FORTRAN_N_ANYD((*rho_fp_fft[lev])[mfi],1)); - } - } - else if (Efield_fp_fft[lev][0]->local_size() == 0) - // No FFT patch on this MPI rank - // Still need to call the MPI-FFT routine. - { - FArrayBox fab(Box(IntVect::TheZeroVector(), IntVect::TheUnitVector())); - warpx_fft_push_eb(WARPX_TO_FORTRAN_ANYD(fab), - WARPX_TO_FORTRAN_ANYD(fab), - WARPX_TO_FORTRAN_ANYD(fab), - WARPX_TO_FORTRAN_ANYD(fab), - WARPX_TO_FORTRAN_ANYD(fab), - WARPX_TO_FORTRAN_ANYD(fab), - WARPX_TO_FORTRAN_ANYD(fab), - WARPX_TO_FORTRAN_ANYD(fab), - WARPX_TO_FORTRAN_ANYD(fab), - WARPX_TO_FORTRAN_ANYD(fab), - WARPX_TO_FORTRAN_ANYD(fab)); - } - else - // Multiple FFT patches on this MPI rank - { - amrex::Abort("WarpX::PushPSATD: TODO"); - } -#else // AMREX_USE_CUDA is defined ; running on GPU - amrex::Abort("The option `psatd.fft_hybrid_mpi_decomposition` does not work on GPU."); -#endif - } else { - // Not using the hybrid decomposition - auto& solver = *spectral_solver_fp[lev]; - - // Perform forward Fourier transform - solver.ForwardTransform(*Efield_fp_fft[lev][0], SpectralFieldIndex::Ex); - solver.ForwardTransform(*Efield_fp_fft[lev][1], SpectralFieldIndex::Ey); - solver.ForwardTransform(*Efield_fp_fft[lev][2], SpectralFieldIndex::Ez); - solver.ForwardTransform(*Bfield_fp_fft[lev][0], SpectralFieldIndex::Bx); - solver.ForwardTransform(*Bfield_fp_fft[lev][1], SpectralFieldIndex::By); - solver.ForwardTransform(*Bfield_fp_fft[lev][2], SpectralFieldIndex::Bz); - solver.ForwardTransform(*current_fp_fft[lev][0], SpectralFieldIndex::Jx); - solver.ForwardTransform(*current_fp_fft[lev][1], SpectralFieldIndex::Jy); - solver.ForwardTransform(*current_fp_fft[lev][2], SpectralFieldIndex::Jz); - solver.ForwardTransform(*rho_fp_fft[lev], SpectralFieldIndex::rho_old, 0); - solver.ForwardTransform(*rho_fp_fft[lev], SpectralFieldIndex::rho_new, 1); - - // Advance fields in spectral space - solver.pushSpectralFields(); - - // Perform backward Fourier Transform - solver.BackwardTransform(*Efield_fp_fft[lev][0], SpectralFieldIndex::Ex); - solver.BackwardTransform(*Efield_fp_fft[lev][1], SpectralFieldIndex::Ey); - solver.BackwardTransform(*Efield_fp_fft[lev][2], SpectralFieldIndex::Ez); - solver.BackwardTransform(*Bfield_fp_fft[lev][0], SpectralFieldIndex::Bx); - solver.BackwardTransform(*Bfield_fp_fft[lev][1], SpectralFieldIndex::By); - solver.BackwardTransform(*Bfield_fp_fft[lev][2], SpectralFieldIndex::Bz); - + if (Efield_fp_fft[lev][0]->local_size() == 1) + //Only one FFT patch on this MPI + { + for (MFIter mfi(*Efield_fp_fft[lev][0]); mfi.isValid(); ++mfi) + { + warpx_fft_push_eb(WARPX_TO_FORTRAN_ANYD((*Efield_fp_fft[lev][0])[mfi]), + WARPX_TO_FORTRAN_ANYD((*Efield_fp_fft[lev][1])[mfi]), + WARPX_TO_FORTRAN_ANYD((*Efield_fp_fft[lev][2])[mfi]), + WARPX_TO_FORTRAN_ANYD((*Bfield_fp_fft[lev][0])[mfi]), + WARPX_TO_FORTRAN_ANYD((*Bfield_fp_fft[lev][1])[mfi]), + WARPX_TO_FORTRAN_ANYD((*Bfield_fp_fft[lev][2])[mfi]), + WARPX_TO_FORTRAN_ANYD((*current_fp_fft[lev][0])[mfi]), + WARPX_TO_FORTRAN_ANYD((*current_fp_fft[lev][1])[mfi]), + WARPX_TO_FORTRAN_ANYD((*current_fp_fft[lev][2])[mfi]), + WARPX_TO_FORTRAN_N_ANYD((*rho_fp_fft[lev])[mfi],0), + WARPX_TO_FORTRAN_N_ANYD((*rho_fp_fft[lev])[mfi],1)); + } + } + else if (Efield_fp_fft[lev][0]->local_size() == 0) + // No FFT patch on this MPI rank + // Still need to call the MPI-FFT routine. + { + FArrayBox fab(Box(IntVect::TheZeroVector(), IntVect::TheUnitVector())); + warpx_fft_push_eb(WARPX_TO_FORTRAN_ANYD(fab), + WARPX_TO_FORTRAN_ANYD(fab), + WARPX_TO_FORTRAN_ANYD(fab), + WARPX_TO_FORTRAN_ANYD(fab), + WARPX_TO_FORTRAN_ANYD(fab), + WARPX_TO_FORTRAN_ANYD(fab), + WARPX_TO_FORTRAN_ANYD(fab), + WARPX_TO_FORTRAN_ANYD(fab), + WARPX_TO_FORTRAN_ANYD(fab), + WARPX_TO_FORTRAN_ANYD(fab), + WARPX_TO_FORTRAN_ANYD(fab)); + } + else + // Multiple FFT patches on this MPI rank + { + amrex::Abort("WarpX::PushPSATD: TODO"); } BL_PROFILE_VAR_STOP(blp_push_eb); @@ -498,7 +486,8 @@ WarpX::PushPSATD (int lev, amrex::Real /* dt */) { amrex::Abort("WarpX::PushPSATD: TODO"); } +#else // AMREX_USE_CUDA is defined ; running on GPU + amrex::Abort("The option `psatd.fft_hybrid_mpi_decomposition` does not work on GPU."); +#endif } - - diff --git a/Source/FortranInterface/WarpX_f.F90 b/Source/FortranInterface/WarpX_f.F90 index 0560a9981..542cc95a1 100644 --- a/Source/FortranInterface/WarpX_f.F90 +++ b/Source/FortranInterface/WarpX_f.F90 @@ -8,39 +8,6 @@ module warpx_module contains - subroutine warpx_copy_attribs(np, xp, yp, zp, uxp, uyp, uzp, & - xpold, ypold, zpold, uxpold, uypold, uzpold) & - bind(c,name='warpx_copy_attribs') - integer(c_long), intent(in) :: np - real(amrex_real), intent(in) :: xp(np) - real(amrex_real), intent(in) :: yp(np) - real(amrex_real), intent(in) :: zp(np) - real(amrex_real), intent(in) :: uxp(np) - real(amrex_real), intent(in) :: uyp(np) - real(amrex_real), intent(in) :: uzp(np) - real(amrex_real), intent(inout) :: xpold(np) - real(amrex_real), intent(inout) :: ypold(np) - real(amrex_real), intent(inout) :: zpold(np) - real(amrex_real), intent(inout) :: uxpold(np) - real(amrex_real), intent(inout) :: uypold(np) - real(amrex_real), intent(inout) :: uzpold(np) - - integer n - - do n = 1, np - - xpold(n) = xp(n) - ypold(n) = yp(n) - zpold(n) = zp(n) - - uxpold(n) = uxp(n) - uypold(n) = uyp(n) - uzpold(n) = uzp(n) - - end do - - end subroutine warpx_copy_attribs - subroutine warpx_compute_E (lo, hi, & phi, phlo, phhi, & Ex, Exlo, Exhi, & diff --git a/Source/FortranInterface/WarpX_f.H b/Source/FortranInterface/WarpX_f.H index 98dd8685d..0440148eb 100644 --- a/Source/FortranInterface/WarpX_f.H +++ b/Source/FortranInterface/WarpX_f.H @@ -75,12 +75,6 @@ extern "C" { #endif - void warpx_copy_attribs(const long* np, - const amrex_real* xp, const amrex_real* yp, const amrex_real* zp, - const amrex_real* uxp, const amrex_real* uyp, const amrex_real* uzp, - amrex_real* xpold, amrex_real* ypold, amrex_real* zpold, - amrex_real* uxpold, amrex_real* uypold, amrex_real* uzpold); - // Charge deposition void warpx_charge_deposition(amrex::Real* rho, const long* np, const amrex::Real* xp, const amrex::Real* yp, const amrex::Real* zp, const amrex::Real* w, diff --git a/Source/Laser/LaserParticleContainer.cpp b/Source/Laser/LaserParticleContainer.cpp index 515aa1f5d..3d3447a3c 100644 --- a/Source/Laser/LaserParticleContainer.cpp +++ b/Source/Laser/LaserParticleContainer.cpp @@ -504,15 +504,15 @@ LaserParticleContainer::Evolve (int lev, // Current Deposition // // Deposit inside domains - DepositCurrent(pti, wp, uxp, uyp, uzp, &jx, &jy, &jz, - 0, np_current, thread_num, - lev, lev, dt); + DepositCurrentFortran(pti, wp, uxp, uyp, uzp, &jx, &jy, &jz, + 0, np_current, thread_num, + lev, lev, dt); bool has_buffer = cjx; if (has_buffer){ // Deposit in buffers - DepositCurrent(pti, wp, uxp, uyp, uzp, cjx, cjy, cjz, - np_current, np-np_current, thread_num, - lev, lev-1, dt); + DepositCurrentFortran(pti, wp, uxp, uyp, uzp, cjx, cjy, cjz, + np_current, np-np_current, thread_num, + lev, lev-1, dt); } // diff --git a/Source/Particles/Deposition/CurrentDeposition.H b/Source/Particles/Deposition/CurrentDeposition.H new file mode 100644 index 000000000..efda97892 --- /dev/null +++ b/Source/Particles/Deposition/CurrentDeposition.H @@ -0,0 +1,179 @@ +#ifndef CURRENTDEPOSITION_H_ +#define CURRENTDEPOSITION_H_ + +using namespace amrex; + +// Compute shape factor and return index of leftmost cell where +// particle writes. +// Specialized templates are defined below for orders 1, 2 and 3. +template <int depos_order> +AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE +int compute_shape_factor(Real* const sx, Real xint) {return 0;}; + +// Compute shape factor for order 1. +template <> +AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE +int compute_shape_factor <1> (Real* const sx, Real xmid){ + int j = (int) xmid; + Real xint = xmid-j; + sx[0] = 1.0 - xint; + sx[1] = xint; + return j; +} + +// Compute shape factor for order 2. +template <> +AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE +int compute_shape_factor <2> (Real* const sx, Real xmid){ + int j = (int) (xmid+0.5); + Real xint = xmid-j; + sx[0] = 0.5*(0.5-xint)*(0.5-xint); + sx[1] = 0.75-xint*xint; + sx[2] = 0.5*(0.5+xint)*(0.5+xint); + // index of the leftmost cell where particle deposits + return j-1; +} + +// Compute shape factor for order 3. +template <> +AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE +int compute_shape_factor <3> (Real* const sx, Real xmid){ + int j = (int) xmid; + Real xint = xmid-j; + sx[0] = 1.0/6.0*(1.0-xint)*(1.0-xint)*(1.0-xint); + sx[1] = 2.0/3.0-xint*xint*(1-xint/2.0); + sx[2] = 2.0/3.0-(1-xint)*(1-xint)*(1.0-0.5*(1-xint)); + sx[3] = 1.0/6.0*xint*xint*xint; + // index of the leftmost cell where particle deposits + return j-1; +} + +/* \brief Current Deposition for thread thread_num + * /param xp, yp, zp : Pointer to arrays of particle positions. + * \param wp : Pointer to array of particle weights. + * \param uxp uyp uzp : Pointer to arrays of particle momentum. + * \param jx_arr : Array4 of current density, either full array or tile. + * \param jy_arr : Array4 of current density, either full array or tile. + * \param jz_arr : Array4 of current density, either full array or tile. + * \param offset : Index of first particle for which current is deposited + * \param np_to_depose : Number of particles for which current is deposited. + Particles [offset,offset+np_tp_depose] deposit current. + * \param dt : Time step for particle level + * \param dx : 3D cell size + * \param xyzmin : Physical lower bounds of domain. + * \param lo : Index lower bounds of domain. + * \param stagger_shift: 0 if nodal, 0.5 if staggered. + * /param q : species charge. + */ +template <int depos_order> +void doDepositionShapeN(const Real * const xp, const Real * const yp, const Real * const zp, + const Real * const wp, const Real * const uxp, + const Real * const uyp, const Real * const uzp, + const amrex::Array4<amrex::Real>& jx_arr, + const amrex::Array4<amrex::Real>& jy_arr, + const amrex::Array4<amrex::Real>& jz_arr, + const long offset, const long np_to_depose, + const amrex::Real dt, const std::array<amrex::Real,3>& dx, + const std::array<Real, 3> xyzmin, + const Dim3 lo, + const amrex::Real stagger_shift, + const amrex::Real q) +{ + const Real dxi = 1.0/dx[0]; + const Real dzi = 1.0/dx[2]; + const Real dts2dx = 0.5*dt*dxi; + const Real dts2dz = 0.5*dt*dzi; +#if (AMREX_SPACEDIM == 2) + const Real invvol = dxi*dzi; +#else // (AMREX_SPACEDIM == 3) + const Real dyi = 1.0/dx[1]; + const Real dts2dy = 0.5*dt*dyi; + const Real invvol = dxi*dyi*dzi; +#endif + + const Real xmin = xyzmin[0]; + const Real ymin = xyzmin[1]; + const Real zmin = xyzmin[2]; + const Real clightsq = 1.0/PhysConst::c/PhysConst::c; + + // Loop over particles and deposit into jx_arr, jy_arr and jz_arr + ParallelFor( np_to_depose, + [=] AMREX_GPU_DEVICE (long ip) { + // --- Get particle quantities + const Real gaminv = 1.0/std::sqrt(1.0 + uxp[ip]*uxp[ip]*clightsq + + uyp[ip]*uyp[ip]*clightsq + + uzp[ip]*uzp[ip]*clightsq); + const Real wq = q*wp[ip]; + const Real vx = uxp[ip]*gaminv; + const Real vy = uyp[ip]*gaminv; + const Real vz = uzp[ip]*gaminv; + // wqx, wqy wqz are particle current in each direction + const Real wqx = wq*invvol*vx; + const Real wqy = wq*invvol*vy; + const Real wqz = wq*invvol*vz; + + // --- Compute shape factors + // x direction + // Get particle position after 1/2 push back in position + const Real xmid = (xp[ip]-xmin)*dxi-dts2dx*vx; + // Compute shape factors for node-centered quantities + Real AMREX_RESTRICT sx [depos_order + 1]; + // j: leftmost grid point (node-centered) that the particle touches + const int j = compute_shape_factor<depos_order>(sx, xmid); + // Compute shape factors for cell-centered quantities + Real AMREX_RESTRICT sx0[depos_order + 1]; + // j0: leftmost grid point (cell-centered) that the particle touches + const int j0 = compute_shape_factor<depos_order>(sx0, xmid-stagger_shift); + +#if (AMREX_SPACEDIM == 3) + // y direction + const Real ymid= (yp[ip]-ymin)*dyi-dts2dy*vy; + Real AMREX_RESTRICT sy [depos_order + 1]; + const int k = compute_shape_factor<depos_order>(sy, ymid); + Real AMREX_RESTRICT sy0[depos_order + 1]; + const int k0 = compute_shape_factor<depos_order>(sy0, ymid-stagger_shift); +#endif + // z direction + const Real zmid= (zp[ip]-zmin)*dzi-dts2dz*vz; + Real AMREX_RESTRICT sz [depos_order + 1]; + const int l = compute_shape_factor<depos_order>(sz, zmid); + Real AMREX_RESTRICT sz0[depos_order + 1]; + const int l0 = compute_shape_factor<depos_order>(sz0, zmid-stagger_shift); + + // Deposit current into jx_arr, jy_arr and jz_arr +#if (AMREX_SPACEDIM == 2) + for (int iz=0; iz<=depos_order; iz++){ + for (int ix=0; ix<=depos_order; ix++){ + amrex::Gpu::Atomic::Add( + &jx_arr(lo.x+j0+ix, lo.y+l +iz, 0), + sx0[ix]*sz [iz]*wqx); + amrex::Gpu::Atomic::Add( + &jy_arr(lo.x+j +ix, lo.y+l +iz, 0), + sx [ix]*sz [iz]*wqy); + amrex::Gpu::Atomic::Add( + &jz_arr(lo.x+j +ix, lo.y+l0+iz, 0), + sx [ix]*sz0[iz]*wqz); + } + } +#else // (AMREX_SPACEDIM == 3) + for (int iz=0; iz<=depos_order; iz++){ + for (int iy=0; iy<=depos_order; iy++){ + for (int ix=0; ix<=depos_order; ix++){ + amrex::Gpu::Atomic::Add( + &jx_arr(lo.x+j0+ix, lo.y+k +iy, lo.z+l +iz), + sx0[ix]*sy [iy]*sz [iz]*wqx); + amrex::Gpu::Atomic::Add( + &jy_arr(lo.x+j +ix, lo.y+k0+iy, lo.z+l +iz), + sx [ix]*sy0[iy]*sz [iz]*wqy); + amrex::Gpu::Atomic::Add( + &jz_arr(lo.x+j +ix, lo.y+k +iy, lo.z+l0+iz), + sx [ix]*sy [iy]*sz0[iz]*wqz); + } + } + } +#endif + } + ); +} + +#endif // CURRENTDEPOSITION_H_ diff --git a/Source/Particles/Deposition/Make.package b/Source/Particles/Deposition/Make.package new file mode 100644 index 000000000..0d5ebe2a7 --- /dev/null +++ b/Source/Particles/Deposition/Make.package @@ -0,0 +1,3 @@ +CEXE_headers += CurrentDeposition.H +INCLUDE_LOCATIONS += $(WARPX_HOME)/Source/Particles/Deposition +VPATH_LOCATIONS += $(WARPX_HOME)/Source/Particles/Deposition diff --git a/Source/Particles/Make.package b/Source/Particles/Make.package index f33095738..2038472a1 100644 --- a/Source/Particles/Make.package +++ b/Source/Particles/Make.package @@ -11,6 +11,7 @@ CEXE_headers += RigidInjectedParticleContainer.H CEXE_headers += PhysicalParticleContainer.H include $(WARPX_HOME)/Source/Particles/Pusher/Make.package +include $(WARPX_HOME)/Source/Particles/Deposition/Make.package INCLUDE_LOCATIONS += $(WARPX_HOME)/Source/Particles VPATH_LOCATIONS += $(WARPX_HOME)/Source/Particles diff --git a/Source/Particles/PhysicalParticleContainer.H b/Source/Particles/PhysicalParticleContainer.H index bd8cfb47e..d55764682 100644 --- a/Source/Particles/PhysicalParticleContainer.H +++ b/Source/Particles/PhysicalParticleContainer.H @@ -77,6 +77,9 @@ public: const amrex::MultiFab& Bx, const amrex::MultiFab& By, const amrex::MultiFab& Bz) override; + + void copy_attribs(WarpXParIter& pti,const amrex::Real* xp, + const amrex::Real* yp, const amrex::Real* zp); virtual void PostRestart () final {} diff --git a/Source/Particles/PhysicalParticleContainer.cpp b/Source/Particles/PhysicalParticleContainer.cpp index 43b46ec49..d47a7b220 100644 --- a/Source/Particles/PhysicalParticleContainer.cpp +++ b/Source/Particles/PhysicalParticleContainer.cpp @@ -12,7 +12,7 @@ using namespace amrex; long PhysicalParticleContainer:: NumParticlesToAdd(const Box& overlap_box, const RealBox& overlap_realbox, - const RealBox& tile_realbox, const RealBox& particle_real_box) + const RealBox& tile_realbox, const RealBox& particle_real_box) { const int lev = 0; const Geometry& geom = Geom(lev); @@ -24,43 +24,43 @@ NumParticlesToAdd(const Box& overlap_box, const RealBox& overlap_realbox, for (IntVect iv = overlap_box.smallEnd(); iv <= overlap_box.bigEnd(); overlap_box.next(iv)) { int fac; - if (do_continuous_injection) { + if (do_continuous_injection) { #if ( AMREX_SPACEDIM == 3 ) - Real x = overlap_corner[0] + (iv[0] + 0.5)*dx[0]; - Real y = overlap_corner[1] + (iv[1] + 0.5)*dx[1]; - Real z = overlap_corner[2] + (iv[2] + 0.5)*dx[2]; + Real x = overlap_corner[0] + (iv[0] + 0.5)*dx[0]; + Real y = overlap_corner[1] + (iv[1] + 0.5)*dx[1]; + Real z = overlap_corner[2] + (iv[2] + 0.5)*dx[2]; #elif ( AMREX_SPACEDIM == 2 ) - Real x = overlap_corner[0] + (iv[0] + 0.5)*dx[0]; - Real y = 0; - Real z = overlap_corner[1] + (iv[1] + 0.5)*dx[1]; + Real x = overlap_corner[0] + (iv[0] + 0.5)*dx[0]; + Real y = 0; + Real z = overlap_corner[1] + (iv[1] + 0.5)*dx[1]; #endif - fac = GetRefineFac(x, y, z); - } else { - fac = 1.0; - } + fac = GetRefineFac(x, y, z); + } else { + fac = 1.0; + } - int ref_num_ppc = num_ppc * AMREX_D_TERM(fac, *fac, *fac); - for (int i_part=0; i_part<ref_num_ppc;i_part++) { - std::array<Real, 3> r; - plasma_injector->getPositionUnitBox(r, i_part, fac); + int ref_num_ppc = num_ppc * AMREX_D_TERM(fac, *fac, *fac); + for (int i_part=0; i_part<ref_num_ppc;i_part++) { + std::array<Real, 3> r; + plasma_injector->getPositionUnitBox(r, i_part, fac); #if ( AMREX_SPACEDIM == 3 ) - Real x = overlap_corner[0] + (iv[0] + r[0])*dx[0]; - Real y = overlap_corner[1] + (iv[1] + r[1])*dx[1]; - Real z = overlap_corner[2] + (iv[2] + r[2])*dx[2]; + Real x = overlap_corner[0] + (iv[0] + r[0])*dx[0]; + Real y = overlap_corner[1] + (iv[1] + r[1])*dx[1]; + Real z = overlap_corner[2] + (iv[2] + r[2])*dx[2]; #elif ( AMREX_SPACEDIM == 2 ) - Real x = overlap_corner[0] + (iv[0] + r[0])*dx[0]; - Real y = 0; - Real z = overlap_corner[1] + (iv[1] + r[1])*dx[1]; + Real x = overlap_corner[0] + (iv[0] + r[0])*dx[0]; + Real y = 0; + Real z = overlap_corner[1] + (iv[1] + r[1])*dx[1]; #endif - // If the new particle is not inside the tile box, - // go to the next generated particle. + // If the new particle is not inside the tile box, + // go to the next generated particle. #if ( AMREX_SPACEDIM == 3 ) - if(!tile_realbox.contains( RealVect{x, y, z} )) continue; + if(!tile_realbox.contains( RealVect{x, y, z} )) continue; #elif ( AMREX_SPACEDIM == 2 ) - if(!tile_realbox.contains( RealVect{x, z} )) continue; + if(!tile_realbox.contains( RealVect{x, z} )) continue; #endif - ++np; - } + ++np; + } } return np; @@ -170,8 +170,8 @@ void PhysicalParticleContainer::MapParticletoBoostedFrame(Real& x, Real& y, Real // Move the particles to where they will be at t = 0 in the boosted frame if (boost_adjust_transverse_positions) { - x = xpr - tpr*vxpr; - y = ypr - tpr*vypr; + x = xpr - tpr*vxpr; + y = ypr - tpr*vypr; } z = zpr - tpr*vzpr; @@ -323,9 +323,9 @@ void PhysicalParticleContainer::AddPlasma (int lev, RealBox part_realbox) { #ifdef AMREX_USE_GPU - AddPlasmaGPU(lev, part_realbox); + AddPlasmaGPU(lev, part_realbox); #else - AddPlasmaCPU(lev, part_realbox); + AddPlasmaCPU(lev, part_realbox); #endif } @@ -416,7 +416,7 @@ PhysicalParticleContainer::AddPlasmaCPU (int lev, RealBox part_realbox) // Count the number of cells in this direction in overlap_realbox overlap_box.setSmall( dir, 0 ); overlap_box.setBig( dir, - int( round((overlap_realbox.hi(dir)-overlap_realbox.lo(dir))/dx[dir] )) - 1); + int( round((overlap_realbox.hi(dir)-overlap_realbox.lo(dir))/dx[dir] )) - 1); } if (no_overlap == 1) { continue; // Go to the next tile @@ -483,54 +483,54 @@ PhysicalParticleContainer::AddPlasmaCPU (int lev, RealBox part_realbox) Real dens; std::array<Real, 3> u; if (WarpX::gamma_boost == 1.){ - // Lab-frame simulation - // If the particle is not within the species's - // xmin, xmax, ymin, ymax, zmin, zmax, go to - // the next generated particle. - if (!plasma_injector->insideBounds(xb, yb, z)) continue; - plasma_injector->getMomentum(u, x, y, z); - dens = plasma_injector->getDensity(x, y, z); + // Lab-frame simulation + // If the particle is not within the species's + // xmin, xmax, ymin, ymax, zmin, zmax, go to + // the next generated particle. + if (!plasma_injector->insideBounds(xb, yb, z)) continue; + plasma_injector->getMomentum(u, x, y, z); + dens = plasma_injector->getDensity(x, y, z); } else { - // Boosted-frame simulation - Real c = PhysConst::c; - Real gamma_boost = WarpX::gamma_boost; - Real beta_boost = WarpX::beta_boost; - // Since the user provides the density distribution - // at t_lab=0 and in the lab-frame coordinates, - // we need to find the lab-frame position of this - // particle at t_lab=0, from its boosted-frame coordinates - // Assuming ballistic motion, this is given by: - // z0_lab = gamma*( z_boost*(1-beta*betaz_lab) - ct_boost*(betaz_lab-beta) ) - // where betaz_lab is the speed of the particle in the lab frame - // - // In order for this equation to be solvable, betaz_lab - // is explicitly assumed to have no dependency on z0_lab - plasma_injector->getMomentum(u, x, y, 0.); // No z0_lab dependency - // At this point u is the lab-frame momentum - // => Apply the above formula for z0_lab - Real gamma_lab = std::sqrt( 1 + (u[0]*u[0] + u[1]*u[1] + u[2]*u[2])/(c*c) ); - Real betaz_lab = u[2]/gamma_lab/c; - Real t = WarpX::GetInstance().gett_new(lev); - Real z0_lab = gamma_boost * ( z*(1-beta_boost*betaz_lab) - c*t*(betaz_lab-beta_boost) ); - // If the particle is not within the lab-frame zmin, zmax, etc. - // go to the next generated particle. - if (!plasma_injector->insideBounds(xb, yb, z0_lab)) continue; - // call `getDensity` with lab-frame parameters - dens = plasma_injector->getDensity(x, y, z0_lab); - // At this point u and dens are the lab-frame quantities - // => Perform Lorentz transform - dens = gamma_boost * dens * ( 1 - beta_boost*betaz_lab ); - u[2] = gamma_boost * ( u[2] -beta_boost*c*gamma_lab ); + // Boosted-frame simulation + Real c = PhysConst::c; + Real gamma_boost = WarpX::gamma_boost; + Real beta_boost = WarpX::beta_boost; + // Since the user provides the density distribution + // at t_lab=0 and in the lab-frame coordinates, + // we need to find the lab-frame position of this + // particle at t_lab=0, from its boosted-frame coordinates + // Assuming ballistic motion, this is given by: + // z0_lab = gamma*( z_boost*(1-beta*betaz_lab) - ct_boost*(betaz_lab-beta) ) + // where betaz_lab is the speed of the particle in the lab frame + // + // In order for this equation to be solvable, betaz_lab + // is explicitly assumed to have no dependency on z0_lab + plasma_injector->getMomentum(u, x, y, 0.); // No z0_lab dependency + // At this point u is the lab-frame momentum + // => Apply the above formula for z0_lab + Real gamma_lab = std::sqrt( 1 + (u[0]*u[0] + u[1]*u[1] + u[2]*u[2])/(c*c) ); + Real betaz_lab = u[2]/gamma_lab/c; + Real t = WarpX::GetInstance().gett_new(lev); + Real z0_lab = gamma_boost * ( z*(1-beta_boost*betaz_lab) - c*t*(betaz_lab-beta_boost) ); + // If the particle is not within the lab-frame zmin, zmax, etc. + // go to the next generated particle. + if (!plasma_injector->insideBounds(xb, yb, z0_lab)) continue; + // call `getDensity` with lab-frame parameters + dens = plasma_injector->getDensity(x, y, z0_lab); + // At this point u and dens are the lab-frame quantities + // => Perform Lorentz transform + dens = gamma_boost * dens * ( 1 - beta_boost*betaz_lab ); + u[2] = gamma_boost * ( u[2] -beta_boost*c*gamma_lab ); } Real weight = dens * scale_fac / (AMREX_D_TERM(fac, *fac, *fac)); #ifdef WARPX_RZ if (plasma_injector->radially_weighted) { - weight *= 2*MathConst::pi*xb; + weight *= 2*MathConst::pi*xb; } else { - // This is not correct since it might shift the particle - // out of the local grid - x = std::sqrt(xb*rmax); - weight *= dx[0]; + // This is not correct since it might shift the particle + // out of the local grid + x = std::sqrt(xb*rmax); + weight *= dx[0]; } #endif attribs[PIdx::w ] = weight; @@ -550,18 +550,18 @@ PhysicalParticleContainer::AddPlasmaCPU (int lev, RealBox part_realbox) particle_tile.push_back_real(particle_comps["uzold"], u[2]); } - AddOneParticle(lev, grid_id, tile_id, x, y, z, attribs); + AddOneParticle(lev, grid_id, tile_id, x, y, z, attribs); } } if (cost) { - wt = (amrex::second() - wt) / tile_box.d_numPts(); + wt = (amrex::second() - wt) / tile_box.d_numPts(); Array4<Real> const& costarr = cost->array(mfi); amrex::ParallelFor(tile_box, - [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept - { - costarr(i,j,k) += wt; - }); + [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept + { + costarr(i,j,k) += wt; + }); } } } @@ -655,7 +655,7 @@ PhysicalParticleContainer::AddPlasmaGPU (int lev, RealBox part_realbox) // Count the number of cells in this direction in overlap_realbox overlap_box.setSmall( dir, 0 ); overlap_box.setBig( dir, - int( round((overlap_realbox.hi(dir)-overlap_realbox.lo(dir))/dx[dir] )) - 1); + int( round((overlap_realbox.hi(dir)-overlap_realbox.lo(dir))/dx[dir] )) - 1); } if (no_overlap == 1) { continue; // Go to the next tile @@ -664,8 +664,8 @@ PhysicalParticleContainer::AddPlasmaGPU (int lev, RealBox part_realbox) const int grid_id = mfi.index(); const int tile_id = mfi.LocalTileIndex(); - Cuda::HostVector<ParticleType> host_particles; - std::array<Cuda::HostVector<Real>, PIdx::nattribs> host_attribs; + Cuda::HostVector<ParticleType> host_particles; + std::array<Cuda::HostVector<Real>, PIdx::nattribs> host_attribs; // Loop through the cells of overlap_box and inject // the corresponding particles @@ -725,54 +725,54 @@ PhysicalParticleContainer::AddPlasmaGPU (int lev, RealBox part_realbox) Real dens; std::array<Real, 3> u; if (WarpX::gamma_boost == 1.){ - // Lab-frame simulation - // If the particle is not within the species's - // xmin, xmax, ymin, ymax, zmin, zmax, go to - // the next generated particle. - if (!plasma_injector->insideBounds(xb, yb, z)) continue; - plasma_injector->getMomentum(u, x, y, z); - dens = plasma_injector->getDensity(x, y, z); + // Lab-frame simulation + // If the particle is not within the species's + // xmin, xmax, ymin, ymax, zmin, zmax, go to + // the next generated particle. + if (!plasma_injector->insideBounds(xb, yb, z)) continue; + plasma_injector->getMomentum(u, x, y, z); + dens = plasma_injector->getDensity(x, y, z); } else { - // Boosted-frame simulation - Real c = PhysConst::c; - Real gamma_boost = WarpX::gamma_boost; - Real beta_boost = WarpX::beta_boost; - // Since the user provides the density distribution - // at t_lab=0 and in the lab-frame coordinates, - // we need to find the lab-frame position of this - // particle at t_lab=0, from its boosted-frame coordinates - // Assuming ballistic motion, this is given by: - // z0_lab = gamma*( z_boost*(1-beta*betaz_lab) - ct_boost*(betaz_lab-beta) ) - // where betaz_lab is the speed of the particle in the lab frame - // - // In order for this equation to be solvable, betaz_lab - // is explicitly assumed to have no dependency on z0_lab - plasma_injector->getMomentum(u, x, y, 0.); // No z0_lab dependency - // At this point u is the lab-frame momentum - // => Apply the above formula for z0_lab - Real gamma_lab = std::sqrt( 1 + (u[0]*u[0] + u[1]*u[1] + u[2]*u[2])/(c*c) ); - Real betaz_lab = u[2]/gamma_lab/c; - Real t = WarpX::GetInstance().gett_new(lev); - Real z0_lab = gamma_boost * ( z*(1-beta_boost*betaz_lab) - c*t*(betaz_lab-beta_boost) ); - // If the particle is not within the lab-frame zmin, zmax, etc. - // go to the next generated particle. - if (!plasma_injector->insideBounds(xb, yb, z0_lab)) continue; - // call `getDensity` with lab-frame parameters - dens = plasma_injector->getDensity(x, y, z0_lab); - // At this point u and dens are the lab-frame quantities - // => Perform Lorentz transform - dens = gamma_boost * dens * ( 1 - beta_boost*betaz_lab ); - u[2] = gamma_boost * ( u[2] -beta_boost*c*gamma_lab ); + // Boosted-frame simulation + Real c = PhysConst::c; + Real gamma_boost = WarpX::gamma_boost; + Real beta_boost = WarpX::beta_boost; + // Since the user provides the density distribution + // at t_lab=0 and in the lab-frame coordinates, + // we need to find the lab-frame position of this + // particle at t_lab=0, from its boosted-frame coordinates + // Assuming ballistic motion, this is given by: + // z0_lab = gamma*( z_boost*(1-beta*betaz_lab) - ct_boost*(betaz_lab-beta) ) + // where betaz_lab is the speed of the particle in the lab frame + // + // In order for this equation to be solvable, betaz_lab + // is explicitly assumed to have no dependency on z0_lab + plasma_injector->getMomentum(u, x, y, 0.); // No z0_lab dependency + // At this point u is the lab-frame momentum + // => Apply the above formula for z0_lab + Real gamma_lab = std::sqrt( 1 + (u[0]*u[0] + u[1]*u[1] + u[2]*u[2])/(c*c) ); + Real betaz_lab = u[2]/gamma_lab/c; + Real t = WarpX::GetInstance().gett_new(lev); + Real z0_lab = gamma_boost * ( z*(1-beta_boost*betaz_lab) - c*t*(betaz_lab-beta_boost) ); + // If the particle is not within the lab-frame zmin, zmax, etc. + // go to the next generated particle. + if (!plasma_injector->insideBounds(xb, yb, z0_lab)) continue; + // call `getDensity` with lab-frame parameters + dens = plasma_injector->getDensity(x, y, z0_lab); + // At this point u and dens are the lab-frame quantities + // => Perform Lorentz transform + dens = gamma_boost * dens * ( 1 - beta_boost*betaz_lab ); + u[2] = gamma_boost * ( u[2] -beta_boost*c*gamma_lab ); } Real weight = dens * scale_fac / (AMREX_D_TERM(fac, *fac, *fac)); #ifdef WARPX_RZ if (plasma_injector->radially_weighted) { - weight *= 2*MathConst::pi*xb; + weight *= 2*MathConst::pi*xb; } else { - // This is not correct since it might shift the particle - // out of the local grid - x = std::sqrt(xb*rmax); - weight *= dx[0]; + // This is not correct since it might shift the particle + // out of the local grid + x = std::sqrt(xb*rmax); + weight *= dx[0]; } #endif attribs[PIdx::w ] = weight; @@ -793,50 +793,50 @@ PhysicalParticleContainer::AddPlasmaGPU (int lev, RealBox part_realbox) particle_tile.push_back_real(particle_comps["uzold"], u[2]); } - ParticleType p; - p.id() = ParticleType::NextID(); - p.cpu() = ParallelDescriptor::MyProc(); + ParticleType p; + p.id() = ParticleType::NextID(); + p.cpu() = ParallelDescriptor::MyProc(); #if (AMREX_SPACEDIM == 3) - p.pos(0) = x; - p.pos(1) = y; - p.pos(2) = z; + p.pos(0) = x; + p.pos(1) = y; + p.pos(2) = z; #elif (AMREX_SPACEDIM == 2) #ifdef WARPX_RZ attribs[PIdx::theta] = theta; #endif - p.pos(0) = xb; - p.pos(1) = z; + p.pos(0) = xb; + p.pos(1) = z; #endif - host_particles.push_back(p); - for (int kk = 0; kk < PIdx::nattribs; ++kk) - host_attribs[kk].push_back(attribs[kk]); + host_particles.push_back(p); + for (int kk = 0; kk < PIdx::nattribs; ++kk) + host_attribs[kk].push_back(attribs[kk]); } } - auto& particle_tile = GetParticles(lev)[std::make_pair(grid_id,tile_id)]; + auto& particle_tile = GetParticles(lev)[std::make_pair(grid_id,tile_id)]; auto old_size = particle_tile.GetArrayOfStructs().size(); auto new_size = old_size + host_particles.size(); - particle_tile.resize(new_size); + particle_tile.resize(new_size); Cuda::thrust_copy(host_particles.begin(), host_particles.end(), particle_tile.GetArrayOfStructs().begin() + old_size); - for (int kk = 0; kk < PIdx::nattribs; ++kk) { + for (int kk = 0; kk < PIdx::nattribs; ++kk) { Cuda::thrust_copy(host_attribs[kk].begin(), host_attribs[kk].end(), particle_tile.GetStructOfArrays().GetRealData(kk).begin() + old_size); - } + } if (cost) { - wt = (amrex::second() - wt) / tile_box.d_numPts(); + wt = (amrex::second() - wt) / tile_box.d_numPts(); Array4<Real> const& costarr = cost->array(mfi); amrex::ParallelFor(tile_box, - [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept - { - costarr(i,j,k) += wt; - }); + [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept + { + costarr(i,j,k) += wt; + }); } } } @@ -963,13 +963,13 @@ FieldGatherES (const amrex::Vector<std::array<std::unique_ptr<amrex::MultiFab>, WRPX_INTERPOLATE_CIC(particles.dataPtr(), nstride, np, Exp.dataPtr(), Eyp.dataPtr(), #if AMREX_SPACEDIM == 3 - Ezp.dataPtr(), + Ezp.dataPtr(), #endif - exfab.dataPtr(), eyfab.dataPtr(), + exfab.dataPtr(), eyfab.dataPtr(), #if AMREX_SPACEDIM == 3 - ezfab.dataPtr(), + ezfab.dataPtr(), #endif - box.loVect(), box.hiVect(), plo, dx, &ng); + box.loVect(), box.hiVect(), plo, dx, &ng); } else { const FArrayBox& exfab_coarse = coarse_Ex[pti]; @@ -1004,7 +1004,7 @@ FieldGatherES (const amrex::Vector<std::array<std::unique_ptr<amrex::MultiFab>, void PhysicalParticleContainer::EvolveES (const Vector<std::array<std::unique_ptr<MultiFab>, 3> >& E, - Vector<std::unique_ptr<MultiFab> >& rho, + Vector<std::unique_ptr<MultiFab> >& rho, Real t, Real dt) { BL_PROFILE("PPC::EvolveES()"); @@ -1014,7 +1014,7 @@ PhysicalParticleContainer::EvolveES (const Vector<std::array<std::unique_ptr<Mul BL_ASSERT(OnSameGrids(lev, *rho[lev])); const auto& gm = m_gdb->Geom(lev); const RealBox& prob_domain = gm.ProbDomain(); - for (WarpXParIter pti(*this, lev); pti.isValid(); ++pti) { + for (WarpXParIter pti(*this, lev); pti.isValid(); ++pti) { // Particle structs auto& particles = pti.GetArrayOfStructs(); int nstride = particles.dataShape().first; @@ -1071,11 +1071,11 @@ PhysicalParticleContainer::FieldGather (int lev, { Cuda::ManagedDeviceVector<Real> xp, yp, zp; - for (WarpXParIter pti(*this, lev); pti.isValid(); ++pti) - { + for (WarpXParIter pti(*this, lev); pti.isValid(); ++pti) + { Real wt = amrex::second(); - const Box& box = pti.validbox(); + const Box& box = pti.validbox(); auto& attribs = pti.GetAttribs(); @@ -1088,63 +1088,63 @@ PhysicalParticleContainer::FieldGather (int lev, const long np = pti.numParticles(); - // Data on the grid - const FArrayBox& exfab = Ex[pti]; - const FArrayBox& eyfab = Ey[pti]; - const FArrayBox& ezfab = Ez[pti]; - const FArrayBox& bxfab = Bx[pti]; - const FArrayBox& byfab = By[pti]; - const FArrayBox& bzfab = Bz[pti]; - - Exp.assign(np,0.0); - Eyp.assign(np,0.0); - Ezp.assign(np,0.0); - Bxp.assign(np,0.0); - Byp.assign(np,0.0); - Bzp.assign(np,0.0); - - // - // copy data from particle container to temp arrays - // + // Data on the grid + const FArrayBox& exfab = Ex[pti]; + const FArrayBox& eyfab = Ey[pti]; + const FArrayBox& ezfab = Ez[pti]; + const FArrayBox& bxfab = Bx[pti]; + const FArrayBox& byfab = By[pti]; + const FArrayBox& bzfab = Bz[pti]; + + Exp.assign(np,0.0); + Eyp.assign(np,0.0); + Ezp.assign(np,0.0); + Bxp.assign(np,0.0); + Byp.assign(np,0.0); + Bzp.assign(np,0.0); + + // + // copy data from particle container to temp arrays + // pti.GetPosition(xp, yp, zp); const std::array<Real,3>& xyzmin = WarpX::LowerCorner(box, lev); const int* ixyzmin = box.loVect(); - // - // Field Gather - // - const int ll4symtry = false; + // + // Field Gather + // + const int ll4symtry = false; long lvect_fieldgathe = 64; - warpx_geteb_energy_conserving( - &np, - xp.dataPtr(), - yp.dataPtr(), - zp.dataPtr(), - Exp.dataPtr(),Eyp.dataPtr(),Ezp.dataPtr(), - Bxp.dataPtr(),Byp.dataPtr(),Bzp.dataPtr(), - ixyzmin, - &xyzmin[0], &xyzmin[1], &xyzmin[2], - &dx[0], &dx[1], &dx[2], - &WarpX::nox, &WarpX::noy, &WarpX::noz, - BL_TO_FORTRAN_ANYD(exfab), - BL_TO_FORTRAN_ANYD(eyfab), - BL_TO_FORTRAN_ANYD(ezfab), - BL_TO_FORTRAN_ANYD(bxfab), - BL_TO_FORTRAN_ANYD(byfab), - BL_TO_FORTRAN_ANYD(bzfab), - &ll4symtry, &WarpX::l_lower_order_in_v, &WarpX::do_nodal, - &lvect_fieldgathe, &WarpX::field_gathering_algo); + warpx_geteb_energy_conserving( + &np, + xp.dataPtr(), + yp.dataPtr(), + zp.dataPtr(), + Exp.dataPtr(),Eyp.dataPtr(),Ezp.dataPtr(), + Bxp.dataPtr(),Byp.dataPtr(),Bzp.dataPtr(), + ixyzmin, + &xyzmin[0], &xyzmin[1], &xyzmin[2], + &dx[0], &dx[1], &dx[2], + &WarpX::nox, &WarpX::noy, &WarpX::noz, + BL_TO_FORTRAN_ANYD(exfab), + BL_TO_FORTRAN_ANYD(eyfab), + BL_TO_FORTRAN_ANYD(ezfab), + BL_TO_FORTRAN_ANYD(bxfab), + BL_TO_FORTRAN_ANYD(byfab), + BL_TO_FORTRAN_ANYD(bzfab), + &ll4symtry, &WarpX::l_lower_order_in_v, &WarpX::do_nodal, + &lvect_fieldgathe, &WarpX::field_gathering_algo); if (cost) { const Box& tbx = pti.tilebox(); wt = (amrex::second() - wt) / tbx.d_numPts(); Array4<Real> const& costarr = cost->array(pti); amrex::ParallelFor(tbx, - [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept - { - costarr(i,j,k) += wt; - }); + [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept + { + costarr(i,j,k) += wt; + }); } } } @@ -1152,9 +1152,9 @@ PhysicalParticleContainer::FieldGather (int lev, void PhysicalParticleContainer::Evolve (int lev, - const MultiFab& Ex, const MultiFab& Ey, const MultiFab& Ez, - const MultiFab& Bx, const MultiFab& By, const MultiFab& Bz, - MultiFab& jx, MultiFab& jy, MultiFab& jz, + const MultiFab& Ex, const MultiFab& Ey, const MultiFab& Ez, + const MultiFab& Bx, const MultiFab& By, const MultiFab& Bz, + MultiFab& jx, MultiFab& jy, MultiFab& jz, MultiFab* cjx, MultiFab* cjy, MultiFab* cjz, MultiFab* rho, MultiFab* crho, const MultiFab* cEx, const MultiFab* cEy, const MultiFab* cEz, @@ -1200,8 +1200,8 @@ PhysicalParticleContainer::Evolve (int lev, RealVector tmp; ParticleVector particle_tmp; - for (WarpXParIter pti(*this, lev); pti.isValid(); ++pti) - { + for (WarpXParIter pti(*this, lev); pti.isValid(); ++pti) + { Real wt = amrex::second(); const Box& box = pti.validbox(); @@ -1282,14 +1282,14 @@ PhysicalParticleContainer::Evolve (int lev, #endif } - Exp.assign(np,0.0); - Eyp.assign(np,0.0); - Ezp.assign(np,0.0); - Bxp.assign(np,WarpX::B_external[0]); - Byp.assign(np,WarpX::B_external[1]); - Bzp.assign(np,WarpX::B_external[2]); + Exp.assign(np,0.0); + Eyp.assign(np,0.0); + Ezp.assign(np,0.0); + Bxp.assign(np,WarpX::B_external[0]); + Byp.assign(np,WarpX::B_external[1]); + Bzp.assign(np,WarpX::B_external[2]); - m_giv[thread_num].resize(np); + m_giv[thread_num].resize(np); long nfine_current = np; long nfine_gather = np; @@ -1384,12 +1384,12 @@ PhysicalParticleContainer::Evolve (int lev, const long np_current = (cjx) ? nfine_current : np; - // - // copy data from particle container to temp arrays - // - BL_PROFILE_VAR_START(blp_copy); + // + // copy data from particle container to temp arrays + // + BL_PROFILE_VAR_START(blp_copy); pti.GetPosition(m_xp[thread_num], m_yp[thread_num], m_zp[thread_num]); - BL_PROFILE_VAR_STOP(blp_copy); + BL_PROFILE_VAR_STOP(blp_copy); if (rho) DepositCharge(pti, wp, rho, crho, 0, np_current, np, thread_num, lev); @@ -1529,17 +1529,30 @@ PhysicalParticleContainer::Evolve (int lev, // // Current Deposition // - // Deposit inside domains - DepositCurrent(pti, wp, uxp, uyp, uzp, &jx, &jy, &jz, - 0, np_current, thread_num, - lev, lev, dt); - if (has_buffer){ - // Deposit in buffers - DepositCurrent(pti, wp, uxp, uyp, uzp, cjx, cjy, cjz, - np_current, np-np_current, thread_num, - lev, lev-1, dt); + if (WarpX::use_picsar_deposition) { + // Deposit inside domains + DepositCurrentFortran(pti, wp, uxp, uyp, uzp, &jx, &jy, &jz, + 0, np_current, thread_num, + lev, lev, dt); + if (has_buffer){ + // Deposit in buffers + DepositCurrentFortran(pti, wp, uxp, uyp, uzp, cjx, cjy, cjz, + np_current, np-np_current, thread_num, + lev, lev-1, dt); + } + } else { + // Deposit inside domains + DepositCurrent(pti, wp, uxp, uyp, uzp, &jx, &jy, &jz, + 0, np_current, thread_num, + lev, lev, dt); + if (has_buffer){ + // Deposit in buffers + DepositCurrent(pti, wp, uxp, uyp, uzp, cjx, cjy, cjz, + np_current, np-np_current, thread_num, + lev, lev-1, dt); + } } - + // // copy particle data back // @@ -1555,10 +1568,10 @@ PhysicalParticleContainer::Evolve (int lev, wt = (amrex::second() - wt) / tbx.d_numPts(); Array4<Real> const& costarr = cost->array(pti); amrex::ParallelFor(tbx, - [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept - { - costarr(i,j,k) += wt; - }); + [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept + { + costarr(i,j,k) += wt; + }); } } } @@ -1590,9 +1603,9 @@ PhysicalParticleContainer::SplitParticles(int lev) { pti.GetPosition(xp, yp, zp); const std::array<Real,3>& dx = WarpX::CellSize(lev); - // particle Array Of Structs data + // particle Array Of Structs data auto& particles = pti.GetArrayOfStructs(); - // particle Struct Of Arrays data + // particle Struct Of Arrays data auto& attribs = pti.GetAttribs(); auto& wp = attribs[PIdx::w ]; auto& uxp = attribs[PIdx::ux]; @@ -1602,13 +1615,13 @@ PhysicalParticleContainer::SplitParticles(int lev) for(int i=0; i<np; i++){ auto& p = particles[i]; if (p.id() == DoSplitParticleID){ - // If particle is tagged, split it and put the - // split particles in local arrays psplit_x etc. + // If particle is tagged, split it and put the + // split particles in local arrays psplit_x etc. np_split_to_add += np_split; #if (AMREX_SPACEDIM==2) if (split_type==0){ - // Split particle in two along each axis - // 4 particles in 2d + // Split particle in two along each axis + // 4 particles in 2d for (int ishift = -1; ishift < 2; ishift +=2 ){ for (int kshift = -1; kshift < 2; kshift +=2 ){ // Add one particle with offset in x and z @@ -1622,8 +1635,8 @@ PhysicalParticleContainer::SplitParticles(int lev) } } } else { - // Split particle in two along each diagonal - // 4 particles in 2d + // Split particle in two along each diagonal + // 4 particles in 2d for (int ishift = -1; ishift < 2; ishift +=2 ){ // Add one particle with offset in x psplit_x.push_back( xp[i] + ishift*dx[0]/2 ); @@ -1644,26 +1657,26 @@ PhysicalParticleContainer::SplitParticles(int lev) } } #elif (AMREX_SPACEDIM==3) - if (split_type==0){ - // Split particle in two along each axis - // 6 particles in 2d - for (int ishift = -1; ishift < 2; ishift +=2 ){ - for (int jshift = -1; jshift < 2; jshift +=2 ){ - for (int kshift = -1; kshift < 2; kshift +=2 ){ - // Add one particle with offset in x, y and z - psplit_x.push_back( xp[i] + ishift*dx[0]/2 ); - psplit_y.push_back( yp[i] + jshift*dx[1]/2 ); - psplit_z.push_back( zp[i] + jshift*dx[2]/2 ); - psplit_ux.push_back( uxp[i] ); - psplit_uy.push_back( uyp[i] ); - psplit_uz.push_back( uzp[i] ); - psplit_w.push_back( wp[i]/np_split ); - } - } - } - } else { - // Split particle in two along each diagonal - // 8 particles in 3d + if (split_type==0){ + // Split particle in two along each axis + // 6 particles in 2d + for (int ishift = -1; ishift < 2; ishift +=2 ){ + for (int jshift = -1; jshift < 2; jshift +=2 ){ + for (int kshift = -1; kshift < 2; kshift +=2 ){ + // Add one particle with offset in x, y and z + psplit_x.push_back( xp[i] + ishift*dx[0]/2 ); + psplit_y.push_back( yp[i] + jshift*dx[1]/2 ); + psplit_z.push_back( zp[i] + jshift*dx[2]/2 ); + psplit_ux.push_back( uxp[i] ); + psplit_uy.push_back( uyp[i] ); + psplit_uz.push_back( uzp[i] ); + psplit_w.push_back( wp[i]/np_split ); + } + } + } + } else { + // Split particle in two along each diagonal + // 8 particles in 3d for (int ishift = -1; ishift < 2; ishift +=2 ){ // Add one particle with offset in x psplit_x.push_back( xp[i] + ishift*dx[0]/2 ); @@ -1690,9 +1703,9 @@ PhysicalParticleContainer::SplitParticles(int lev) psplit_uz.push_back( uzp[i] ); psplit_w.push_back( wp[i]/np_split ); } - } + } #endif - // invalidate the particle + // invalidate the particle p.m_idata.id = -p.m_idata.id; } } @@ -1704,16 +1717,16 @@ PhysicalParticleContainer::SplitParticles(int lev) // AddNParticles calls Redistribute, so that particles // in pctmp_split are in the proper grids and tiles pctmp_split.AddNParticles(lev, - np_split_to_add, - psplit_x.dataPtr(), - psplit_y.dataPtr(), - psplit_z.dataPtr(), - psplit_ux.dataPtr(), - psplit_uy.dataPtr(), - psplit_uz.dataPtr(), - 1, - psplit_w.dataPtr(), - 1, NoSplitParticleID); + np_split_to_add, + psplit_x.dataPtr(), + psplit_y.dataPtr(), + psplit_z.dataPtr(), + psplit_ux.dataPtr(), + psplit_uy.dataPtr(), + psplit_uz.dataPtr(), + 1, + psplit_w.dataPtr(), + 1, NoSplitParticleID); // Copy particles from tmp to current particle container addParticles(pctmp_split,1); // Clear tmp container @@ -1722,14 +1735,20 @@ PhysicalParticleContainer::SplitParticles(int lev) void PhysicalParticleContainer::PushPX(WarpXParIter& pti, - Cuda::ManagedDeviceVector<Real>& xp, + Cuda::ManagedDeviceVector<Real>& xp, Cuda::ManagedDeviceVector<Real>& yp, Cuda::ManagedDeviceVector<Real>& zp, Cuda::ManagedDeviceVector<Real>& giv, Real dt) { - // This wraps the call to warpx_particle_pusher so that inheritors can modify the call. + if (WarpX::do_boosted_frame_diagnostic && do_boosted_frame_diags) + { + copy_attribs(pti, xp.dataPtr(), yp.dataPtr(), zp.dataPtr()); + } + + // The following attributes should be included in CPP version of warpx_particle_pusher + // This wraps the call to warpx_particle_pusher so that inheritors can modify the call. auto& attribs = pti.GetAttribs(); auto& uxp = attribs[PIdx::ux]; auto& uyp = attribs[PIdx::uy]; @@ -1741,22 +1760,7 @@ PhysicalParticleContainer::PushPX(WarpXParIter& pti, auto& Byp = attribs[PIdx::By]; auto& Bzp = attribs[PIdx::Bz]; const long np = pti.numParticles(); - - if (WarpX::do_boosted_frame_diagnostic && do_boosted_frame_diags) - { - auto& xpold = pti.GetAttribs(particle_comps["xold"]); - auto& ypold = pti.GetAttribs(particle_comps["yold"]); - auto& zpold = pti.GetAttribs(particle_comps["zold"]); - auto& uxpold = pti.GetAttribs(particle_comps["uxold"]); - auto& uypold = pti.GetAttribs(particle_comps["uyold"]); - auto& uzpold = pti.GetAttribs(particle_comps["uzold"]); - - warpx_copy_attribs(&np, xp.dataPtr(), yp.dataPtr(), zp.dataPtr(), - uxp.dataPtr(), uyp.dataPtr(), uzp.dataPtr(), - xpold.dataPtr(), ypold.dataPtr(), zpold.dataPtr(), - uxpold.dataPtr(), uypold.dataPtr(), uzpold.dataPtr()); - } - + warpx_particle_pusher(&np, xp.dataPtr(), yp.dataPtr(), @@ -1791,8 +1795,8 @@ PhysicalParticleContainer::PushP (int lev, Real dt, int thread_num = 0; #endif for (WarpXParIter pti(*this, lev); pti.isValid(); ++pti) - { - const Box& box = pti.validbox(); + { + const Box& box = pti.validbox(); auto& attribs = pti.GetAttribs(); @@ -1808,26 +1812,26 @@ PhysicalParticleContainer::PushP (int lev, Real dt, const long np = pti.numParticles(); - // Data on the grid - const FArrayBox& exfab = Ex[pti]; - const FArrayBox& eyfab = Ey[pti]; - const FArrayBox& ezfab = Ez[pti]; - const FArrayBox& bxfab = Bx[pti]; - const FArrayBox& byfab = By[pti]; - const FArrayBox& bzfab = Bz[pti]; - - Exp.assign(np,0.0); - Eyp.assign(np,0.0); - Ezp.assign(np,0.0); - Bxp.assign(np,WarpX::B_external[0]); - Byp.assign(np,WarpX::B_external[1]); - Bzp.assign(np,WarpX::B_external[2]); - - m_giv[thread_num].resize(np); - - // - // copy data from particle container to temp arrays - // + // Data on the grid + const FArrayBox& exfab = Ex[pti]; + const FArrayBox& eyfab = Ey[pti]; + const FArrayBox& ezfab = Ez[pti]; + const FArrayBox& bxfab = Bx[pti]; + const FArrayBox& byfab = By[pti]; + const FArrayBox& bzfab = Bz[pti]; + + Exp.assign(np,0.0); + Eyp.assign(np,0.0); + Ezp.assign(np,0.0); + Bxp.assign(np,WarpX::B_external[0]); + Byp.assign(np,WarpX::B_external[1]); + Bzp.assign(np,WarpX::B_external[2]); + + m_giv[thread_num].resize(np); + + // + // copy data from particle container to temp arrays + // pti.GetPosition(m_xp[thread_num], m_yp[thread_num], m_zp[thread_num]); const std::array<Real,3>& xyzmin_grid = WarpX::LowerCorner(box, lev); @@ -1870,6 +1874,38 @@ PhysicalParticleContainer::PushP (int lev, Real dt, } } +void PhysicalParticleContainer::copy_attribs(WarpXParIter& pti,const Real* xp, + const Real* yp, const Real* zp) +{ + + auto& attribs = pti.GetAttribs(); + + Real* AMREX_RESTRICT uxp = attribs[PIdx::ux].dataPtr(); + Real* AMREX_RESTRICT uyp = attribs[PIdx::uy].dataPtr(); + Real* AMREX_RESTRICT uzp = attribs[PIdx::uz].dataPtr(); + + Real* AMREX_RESTRICT xpold = pti.GetAttribs(particle_comps["xold"]).dataPtr(); + Real* AMREX_RESTRICT ypold = pti.GetAttribs(particle_comps["yold"]).dataPtr(); + Real* AMREX_RESTRICT zpold = pti.GetAttribs(particle_comps["zold"]).dataPtr(); + Real* AMREX_RESTRICT uxpold = pti.GetAttribs(particle_comps["uxold"]).dataPtr(); + Real* AMREX_RESTRICT uypold = pti.GetAttribs(particle_comps["uyold"]).dataPtr(); + Real* AMREX_RESTRICT uzpold = pti.GetAttribs(particle_comps["uzold"]).dataPtr(); + + const long np = pti.numParticles(); + + ParallelFor( np, + [=] AMREX_GPU_DEVICE (long i) { + xpold[i]=xp[i]; + ypold[i]=yp[i]; + zpold[i]=zp[i]; + + uxpold[i]=uxp[i]; + uypold[i]=uyp[i]; + uzpold[i]=uzp[i]; + } + ); +} + void PhysicalParticleContainer::GetParticleSlice(const int direction, const Real z_old, const Real z_new, const Real t_boost, const Real t_lab, const Real dt, diff --git a/Source/Particles/RigidInjectedParticleContainer.cpp b/Source/Particles/RigidInjectedParticleContainer.cpp index 2a3e8dd0d..9bd4cb4fc 100644 --- a/Source/Particles/RigidInjectedParticleContainer.cpp +++ b/Source/Particles/RigidInjectedParticleContainer.cpp @@ -211,6 +211,11 @@ RigidInjectedParticleContainer::PushPX(WarpXParIter& pti, Real dt) { + if (WarpX::do_boosted_frame_diagnostic && do_boosted_frame_diags) + { + copy_attribs(pti, xp.dataPtr(), yp.dataPtr(), zp.dataPtr()); + } + // This wraps the call to warpx_particle_pusher so that inheritors can modify the call. auto& attribs = pti.GetAttribs(); auto& uxp = attribs[PIdx::ux]; @@ -224,21 +229,6 @@ RigidInjectedParticleContainer::PushPX(WarpXParIter& pti, auto& Bzp = attribs[PIdx::Bz]; const long np = pti.numParticles(); - if (WarpX::do_boosted_frame_diagnostic && do_boosted_frame_diags) - { - auto& xpold = pti.GetAttribs(particle_comps["xold"]); - auto& ypold = pti.GetAttribs(particle_comps["yold"]); - auto& zpold = pti.GetAttribs(particle_comps["zold"]); - auto& uxpold = pti.GetAttribs(particle_comps["uxold"]); - auto& uypold = pti.GetAttribs(particle_comps["uyold"]); - auto& uzpold = pti.GetAttribs(particle_comps["uzold"]); - - warpx_copy_attribs(&np, xp.dataPtr(), yp.dataPtr(), zp.dataPtr(), - uxp.dataPtr(), uyp.dataPtr(), uzp.dataPtr(), - xpold.dataPtr(), ypold.dataPtr(), zpold.dataPtr(), - uxpold.dataPtr(), uypold.dataPtr(), uzpold.dataPtr()); - } - // Save the position and momenta, making copies Cuda::ManagedDeviceVector<Real> xp_save, yp_save, zp_save; RealVector uxp_save, uyp_save, uzp_save; diff --git a/Source/Particles/WarpXParticleContainer.H b/Source/Particles/WarpXParticleContainer.H index 0e800bf1d..662b2e1b8 100644 --- a/Source/Particles/WarpXParticleContainer.H +++ b/Source/Particles/WarpXParticleContainer.H @@ -189,6 +189,21 @@ public: int depos_lev, amrex::Real dt); + virtual void DepositCurrentFortran(WarpXParIter& pti, + RealVector& wp, + RealVector& uxp, + RealVector& uyp, + RealVector& uzp, + amrex::MultiFab* jx, + amrex::MultiFab* jy, + amrex::MultiFab* jz, + const long offset, + const long np_to_depose, + int thread_num, + int lev, + int depos_lev, + amrex::Real dt); + // If particles start outside of the domain, ContinuousInjection // makes sure that they are initialized when they enter the domain, and // NOT before. Virtual function, overriden by derived classes. @@ -253,8 +268,8 @@ public: AddIntComp(comm); } - int DoBoostedFrameDiags () const { return do_boosted_frame_diags; } - + int DoBoostedFrameDiags () const { return do_boosted_frame_diags; } + protected: std::map<std::string, int> particle_comps; diff --git a/Source/Particles/WarpXParticleContainer.cpp b/Source/Particles/WarpXParticleContainer.cpp index 317f46fd4..7316dcc95 100644 --- a/Source/Particles/WarpXParticleContainer.cpp +++ b/Source/Particles/WarpXParticleContainer.cpp @@ -10,6 +10,7 @@ // Import low-level single-particle kernels #include <GetAndSetPosition.H> #include <UpdatePosition.H> +#include <CurrentDeposition.H> using namespace amrex; @@ -279,7 +280,7 @@ WarpXParticleContainer::AddNParticles (int lev, Redistribute(); } -/* \brief Current Deposition for thread thread_num +/* \brief Current Deposition for thread thread_num using PICSAR * \param pti : Particle iterator * \param wp : Array of particle weights * \param uxp uyp uzp : Array of particle @@ -292,16 +293,15 @@ WarpXParticleContainer::AddNParticles (int lev, * \param lev : Level of box that contains particles * \param depos_lev : Level on which particles deposit (if buffers are used) * \param dt : Time step for particle level - * \param ngJ : Number of ghosts cells (of lev) */ void -WarpXParticleContainer::DepositCurrent(WarpXParIter& pti, - RealVector& wp, RealVector& uxp, - RealVector& uyp, RealVector& uzp, - MultiFab* jx, MultiFab* jy, MultiFab* jz, - const long offset, const long np_to_depose, - int thread_num, int lev, int depos_lev, - Real dt) +WarpXParticleContainer::DepositCurrentFortran(WarpXParIter& pti, + RealVector& wp, RealVector& uxp, + RealVector& uyp, RealVector& uzp, + MultiFab* jx, MultiFab* jy, MultiFab* jz, + const long offset, const long np_to_depose, + int thread_num, int lev, int depos_lev, + Real dt) { AMREX_ALWAYS_ASSERT_WITH_MESSAGE((depos_lev==(lev-1)) || (depos_lev==(lev )), @@ -414,6 +414,130 @@ WarpXParticleContainer::DepositCurrent(WarpXParIter& pti, #endif } +/* \brief Current Deposition for thread thread_num + * \param pti : Particle iterator + * \param wp : Array of particle weights + * \param uxp uyp uzp : Array of particle + * \param jx jy jz : Full array of current density + * \param offset : Index of first particle for which current is deposited + * \param np_to_depose: Number of particles for which current is deposited. + Particles [offset,offset+np_tp_depose] deposit current + * \param thread_num : Thread number (if tiling) + * \param lev : Level of box that contains particles + * \param depos_lev : Level on which particles deposit (if buffers are used) + * \param dt : Time step for particle level + */ +void +WarpXParticleContainer::DepositCurrent(WarpXParIter& pti, + RealVector& wp, RealVector& uxp, + RealVector& uyp, RealVector& uzp, + MultiFab* jx, MultiFab* jy, MultiFab* jz, + const long offset, const long np_to_depose, + int thread_num, int lev, int depos_lev, + Real dt) +{ + AMREX_ALWAYS_ASSERT_WITH_MESSAGE((depos_lev==(lev-1)) || + (depos_lev==(lev )), + "Deposition buffers only work for lev-1"); + // If no particles, do not do anything + if (np_to_depose == 0) return; + + const long ngJ = jx->nGrow(); + const std::array<Real,3>& dx = WarpX::CellSize(std::max(depos_lev,0)); + int j_is_nodal = jx->is_nodal() and jy->is_nodal() and jz->is_nodal(); + Real q = this->charge; + const Real stagger_shift = j_is_nodal ? 0.0 : 0.5; + + BL_PROFILE_VAR_NS("PPC::Evolve::Accumulate", blp_accumulate); + + // Get tile box where current is deposited. + // The tile box is different when depositing in the buffers (depos_lev<lev) + // or when depositing inside the level (depos_lev=lev) + Box tilebox; + if (lev == depos_lev) { + tilebox = pti.tilebox(); + } else { + const IntVect& ref_ratio = WarpX::RefRatio(depos_lev); + tilebox = amrex::coarsen(pti.tilebox(),ref_ratio); + } + + // Staggered tile boxes (different in each direction) + Box tbx = convert(tilebox, WarpX::jx_nodal_flag); + Box tby = convert(tilebox, WarpX::jy_nodal_flag); + Box tbz = convert(tilebox, WarpX::jz_nodal_flag); + tilebox.grow(ngJ); + +#ifdef AMREX_USE_GPU + // No tiling on GPU: jx_ptr points to the full + // jx array (same for jy_ptr and jz_ptr). + Array4<Real> const& jx_arr = jx->array(pti); + Array4<Real> const& jy_arr = jy->array(pti); + Array4<Real> const& jz_arr = jz->array(pti); +#else + // Tiling is on: jx_ptr points to local_jx[thread_num] + // (same for jy_ptr and jz_ptr) + tbx.grow(ngJ); + tby.grow(ngJ); + tbz.grow(ngJ); + + local_jx[thread_num].resize(tbx); + local_jy[thread_num].resize(tby); + local_jz[thread_num].resize(tbz); + + // local_jx[thread_num] is set to zero + local_jx[thread_num].setVal(0.0); + local_jy[thread_num].setVal(0.0); + local_jz[thread_num].setVal(0.0); + + Array4<Real> const& jx_arr = local_jx[thread_num].array(); + Array4<Real> const& jy_arr = local_jy[thread_num].array(); + Array4<Real> const& jz_arr = local_jz[thread_num].array(); +#endif + // GPU, no tiling: deposit directly in jx + // CPU, tiling: deposit into local_jx + // (same for jx and jz) + + Real* AMREX_RESTRICT xp = m_xp[thread_num].dataPtr() + offset; + Real* AMREX_RESTRICT zp = m_zp[thread_num].dataPtr() + offset; + Real* AMREX_RESTRICT yp = m_yp[thread_num].dataPtr() + offset; + + // Lower corner of tile box physical domain + const std::array<Real, 3>& xyzmin = WarpX::LowerCorner(tilebox, depos_lev);; + // xyzmin is built on pti.tilebox(), so it does + // not include staggering, so the stagger_shift has to be done by hand. + // Alternatively, we could define xyzminx from tbx (and the same for 3 + // directions and for jx, jy, jz). This way, sx0 would not be needed. + // Better for memory? worth trying? + const Dim3 lo = lbound(tilebox); + + if (WarpX::nox == 1){ + doDepositionShapeN<1>(xp, yp, zp, wp.dataPtr(), uxp.dataPtr(), + uyp.dataPtr(), uzp.dataPtr(), jx_arr, jy_arr, + jz_arr, offset, np_to_depose, dt, dx, + xyzmin, lo, stagger_shift, q); + } else if (WarpX::nox == 2){ + doDepositionShapeN<2>(xp, yp, zp, wp.dataPtr(), uxp.dataPtr(), + uyp.dataPtr(), uzp.dataPtr(), jx_arr, jy_arr, + jz_arr, offset, np_to_depose, dt, dx, + xyzmin, lo, stagger_shift, q); + } else if (WarpX::nox == 3){ + doDepositionShapeN<3>(xp, yp, zp, wp.dataPtr(), uxp.dataPtr(), + uyp.dataPtr(), uzp.dataPtr(), jx_arr, jy_arr, + jz_arr, offset, np_to_depose, dt, dx, + xyzmin, lo, stagger_shift, q); + } + +#ifndef AMREX_USE_GPU + BL_PROFILE_VAR_START(blp_accumulate); + // CPU, tiling: atomicAdd local_jx into jx + // (same for jx and jz) + (*jx)[pti].atomicAdd(local_jx[thread_num], tbx, tbx, 0, 0, 1); + (*jy)[pti].atomicAdd(local_jy[thread_num], tby, tby, 0, 0, 1); + (*jz)[pti].atomicAdd(local_jz[thread_num], tbz, tbz, 0, 0, 1); + BL_PROFILE_VAR_STOP(blp_accumulate); +#endif +} + void WarpXParticleContainer::DepositCharge ( WarpXParIter& pti, RealVector& wp, MultiFab* rhomf, MultiFab* crhomf, int icomp, @@ -932,3 +1056,5 @@ WarpXParticleContainer::particlePostLocate(ParticleType& p, if (pld.m_lev == lev-1){ } } + + diff --git a/Source/Python/WarpXWrappers.cpp b/Source/Python/WarpXWrappers.cpp index 3c1a930b3..3ed4830f5 100644 --- a/Source/Python/WarpXWrappers.cpp +++ b/Source/Python/WarpXWrappers.cpp @@ -10,22 +10,26 @@ namespace { - double** getMultiFabPointers(const amrex::MultiFab& mf, int *num_boxes, int *ngrow, int **shapes) + double** getMultiFabPointers(const amrex::MultiFab& mf, int *num_boxes, int *ncomps, int *ngrow, int **shapes) { + *ncomps = mf.nComp(); *ngrow = mf.nGrow(); *num_boxes = mf.local_size(); - *shapes = (int*) malloc(AMREX_SPACEDIM * (*num_boxes) * sizeof(int)); + int shapesize = AMREX_SPACEDIM; + if (mf.nComp() > 1) shapesize += 1; + *shapes = (int*) malloc(shapesize * (*num_boxes) * sizeof(int)); double** data = (double**) malloc((*num_boxes) * sizeof(double*)); - int i = 0; #ifdef _OPENMP #pragma omp parallel #endif - for ( amrex::MFIter mfi(mf, false); mfi.isValid(); ++mfi, ++i ) { + for ( amrex::MFIter mfi(mf, false); mfi.isValid(); ++mfi ) { + int i = mfi.LocalIndex(); data[i] = (double*) mf[mfi].dataPtr(); for (int j = 0; j < AMREX_SPACEDIM; ++j) { - (*shapes)[AMREX_SPACEDIM*i+j] = mf[mfi].box().length(j); + (*shapes)[shapesize*i+j] = mf[mfi].box().length(j); } + if (mf.nComp() > 1) (*shapes)[shapesize*i+2] = mf.nComp(); } return data; } @@ -197,9 +201,9 @@ extern "C" } double** warpx_getEfield(int lev, int direction, - int *return_size, int *ngrow, int **shapes) { + int *return_size, int *ncomps, int *ngrow, int **shapes) { auto & mf = WarpX::GetInstance().getEfield(lev, direction); - return getMultiFabPointers(mf, return_size, ngrow, shapes); + return getMultiFabPointers(mf, return_size, ncomps, ngrow, shapes); } int* warpx_getEfieldLoVects(int lev, int direction, @@ -209,9 +213,9 @@ extern "C" } double** warpx_getEfieldCP(int lev, int direction, - int *return_size, int *ngrow, int **shapes) { + int *return_size, int *ncomps, int *ngrow, int **shapes) { auto & mf = WarpX::GetInstance().getEfield_cp(lev, direction); - return getMultiFabPointers(mf, return_size, ngrow, shapes); + return getMultiFabPointers(mf, return_size, ncomps, ngrow, shapes); } int* warpx_getEfieldCPLoVects(int lev, int direction, @@ -221,9 +225,9 @@ extern "C" } double** warpx_getEfieldFP(int lev, int direction, - int *return_size, int *ngrow, int **shapes) { + int *return_size, int *ncomps, int *ngrow, int **shapes) { auto & mf = WarpX::GetInstance().getEfield_fp(lev, direction); - return getMultiFabPointers(mf, return_size, ngrow, shapes); + return getMultiFabPointers(mf, return_size, ncomps, ngrow, shapes); } int* warpx_getEfieldFPLoVects(int lev, int direction, @@ -233,9 +237,9 @@ extern "C" } double** warpx_getBfield(int lev, int direction, - int *return_size, int *ngrow, int **shapes) { + int *return_size, int *ncomps, int *ngrow, int **shapes) { auto & mf = WarpX::GetInstance().getBfield(lev, direction); - return getMultiFabPointers(mf, return_size, ngrow, shapes); + return getMultiFabPointers(mf, return_size, ncomps, ngrow, shapes); } int* warpx_getBfieldLoVects(int lev, int direction, @@ -245,9 +249,9 @@ extern "C" } double** warpx_getBfieldCP(int lev, int direction, - int *return_size, int *ngrow, int **shapes) { + int *return_size, int *ncomps, int *ngrow, int **shapes) { auto & mf = WarpX::GetInstance().getBfield_cp(lev, direction); - return getMultiFabPointers(mf, return_size, ngrow, shapes); + return getMultiFabPointers(mf, return_size, ncomps, ngrow, shapes); } int* warpx_getBfieldCPLoVects(int lev, int direction, @@ -257,9 +261,9 @@ extern "C" } double** warpx_getBfieldFP(int lev, int direction, - int *return_size, int *ngrow, int **shapes) { + int *return_size, int *ncomps, int *ngrow, int **shapes) { auto & mf = WarpX::GetInstance().getBfield_fp(lev, direction); - return getMultiFabPointers(mf, return_size, ngrow, shapes); + return getMultiFabPointers(mf, return_size, ncomps, ngrow, shapes); } int* warpx_getBfieldFPLoVects(int lev, int direction, @@ -269,9 +273,9 @@ extern "C" } double** warpx_getCurrentDensity(int lev, int direction, - int *return_size, int *ngrow, int **shapes) { + int *return_size, int *ncomps, int *ngrow, int **shapes) { auto & mf = WarpX::GetInstance().getcurrent(lev, direction); - return getMultiFabPointers(mf, return_size, ngrow, shapes); + return getMultiFabPointers(mf, return_size, ncomps, ngrow, shapes); } int* warpx_getCurrentDensityLoVects(int lev, int direction, @@ -281,9 +285,9 @@ extern "C" } double** warpx_getCurrentDensityCP(int lev, int direction, - int *return_size, int *ngrow, int **shapes) { + int *return_size, int *ncomps, int *ngrow, int **shapes) { auto & mf = WarpX::GetInstance().getcurrent_cp(lev, direction); - return getMultiFabPointers(mf, return_size, ngrow, shapes); + return getMultiFabPointers(mf, return_size, ncomps, ngrow, shapes); } int* warpx_getCurrentDensityCPLoVects(int lev, int direction, @@ -293,9 +297,9 @@ extern "C" } double** warpx_getCurrentDensityFP(int lev, int direction, - int *return_size, int *ngrow, int **shapes) { + int *return_size, int *ncomps, int *ngrow, int **shapes) { auto & mf = WarpX::GetInstance().getcurrent_fp(lev, direction); - return getMultiFabPointers(mf, return_size, ngrow, shapes); + return getMultiFabPointers(mf, return_size, ncomps, ngrow, shapes); } int* warpx_getCurrentDensityFPLoVects(int lev, int direction, diff --git a/Source/Python/WarpXWrappers.h b/Source/Python/WarpXWrappers.h index 94fbb0d30..44e0ed4e1 100644 --- a/Source/Python/WarpXWrappers.h +++ b/Source/Python/WarpXWrappers.h @@ -62,19 +62,19 @@ extern "C" { long warpx_getNumParticles(int speciesnumber); double** warpx_getEfield(int lev, int direction, - int *return_size, int* ngrow, int **shapes); + int *return_size, int* ncomps, int* ngrow, int **shapes); int* warpx_getEfieldLoVects(int lev, int direction, int *return_size, int* ngrow); double** warpx_getBfield(int lev, int direction, - int *return_size, int* ngrow, int **shapes); + int *return_size, int* ncomps, int* ngrow, int **shapes); int* warpx_getBfieldLoVects(int lev, int direction, int *return_size, int* ngrow); double** warpx_getCurrentDensity(int lev, int direction, - int *return_size, int* ngrow, int **shapes); + int *return_size, int* ncomps, int* ngrow, int **shapes); int* warpx_getCurrentDensityLoVects(int lev, int direction, int *return_size, int* ngrow); diff --git a/Source/WarpX.H b/Source/WarpX.H index 4ad3d119f..ca7596589 100644 --- a/Source/WarpX.H +++ b/Source/WarpX.H @@ -81,6 +81,7 @@ public: static amrex::Vector<amrex::Real> B_external; // Algorithms + static long use_picsar_deposition; static long current_deposition_algo; static long charge_deposition_algo; static long field_gathering_algo; @@ -656,7 +657,8 @@ private: void EvolvePSATD (int numsteps); void PushPSATD (amrex::Real dt); - void PushPSATD (int lev, amrex::Real dt); + void PushPSATD_localFFT (int lev, amrex::Real dt); + void PushPSATD_hybridFFT (int lev, amrex::Real dt); #endif diff --git a/Source/WarpX.cpp b/Source/WarpX.cpp index 877882037..08948227c 100644 --- a/Source/WarpX.cpp +++ b/Source/WarpX.cpp @@ -37,6 +37,7 @@ Vector<int> WarpX::boost_direction = {0,0,0}; int WarpX::do_compute_max_step_from_zmax = 0; Real WarpX::zmax_plasma_to_compute_max_step = 0.; +long WarpX::use_picsar_deposition = 1; long WarpX::current_deposition_algo; long WarpX::charge_deposition_algo; long WarpX::field_gathering_algo; @@ -484,7 +485,17 @@ WarpX::ReadParameters () { ParmParse pp("algo"); + // If not in RZ mode, read use_picsar_deposition + // In RZ mode, use_picsar_deposition is on, as the C++ version + // of the deposition does not support RZ +#ifndef WARPX_RZ + pp.query("use_picsar_deposition", use_picsar_deposition); +#endif current_deposition_algo = GetAlgorithmInteger(pp, "current_deposition"); + if (!use_picsar_deposition){ + AMREX_ALWAYS_ASSERT_WITH_MESSAGE( current_deposition_algo >= 2, + "if not use_picsar_deposition, cannot use Esirkepov deposition."); + } charge_deposition_algo = GetAlgorithmInteger(pp, "charge_deposition"); field_gathering_algo = GetAlgorithmInteger(pp, "field_gathering"); particle_pusher_algo = GetAlgorithmInteger(pp, "particle_pusher"); @@ -556,8 +567,10 @@ WarpX::MakeNewLevelFromScratch (int lev, Real time, const BoxArray& new_grids, InitLevelData(lev, time); #ifdef WARPX_USE_PSATD - AllocLevelDataFFT(lev); - InitLevelDataFFT(lev, time); + if (fft_hybrid_mpi_decomposition){ + AllocLevelDataFFT(lev); + InitLevelDataFFT(lev, time); + } #endif } @@ -693,6 +706,37 @@ WarpX::AllocLevelData (int lev, const BoxArray& ba, const DistributionMapping& d // CKC solver requires one additional guard cell if (maxwell_fdtd_solver_id == 1) ngF = std::max( ngF, 1 ); +#ifdef WARPX_USE_PSATD + if (fft_hybrid_mpi_decomposition == false){ + // All boxes should have the same number of guard cells + // (to avoid temporary parallel copies) + // Thus take the max of the required number of guards for each field + // Also: the number of guard cell should be enough to contain + // the stencil of the FFT solver. Here, this number (`ngFFT`) + // is determined *empirically* to be the order of the solver + // for nodal, and half the order of the solver for staggered. + IntVect ngFFT; + if (do_nodal) { + ngFFT = IntVect(AMREX_D_DECL(nox_fft, noy_fft, noz_fft)); + } else { + ngFFT = IntVect(AMREX_D_DECL(nox_fft/2, noy_fft/2, noz_fft/2)); + } + for (int i_dim=0; i_dim<AMREX_SPACEDIM; i_dim++ ){ + int ng_required = ngFFT[i_dim]; + // Get the max + ng_required = std::max( ng_required, ngE[i_dim] ); + ng_required = std::max( ng_required, ngJ[i_dim] ); + ng_required = std::max( ng_required, ngRho[i_dim] ); + ng_required = std::max( ng_required, ngF ); + // Set the guard cells to this max + ngE[i_dim] = ng_required; + ngJ[i_dim] = ng_required; + ngRho[i_dim] = ng_required; + ngF = ng_required; + } + } +#endif + AllocLevelMFs(lev, ba, dm, ngE, ngJ, ngRho, ngF); } @@ -743,6 +787,21 @@ WarpX::AllocLevelMFs (int lev, const BoxArray& ba, const DistributionMapping& dm rho_fp[lev].reset(new MultiFab(amrex::convert(ba,IntVect::TheUnitVector()),dm,2,ngRho)); rho_fp_owner_masks[lev] = std::move(rho_fp[lev]->OwnerMask(period)); } + if (fft_hybrid_mpi_decomposition == false){ + // Allocate and initialize the spectral solver + std::array<Real,3> dx = CellSize(lev); +#if (AMREX_SPACEDIM == 3) + RealVect dx_vect(dx[0], dx[1], dx[2]); +#elif (AMREX_SPACEDIM == 2) + RealVect dx_vect(dx[0], dx[2]); +#endif + // Get the cell-centered box, with guard cells + BoxArray realspace_ba = ba; // Copy box + realspace_ba.enclosedCells().grow(ngE); // cell-centered + guard cells + // Define spectral solver + spectral_solver_fp[lev].reset( new SpectralSolver( realspace_ba, dm, + nox_fft, noy_fft, noz_fft, do_nodal, dx_vect, dt[lev] ) ); + } #endif // |