Diffstat (limited to 'Source')
-rw-r--r--  Source/.DS_Store                                          bin 0 -> 6148 bytes
-rw-r--r--  Source/Diagnostics/WarpXIO.cpp                              1
-rw-r--r--  Source/FieldSolver/SpectralSolver/SpectralFieldData.cpp    43
-rw-r--r--  Source/FieldSolver/WarpXFFT.cpp                           193
-rw-r--r--  Source/FortranInterface/WarpX_f.F90                        33
-rw-r--r--  Source/FortranInterface/WarpX_f.H                           6
-rw-r--r--  Source/Laser/LaserParticleContainer.cpp                    12
-rw-r--r--  Source/Particles/Deposition/CurrentDeposition.H           179
-rw-r--r--  Source/Particles/Deposition/Make.package                    3
-rw-r--r--  Source/Particles/Make.package                               1
-rw-r--r--  Source/Particles/PhysicalParticleContainer.H                3
-rw-r--r--  Source/Particles/PhysicalParticleContainer.cpp            658
-rw-r--r--  Source/Particles/RigidInjectedParticleContainer.cpp        20
-rw-r--r--  Source/Particles/WarpXParticleContainer.H                  19
-rw-r--r--  Source/Particles/WarpXParticleContainer.cpp               144
-rw-r--r--  Source/Python/WarpXWrappers.cpp                            50
-rw-r--r--  Source/Python/WarpXWrappers.h                               6
-rw-r--r--  Source/WarpX.H                                              4
-rw-r--r--  Source/WarpX.cpp                                           63
19 files changed, 903 insertions, 535 deletions
diff --git a/Source/.DS_Store b/Source/.DS_Store
new file mode 100644
index 000000000..01640e062
--- /dev/null
+++ b/Source/.DS_Store
Binary files differ
diff --git a/Source/Diagnostics/WarpXIO.cpp b/Source/Diagnostics/WarpXIO.cpp
index 24272c588..38399bf9e 100644
--- a/Source/Diagnostics/WarpXIO.cpp
+++ b/Source/Diagnostics/WarpXIO.cpp
@@ -755,7 +755,6 @@ WarpX::WriteSlicePlotFile () const
// writing plotfile //
const std::string& slice_plotfilename = amrex::Concatenate(slice_plot_file,istep[0]);
amrex::Print() << " Writing slice plotfile " << slice_plotfilename << "\n";
- const int ngrow = 0;
Vector<std::string> rfs;
VisMF::Header::Version current_version = VisMF::GetHeaderVersion();
diff --git a/Source/FieldSolver/SpectralSolver/SpectralFieldData.cpp b/Source/FieldSolver/SpectralSolver/SpectralFieldData.cpp
index a2b695568..948baf0a6 100644
--- a/Source/FieldSolver/SpectralSolver/SpectralFieldData.cpp
+++ b/Source/FieldSolver/SpectralSolver/SpectralFieldData.cpp
@@ -55,30 +55,30 @@ SpectralFieldData::SpectralFieldData( const BoxArray& realspace_ba,
#ifdef AMREX_USE_GPU
// Create cuFFT plans
// Creating 3D plan for real to complex -- double precision
- // Assuming CUDA is used for programming GPU
- // Note that D2Z is inherently forward plan
- // and Z2D is inherently backward plan
+ // Assuming CUDA is used for programming GPU
+ // Note that D2Z is inherently forward plan
+ // and Z2D is inherently backward plan
cufftResult result;
#if (AMREX_SPACEDIM == 3)
- result = cufftPlan3d( &forward_plan[mfi], fft_size[2],
+ result = cufftPlan3d( &forward_plan[mfi], fft_size[2],
fft_size[1],fft_size[0], CUFFT_D2Z);
if ( result != CUFFT_SUCCESS ) {
amrex::Print() << " cufftplan3d forward failed! \n";
}
- result = cufftPlan3d( &backward_plan[mfi], fft_size[2],
+ result = cufftPlan3d( &backward_plan[mfi], fft_size[2],
fft_size[1], fft_size[0], CUFFT_Z2D);
if ( result != CUFFT_SUCCESS ) {
amrex::Print() << " cufftplan3d backward failed! \n";
}
#else
- result = cufftPlan2d( &forward_plan[mfi], fft_size[1],
+ result = cufftPlan2d( &forward_plan[mfi], fft_size[1],
fft_size[0], CUFFT_D2Z );
if ( result != CUFFT_SUCCESS ) {
amrex::Print() << " cufftplan2d forward failed! \n";
}
- result = cufftPlan2d( &backward_plan[mfi], fft_size[1],
+ result = cufftPlan2d( &backward_plan[mfi], fft_size[1],
fft_size[0], CUFFT_Z2D );
if ( result != CUFFT_SUCCESS ) {
amrex::Print() << " cufftplan2d backward failed! \n";
@@ -162,20 +162,20 @@ SpectralFieldData::ForwardTransform( const MultiFab& mf,
Array4<Real> tmp_arr = tmpRealField[mfi].array();
ParallelFor( realspace_bx,
[=] AMREX_GPU_DEVICE(int i, int j, int k) noexcept {
- tmp_arr(i,j,k) = mf_arr(i,j,k,i_comp);
+ tmp_arr(i,j,k) = mf_arr(i,j,k,i_comp);
});
}
// Perform Fourier transform from `tmpRealField` to `tmpSpectralField`
#ifdef AMREX_USE_GPU
- // Perform Fast Fourier Transform on GPU using cuFFT
- // make sure that this is done on the same
- // GPU stream as the above copy
+ // Perform Fast Fourier Transform on GPU using cuFFT
+ // make sure that this is done on the same
+ // GPU stream as the above copy
cufftResult result;
- cudaStream_t stream = amrex::Gpu::Device::cudaStream();
+ cudaStream_t stream = amrex::Gpu::Device::cudaStream();
cufftSetStream ( forward_plan[mfi], stream);
- result = cufftExecD2Z( forward_plan[mfi],
- tmpRealField[mfi].dataPtr(),
+ result = cufftExecD2Z( forward_plan[mfi],
+ tmpRealField[mfi].dataPtr(),
reinterpret_cast<cuDoubleComplex*>(
tmpSpectralField[mfi].dataPtr()) );
if ( result != CUFFT_SUCCESS ) {
@@ -271,13 +271,13 @@ SpectralFieldData::BackwardTransform( MultiFab& mf,
// Perform Fourier transform from `tmpSpectralField` to `tmpRealField`
#ifdef AMREX_USE_GPU
- // Perform Fast Fourier Transform on GPU using cuFFT.
- // make sure that this is done on the same
+ // Perform Fast Fourier Transform on GPU using cuFFT.
+ // make sure that this is done on the same
// GPU stream as the above copy
cufftResult result;
- cudaStream_t stream = amrex::Gpu::Device::cudaStream();
+ cudaStream_t stream = amrex::Gpu::Device::cudaStream();
cufftSetStream ( backward_plan[mfi], stream);
- result = cufftExecZ2D( backward_plan[mfi],
+ result = cufftExecZ2D( backward_plan[mfi],
reinterpret_cast<cuDoubleComplex*>(
tmpSpectralField[mfi].dataPtr()),
tmpRealField[mfi].dataPtr() );
@@ -289,16 +289,17 @@ SpectralFieldData::BackwardTransform( MultiFab& mf,
#endif
// Copy the temporary field `tmpRealField` to the real-space field `mf`
-
+ // (only in the valid cells ; not in the guard cells)
// Normalize (divide by 1/N) since the FFT+IFFT results in a factor N
{
- const Box realspace_bx = tmpRealField[mfi].box();
Array4<Real> mf_arr = mf[mfi].array();
Array4<const Real> tmp_arr = tmpRealField[mfi].array();
// Normalization: divide by the number of points in realspace
+ // (includes the guard cells)
+ const Box realspace_bx = tmpRealField[mfi].box();
const Real inv_N = 1./realspace_bx.numPts();
- ParallelFor( realspace_bx,
+ ParallelFor( mfi.validbox(),
[=] AMREX_GPU_DEVICE(int i, int j, int k) noexcept {
// Copy and normalize field
mf_arr(i,j,k,i_comp) = inv_N*tmp_arr(i,j,k);
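[Note — illustrative sketch, not part of the patch] The hunks above bind each cuFFT execution to the current AMReX GPU stream, and the backward transform now writes only the valid box while normalizing by the full (guard-cell-inclusive) box size, since an unnormalized D2Z + Z2D round trip multiplies every value by the number of real-space points. A minimal standalone sketch of that cuFFT pattern (arbitrary sizes, no AMReX, minimal error handling) could look like:

    #include <cufft.h>
    #include <cuda_runtime.h>
    #include <cstdio>

    int main () {
        const int nx = 8, ny = 8, nz = 8;                // real-space dimensions
        const size_t nreal = size_t(nx)*ny*nz;           // number of real points
        const size_t ncplx = size_t(nx)*ny*(nz/2 + 1);   // complex points for R2C

        double* d_real = nullptr;
        cufftDoubleComplex* d_cplx = nullptr;
        cudaMalloc((void**)&d_real, nreal*sizeof(double));
        cudaMalloc((void**)&d_cplx, ncplx*sizeof(cufftDoubleComplex));
        cudaMemset(d_real, 0, nreal*sizeof(double));

        cudaStream_t stream;
        cudaStreamCreate(&stream);

        // D2Z is inherently a forward plan and Z2D inherently a backward plan
        cufftHandle fwd, bwd;
        if (cufftPlan3d(&fwd, nx, ny, nz, CUFFT_D2Z) != CUFFT_SUCCESS)
            std::printf("cufftPlan3d forward failed!\n");
        if (cufftPlan3d(&bwd, nx, ny, nz, CUFFT_Z2D) != CUFFT_SUCCESS)
            std::printf("cufftPlan3d backward failed!\n");

        // Run the FFTs on the same stream as the surrounding kernels/copies
        cufftSetStream(fwd, stream);
        cufftSetStream(bwd, stream);
        cufftExecD2Z(fwd, d_real, d_cplx);
        cufftExecZ2D(bwd, d_cplx, d_real);
        cudaStreamSynchronize(stream);

        // cuFFT is unnormalized: after D2Z + Z2D each value is scaled by nreal,
        // so the copy back to the destination array divides by nreal (1/N).

        cufftDestroy(fwd);
        cufftDestroy(bwd);
        cudaFree(d_real);
        cudaFree(d_cplx);
        cudaStreamDestroy(stream);
        return 0;
    }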
diff --git a/Source/FieldSolver/WarpXFFT.cpp b/Source/FieldSolver/WarpXFFT.cpp
index 13d92f6f3..f9cd7570a 100644
--- a/Source/FieldSolver/WarpXFFT.cpp
+++ b/Source/FieldSolver/WarpXFFT.cpp
@@ -90,7 +90,7 @@ CopyDataFromFFTToValid (MultiFab& mf, const MultiFab& mf_fft, const BoxArray& ba
const FArrayBox& srcfab = mf_fft[mfi];
const Box& srcbox = srcfab.box();
-
+
if (srcbox.contains(bx))
{
// Copy the interior region (without guard cells)
@@ -109,7 +109,7 @@ CopyDataFromFFTToValid (MultiFab& mf, const MultiFab& mf_fft, const BoxArray& ba
mf.setVal(0.0, 0);
mf.ParallelAdd(mftmp);
-
+
}
}
@@ -129,19 +129,6 @@ WarpX::AllocLevelDataFFT (int lev)
FFTDomainDecomposition(lev, ba_fp_fft, dm_fp_fft, ba_valid_fp_fft[lev], domain_fp_fft[lev],
geom[lev].Domain());
- if (fft_hybrid_mpi_decomposition == false){
- // Allocate and initialize objects for the spectral solver
- // (all use the same distribution mapping)
- std::array<Real,3> dx = CellSize(lev);
-#if (AMREX_SPACEDIM == 3)
- RealVect dx_vect(dx[0], dx[1], dx[2]);
-#elif (AMREX_SPACEDIM == 2)
- RealVect dx_vect(dx[0], dx[2]);
-#endif
- spectral_solver_fp[lev].reset( new SpectralSolver( ba_fp_fft, dm_fp_fft,
- nox_fft, noy_fft, noz_fft, do_nodal, dx_vect, dt[lev] ) );
- }
-
// rho2 has one extra ghost cell, so that it's safe to deposit charge density after
// pushing particle.
@@ -383,105 +370,106 @@ WarpX::PushPSATD (amrex::Real a_dt)
{
for (int lev = 0; lev <= finest_level; ++lev) {
AMREX_ALWAYS_ASSERT_WITH_MESSAGE(dt[lev] == a_dt, "dt must be consistent");
- PushPSATD(lev, a_dt);
+ if (fft_hybrid_mpi_decomposition){
+ PushPSATD_hybridFFT(lev, a_dt);
+ } else {
+ PushPSATD_localFFT(lev, a_dt);
+ }
}
}
+void WarpX::PushPSATD_localFFT (int lev, amrex::Real /* dt */)
+{
+ auto& solver = *spectral_solver_fp[lev];
+
+ // Perform forward Fourier transform
+ solver.ForwardTransform(*Efield_fp[lev][0], SpectralFieldIndex::Ex);
+ solver.ForwardTransform(*Efield_fp[lev][1], SpectralFieldIndex::Ey);
+ solver.ForwardTransform(*Efield_fp[lev][2], SpectralFieldIndex::Ez);
+ solver.ForwardTransform(*Bfield_fp[lev][0], SpectralFieldIndex::Bx);
+ solver.ForwardTransform(*Bfield_fp[lev][1], SpectralFieldIndex::By);
+ solver.ForwardTransform(*Bfield_fp[lev][2], SpectralFieldIndex::Bz);
+ solver.ForwardTransform(*current_fp[lev][0], SpectralFieldIndex::Jx);
+ solver.ForwardTransform(*current_fp[lev][1], SpectralFieldIndex::Jy);
+ solver.ForwardTransform(*current_fp[lev][2], SpectralFieldIndex::Jz);
+ solver.ForwardTransform(*rho_fp[lev], SpectralFieldIndex::rho_old, 0);
+ solver.ForwardTransform(*rho_fp[lev], SpectralFieldIndex::rho_new, 1);
+
+ // Advance fields in spectral space
+ solver.pushSpectralFields();
+
+ // Perform backward Fourier Transform
+ solver.BackwardTransform(*Efield_fp[lev][0], SpectralFieldIndex::Ex);
+ solver.BackwardTransform(*Efield_fp[lev][1], SpectralFieldIndex::Ey);
+ solver.BackwardTransform(*Efield_fp[lev][2], SpectralFieldIndex::Ez);
+ solver.BackwardTransform(*Bfield_fp[lev][0], SpectralFieldIndex::Bx);
+ solver.BackwardTransform(*Bfield_fp[lev][1], SpectralFieldIndex::By);
+ solver.BackwardTransform(*Bfield_fp[lev][2], SpectralFieldIndex::Bz);
+}
+
void
-WarpX::PushPSATD (int lev, amrex::Real /* dt */)
+WarpX::PushPSATD_hybridFFT (int lev, amrex::Real /* dt */)
{
+#ifndef AMREX_USE_CUDA // Running on CPU ; use PICSAR code for the hybrid FFT
+
BL_PROFILE_VAR_NS("WarpXFFT::CopyDualGrid", blp_copy);
BL_PROFILE_VAR_NS("PICSAR::FftPushEB", blp_push_eb);
auto period_fp = geom[lev].periodicity();
BL_PROFILE_VAR_START(blp_copy);
- Efield_fp_fft[lev][0]->ParallelCopy(*Efield_fp[lev][0], 0, 0, 1, 0, 0, period_fp);
- Efield_fp_fft[lev][1]->ParallelCopy(*Efield_fp[lev][1], 0, 0, 1, 0, 0, period_fp);
- Efield_fp_fft[lev][2]->ParallelCopy(*Efield_fp[lev][2], 0, 0, 1, 0, 0, period_fp);
- Bfield_fp_fft[lev][0]->ParallelCopy(*Bfield_fp[lev][0], 0, 0, 1, 0, 0, period_fp);
- Bfield_fp_fft[lev][1]->ParallelCopy(*Bfield_fp[lev][1], 0, 0, 1, 0, 0, period_fp);
- Bfield_fp_fft[lev][2]->ParallelCopy(*Bfield_fp[lev][2], 0, 0, 1, 0, 0, period_fp);
- current_fp_fft[lev][0]->ParallelCopy(*current_fp[lev][0], 0, 0, 1, 0, 0, period_fp);
- current_fp_fft[lev][1]->ParallelCopy(*current_fp[lev][1], 0, 0, 1, 0, 0, period_fp);
- current_fp_fft[lev][2]->ParallelCopy(*current_fp[lev][2], 0, 0, 1, 0, 0, period_fp);
- rho_fp_fft[lev]->ParallelCopy(*rho_fp[lev], 0, 0, 2, 0, 0, period_fp);
+ Efield_fp_fft[lev][0]->ParallelCopy(*Efield_fp[lev][0], 0, 0, 1, Efield_fp[lev][0]->nGrow(), 0, period_fp);
+ Efield_fp_fft[lev][1]->ParallelCopy(*Efield_fp[lev][1], 0, 0, 1, Efield_fp[lev][1]->nGrow(), 0, period_fp);
+ Efield_fp_fft[lev][2]->ParallelCopy(*Efield_fp[lev][2], 0, 0, 1, Efield_fp[lev][2]->nGrow(), 0, period_fp);
+ Bfield_fp_fft[lev][0]->ParallelCopy(*Bfield_fp[lev][0], 0, 0, 1, Bfield_fp[lev][0]->nGrow(), 0, period_fp);
+ Bfield_fp_fft[lev][1]->ParallelCopy(*Bfield_fp[lev][1], 0, 0, 1, Bfield_fp[lev][1]->nGrow(), 0, period_fp);
+ Bfield_fp_fft[lev][2]->ParallelCopy(*Bfield_fp[lev][2], 0, 0, 1, Bfield_fp[lev][2]->nGrow(), 0, period_fp);
+ current_fp_fft[lev][0]->ParallelCopy(*current_fp[lev][0], 0, 0, 1, current_fp[lev][0]->nGrow(), 0, period_fp);
+ current_fp_fft[lev][1]->ParallelCopy(*current_fp[lev][1], 0, 0, 1, current_fp[lev][1]->nGrow(), 0, period_fp);
+ current_fp_fft[lev][2]->ParallelCopy(*current_fp[lev][2], 0, 0, 1, current_fp[lev][2]->nGrow(), 0, period_fp);
+ rho_fp_fft[lev]->ParallelCopy(*rho_fp[lev], 0, 0, 2, rho_fp[lev]->nGrow(), 0, period_fp);
BL_PROFILE_VAR_STOP(blp_copy);
BL_PROFILE_VAR_START(blp_push_eb);
- if (fft_hybrid_mpi_decomposition){
-#ifndef AMREX_USE_CUDA // When running on CPU: use PICSAR code
- if (Efield_fp_fft[lev][0]->local_size() == 1)
- //Only one FFT patch on this MPI
- {
- for (MFIter mfi(*Efield_fp_fft[lev][0]); mfi.isValid(); ++mfi)
- {
- warpx_fft_push_eb(WARPX_TO_FORTRAN_ANYD((*Efield_fp_fft[lev][0])[mfi]),
- WARPX_TO_FORTRAN_ANYD((*Efield_fp_fft[lev][1])[mfi]),
- WARPX_TO_FORTRAN_ANYD((*Efield_fp_fft[lev][2])[mfi]),
- WARPX_TO_FORTRAN_ANYD((*Bfield_fp_fft[lev][0])[mfi]),
- WARPX_TO_FORTRAN_ANYD((*Bfield_fp_fft[lev][1])[mfi]),
- WARPX_TO_FORTRAN_ANYD((*Bfield_fp_fft[lev][2])[mfi]),
- WARPX_TO_FORTRAN_ANYD((*current_fp_fft[lev][0])[mfi]),
- WARPX_TO_FORTRAN_ANYD((*current_fp_fft[lev][1])[mfi]),
- WARPX_TO_FORTRAN_ANYD((*current_fp_fft[lev][2])[mfi]),
- WARPX_TO_FORTRAN_N_ANYD((*rho_fp_fft[lev])[mfi],0),
- WARPX_TO_FORTRAN_N_ANYD((*rho_fp_fft[lev])[mfi],1));
- }
- }
- else if (Efield_fp_fft[lev][0]->local_size() == 0)
- // No FFT patch on this MPI rank
- // Still need to call the MPI-FFT routine.
- {
- FArrayBox fab(Box(IntVect::TheZeroVector(), IntVect::TheUnitVector()));
- warpx_fft_push_eb(WARPX_TO_FORTRAN_ANYD(fab),
- WARPX_TO_FORTRAN_ANYD(fab),
- WARPX_TO_FORTRAN_ANYD(fab),
- WARPX_TO_FORTRAN_ANYD(fab),
- WARPX_TO_FORTRAN_ANYD(fab),
- WARPX_TO_FORTRAN_ANYD(fab),
- WARPX_TO_FORTRAN_ANYD(fab),
- WARPX_TO_FORTRAN_ANYD(fab),
- WARPX_TO_FORTRAN_ANYD(fab),
- WARPX_TO_FORTRAN_ANYD(fab),
- WARPX_TO_FORTRAN_ANYD(fab));
- }
- else
- // Multiple FFT patches on this MPI rank
- {
- amrex::Abort("WarpX::PushPSATD: TODO");
- }
-#else // AMREX_USE_CUDA is defined ; running on GPU
- amrex::Abort("The option `psatd.fft_hybrid_mpi_decomposition` does not work on GPU.");
-#endif
- } else {
- // Not using the hybrid decomposition
- auto& solver = *spectral_solver_fp[lev];
-
- // Perform forward Fourier transform
- solver.ForwardTransform(*Efield_fp_fft[lev][0], SpectralFieldIndex::Ex);
- solver.ForwardTransform(*Efield_fp_fft[lev][1], SpectralFieldIndex::Ey);
- solver.ForwardTransform(*Efield_fp_fft[lev][2], SpectralFieldIndex::Ez);
- solver.ForwardTransform(*Bfield_fp_fft[lev][0], SpectralFieldIndex::Bx);
- solver.ForwardTransform(*Bfield_fp_fft[lev][1], SpectralFieldIndex::By);
- solver.ForwardTransform(*Bfield_fp_fft[lev][2], SpectralFieldIndex::Bz);
- solver.ForwardTransform(*current_fp_fft[lev][0], SpectralFieldIndex::Jx);
- solver.ForwardTransform(*current_fp_fft[lev][1], SpectralFieldIndex::Jy);
- solver.ForwardTransform(*current_fp_fft[lev][2], SpectralFieldIndex::Jz);
- solver.ForwardTransform(*rho_fp_fft[lev], SpectralFieldIndex::rho_old, 0);
- solver.ForwardTransform(*rho_fp_fft[lev], SpectralFieldIndex::rho_new, 1);
-
- // Advance fields in spectral space
- solver.pushSpectralFields();
-
- // Perform backward Fourier Transform
- solver.BackwardTransform(*Efield_fp_fft[lev][0], SpectralFieldIndex::Ex);
- solver.BackwardTransform(*Efield_fp_fft[lev][1], SpectralFieldIndex::Ey);
- solver.BackwardTransform(*Efield_fp_fft[lev][2], SpectralFieldIndex::Ez);
- solver.BackwardTransform(*Bfield_fp_fft[lev][0], SpectralFieldIndex::Bx);
- solver.BackwardTransform(*Bfield_fp_fft[lev][1], SpectralFieldIndex::By);
- solver.BackwardTransform(*Bfield_fp_fft[lev][2], SpectralFieldIndex::Bz);
-
+ if (Efield_fp_fft[lev][0]->local_size() == 1)
+    // Only one FFT patch on this MPI rank
+ {
+ for (MFIter mfi(*Efield_fp_fft[lev][0]); mfi.isValid(); ++mfi)
+ {
+ warpx_fft_push_eb(WARPX_TO_FORTRAN_ANYD((*Efield_fp_fft[lev][0])[mfi]),
+ WARPX_TO_FORTRAN_ANYD((*Efield_fp_fft[lev][1])[mfi]),
+ WARPX_TO_FORTRAN_ANYD((*Efield_fp_fft[lev][2])[mfi]),
+ WARPX_TO_FORTRAN_ANYD((*Bfield_fp_fft[lev][0])[mfi]),
+ WARPX_TO_FORTRAN_ANYD((*Bfield_fp_fft[lev][1])[mfi]),
+ WARPX_TO_FORTRAN_ANYD((*Bfield_fp_fft[lev][2])[mfi]),
+ WARPX_TO_FORTRAN_ANYD((*current_fp_fft[lev][0])[mfi]),
+ WARPX_TO_FORTRAN_ANYD((*current_fp_fft[lev][1])[mfi]),
+ WARPX_TO_FORTRAN_ANYD((*current_fp_fft[lev][2])[mfi]),
+ WARPX_TO_FORTRAN_N_ANYD((*rho_fp_fft[lev])[mfi],0),
+ WARPX_TO_FORTRAN_N_ANYD((*rho_fp_fft[lev])[mfi],1));
+ }
+ }
+ else if (Efield_fp_fft[lev][0]->local_size() == 0)
+ // No FFT patch on this MPI rank
+ // Still need to call the MPI-FFT routine.
+ {
+ FArrayBox fab(Box(IntVect::TheZeroVector(), IntVect::TheUnitVector()));
+ warpx_fft_push_eb(WARPX_TO_FORTRAN_ANYD(fab),
+ WARPX_TO_FORTRAN_ANYD(fab),
+ WARPX_TO_FORTRAN_ANYD(fab),
+ WARPX_TO_FORTRAN_ANYD(fab),
+ WARPX_TO_FORTRAN_ANYD(fab),
+ WARPX_TO_FORTRAN_ANYD(fab),
+ WARPX_TO_FORTRAN_ANYD(fab),
+ WARPX_TO_FORTRAN_ANYD(fab),
+ WARPX_TO_FORTRAN_ANYD(fab),
+ WARPX_TO_FORTRAN_ANYD(fab),
+ WARPX_TO_FORTRAN_ANYD(fab));
+ }
+ else
+ // Multiple FFT patches on this MPI rank
+ {
+ amrex::Abort("WarpX::PushPSATD: TODO");
}
BL_PROFILE_VAR_STOP(blp_push_eb);
@@ -498,7 +486,8 @@ WarpX::PushPSATD (int lev, amrex::Real /* dt */)
{
amrex::Abort("WarpX::PushPSATD: TODO");
}
+#else // AMREX_USE_CUDA is defined ; running on GPU
+ amrex::Abort("The option `psatd.fft_hybrid_mpi_decomposition` does not work on GPU.");
+#endif
}
-
-
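[Note — illustrative sketch, not part of the patch] Besides splitting PushPSATD into a per-level dispatch between the hybrid PICSAR FFT path and the local spectral-solver path, the hunk above changes every ParallelCopy onto the FFT grids so that it carries the source MultiFab's guard cells (nGrow()) instead of copying valid cells only. A hedged sketch of that copy pattern, with placeholder MultiFabs, is:

    #include <AMReX_MultiFab.H>
    #include <AMReX_Periodicity.H>

    // Copy one component of `src` onto `dst`, including the guard cells of `src`
    // (the patch replaces the former `0` source-ghost argument with src.nGrow()).
    void copy_with_guards (amrex::MultiFab& dst, const amrex::MultiFab& src,
                           const amrex::Periodicity& period)
    {
        const int scomp = 0, dcomp = 0, ncomp = 1;
        dst.ParallelCopy(src, scomp, dcomp, ncomp, src.nGrow(), 0, period);
    }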
diff --git a/Source/FortranInterface/WarpX_f.F90 b/Source/FortranInterface/WarpX_f.F90
index 0560a9981..542cc95a1 100644
--- a/Source/FortranInterface/WarpX_f.F90
+++ b/Source/FortranInterface/WarpX_f.F90
@@ -8,39 +8,6 @@ module warpx_module
contains
- subroutine warpx_copy_attribs(np, xp, yp, zp, uxp, uyp, uzp, &
- xpold, ypold, zpold, uxpold, uypold, uzpold) &
- bind(c,name='warpx_copy_attribs')
- integer(c_long), intent(in) :: np
- real(amrex_real), intent(in) :: xp(np)
- real(amrex_real), intent(in) :: yp(np)
- real(amrex_real), intent(in) :: zp(np)
- real(amrex_real), intent(in) :: uxp(np)
- real(amrex_real), intent(in) :: uyp(np)
- real(amrex_real), intent(in) :: uzp(np)
- real(amrex_real), intent(inout) :: xpold(np)
- real(amrex_real), intent(inout) :: ypold(np)
- real(amrex_real), intent(inout) :: zpold(np)
- real(amrex_real), intent(inout) :: uxpold(np)
- real(amrex_real), intent(inout) :: uypold(np)
- real(amrex_real), intent(inout) :: uzpold(np)
-
- integer n
-
- do n = 1, np
-
- xpold(n) = xp(n)
- ypold(n) = yp(n)
- zpold(n) = zp(n)
-
- uxpold(n) = uxp(n)
- uypold(n) = uyp(n)
- uzpold(n) = uzp(n)
-
- end do
-
- end subroutine warpx_copy_attribs
-
subroutine warpx_compute_E (lo, hi, &
phi, phlo, phhi, &
Ex, Exlo, Exhi, &
diff --git a/Source/FortranInterface/WarpX_f.H b/Source/FortranInterface/WarpX_f.H
index 98dd8685d..0440148eb 100644
--- a/Source/FortranInterface/WarpX_f.H
+++ b/Source/FortranInterface/WarpX_f.H
@@ -75,12 +75,6 @@ extern "C"
{
#endif
- void warpx_copy_attribs(const long* np,
- const amrex_real* xp, const amrex_real* yp, const amrex_real* zp,
- const amrex_real* uxp, const amrex_real* uyp, const amrex_real* uzp,
- amrex_real* xpold, amrex_real* ypold, amrex_real* zpold,
- amrex_real* uxpold, amrex_real* uypold, amrex_real* uzpold);
-
// Charge deposition
void warpx_charge_deposition(amrex::Real* rho,
const long* np, const amrex::Real* xp, const amrex::Real* yp, const amrex::Real* zp, const amrex::Real* w,
diff --git a/Source/Laser/LaserParticleContainer.cpp b/Source/Laser/LaserParticleContainer.cpp
index 515aa1f5d..3d3447a3c 100644
--- a/Source/Laser/LaserParticleContainer.cpp
+++ b/Source/Laser/LaserParticleContainer.cpp
@@ -504,15 +504,15 @@ LaserParticleContainer::Evolve (int lev,
// Current Deposition
//
// Deposit inside domains
- DepositCurrent(pti, wp, uxp, uyp, uzp, &jx, &jy, &jz,
- 0, np_current, thread_num,
- lev, lev, dt);
+ DepositCurrentFortran(pti, wp, uxp, uyp, uzp, &jx, &jy, &jz,
+ 0, np_current, thread_num,
+ lev, lev, dt);
bool has_buffer = cjx;
if (has_buffer){
// Deposit in buffers
- DepositCurrent(pti, wp, uxp, uyp, uzp, cjx, cjy, cjz,
- np_current, np-np_current, thread_num,
- lev, lev-1, dt);
+ DepositCurrentFortran(pti, wp, uxp, uyp, uzp, cjx, cjy, cjz,
+ np_current, np-np_current, thread_num,
+ lev, lev-1, dt);
}
//
diff --git a/Source/Particles/Deposition/CurrentDeposition.H b/Source/Particles/Deposition/CurrentDeposition.H
new file mode 100644
index 000000000..efda97892
--- /dev/null
+++ b/Source/Particles/Deposition/CurrentDeposition.H
@@ -0,0 +1,179 @@
+#ifndef CURRENTDEPOSITION_H_
+#define CURRENTDEPOSITION_H_
+
+using namespace amrex;
+
+// Compute shape factor and return index of leftmost cell where
+// particle writes.
+// Specialized templates are defined below for orders 1, 2 and 3.
+template <int depos_order>
+AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
+int compute_shape_factor(Real* const sx, Real xint) {return 0;};
+
+// Compute shape factor for order 1.
+template <>
+AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
+int compute_shape_factor <1> (Real* const sx, Real xmid){
+ int j = (int) xmid;
+ Real xint = xmid-j;
+ sx[0] = 1.0 - xint;
+ sx[1] = xint;
+ return j;
+}
+
+// Compute shape factor for order 2.
+template <>
+AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
+int compute_shape_factor <2> (Real* const sx, Real xmid){
+ int j = (int) (xmid+0.5);
+ Real xint = xmid-j;
+ sx[0] = 0.5*(0.5-xint)*(0.5-xint);
+ sx[1] = 0.75-xint*xint;
+ sx[2] = 0.5*(0.5+xint)*(0.5+xint);
+ // index of the leftmost cell where particle deposits
+ return j-1;
+}
+
+// Compute shape factor for order 3.
+template <>
+AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
+int compute_shape_factor <3> (Real* const sx, Real xmid){
+ int j = (int) xmid;
+ Real xint = xmid-j;
+ sx[0] = 1.0/6.0*(1.0-xint)*(1.0-xint)*(1.0-xint);
+ sx[1] = 2.0/3.0-xint*xint*(1-xint/2.0);
+ sx[2] = 2.0/3.0-(1-xint)*(1-xint)*(1.0-0.5*(1-xint));
+ sx[3] = 1.0/6.0*xint*xint*xint;
+ // index of the leftmost cell where particle deposits
+ return j-1;
+}
+
+/* \brief Current deposition for thread thread_num.
+ * \param xp, yp, zp : Pointers to arrays of particle positions.
+ * \param wp : Pointer to array of particle weights.
+ * \param uxp, uyp, uzp : Pointers to arrays of particle momenta.
+ * \param jx_arr : Array4 of current density, either full array or tile.
+ * \param jy_arr : Array4 of current density, either full array or tile.
+ * \param jz_arr : Array4 of current density, either full array or tile.
+ * \param offset : Index of the first particle for which current is deposited.
+ * \param np_to_depose : Number of particles for which current is deposited.
+ *                       Particles [offset, offset+np_to_depose] deposit current.
+ * \param dt : Time step for the particle level.
+ * \param dx : 3D cell size.
+ * \param xyzmin : Physical lower bounds of the domain.
+ * \param lo : Index lower bounds of the domain.
+ * \param stagger_shift : 0 if nodal, 0.5 if staggered.
+ * \param q : Species charge.
+ */
+template <int depos_order>
+void doDepositionShapeN(const Real * const xp, const Real * const yp, const Real * const zp,
+ const Real * const wp, const Real * const uxp,
+ const Real * const uyp, const Real * const uzp,
+ const amrex::Array4<amrex::Real>& jx_arr,
+ const amrex::Array4<amrex::Real>& jy_arr,
+ const amrex::Array4<amrex::Real>& jz_arr,
+ const long offset, const long np_to_depose,
+ const amrex::Real dt, const std::array<amrex::Real,3>& dx,
+ const std::array<Real, 3> xyzmin,
+ const Dim3 lo,
+ const amrex::Real stagger_shift,
+ const amrex::Real q)
+{
+ const Real dxi = 1.0/dx[0];
+ const Real dzi = 1.0/dx[2];
+ const Real dts2dx = 0.5*dt*dxi;
+ const Real dts2dz = 0.5*dt*dzi;
+#if (AMREX_SPACEDIM == 2)
+ const Real invvol = dxi*dzi;
+#else // (AMREX_SPACEDIM == 3)
+ const Real dyi = 1.0/dx[1];
+ const Real dts2dy = 0.5*dt*dyi;
+ const Real invvol = dxi*dyi*dzi;
+#endif
+
+ const Real xmin = xyzmin[0];
+ const Real ymin = xyzmin[1];
+ const Real zmin = xyzmin[2];
+ const Real clightsq = 1.0/PhysConst::c/PhysConst::c;
+
+ // Loop over particles and deposit into jx_arr, jy_arr and jz_arr
+ ParallelFor( np_to_depose,
+ [=] AMREX_GPU_DEVICE (long ip) {
+ // --- Get particle quantities
+ const Real gaminv = 1.0/std::sqrt(1.0 + uxp[ip]*uxp[ip]*clightsq
+ + uyp[ip]*uyp[ip]*clightsq
+ + uzp[ip]*uzp[ip]*clightsq);
+ const Real wq = q*wp[ip];
+ const Real vx = uxp[ip]*gaminv;
+ const Real vy = uyp[ip]*gaminv;
+ const Real vz = uzp[ip]*gaminv;
+      // wqx, wqy, wqz are the particle current in each direction
+ const Real wqx = wq*invvol*vx;
+ const Real wqy = wq*invvol*vy;
+ const Real wqz = wq*invvol*vz;
+
+ // --- Compute shape factors
+ // x direction
+ // Get particle position after 1/2 push back in position
+ const Real xmid = (xp[ip]-xmin)*dxi-dts2dx*vx;
+ // Compute shape factors for node-centered quantities
+ Real AMREX_RESTRICT sx [depos_order + 1];
+ // j: leftmost grid point (node-centered) that the particle touches
+ const int j = compute_shape_factor<depos_order>(sx, xmid);
+ // Compute shape factors for cell-centered quantities
+ Real AMREX_RESTRICT sx0[depos_order + 1];
+ // j0: leftmost grid point (cell-centered) that the particle touches
+ const int j0 = compute_shape_factor<depos_order>(sx0, xmid-stagger_shift);
+
+#if (AMREX_SPACEDIM == 3)
+ // y direction
+ const Real ymid= (yp[ip]-ymin)*dyi-dts2dy*vy;
+ Real AMREX_RESTRICT sy [depos_order + 1];
+ const int k = compute_shape_factor<depos_order>(sy, ymid);
+ Real AMREX_RESTRICT sy0[depos_order + 1];
+ const int k0 = compute_shape_factor<depos_order>(sy0, ymid-stagger_shift);
+#endif
+ // z direction
+ const Real zmid= (zp[ip]-zmin)*dzi-dts2dz*vz;
+ Real AMREX_RESTRICT sz [depos_order + 1];
+ const int l = compute_shape_factor<depos_order>(sz, zmid);
+ Real AMREX_RESTRICT sz0[depos_order + 1];
+ const int l0 = compute_shape_factor<depos_order>(sz0, zmid-stagger_shift);
+
+ // Deposit current into jx_arr, jy_arr and jz_arr
+#if (AMREX_SPACEDIM == 2)
+ for (int iz=0; iz<=depos_order; iz++){
+ for (int ix=0; ix<=depos_order; ix++){
+ amrex::Gpu::Atomic::Add(
+ &jx_arr(lo.x+j0+ix, lo.y+l +iz, 0),
+ sx0[ix]*sz [iz]*wqx);
+ amrex::Gpu::Atomic::Add(
+ &jy_arr(lo.x+j +ix, lo.y+l +iz, 0),
+ sx [ix]*sz [iz]*wqy);
+ amrex::Gpu::Atomic::Add(
+ &jz_arr(lo.x+j +ix, lo.y+l0+iz, 0),
+ sx [ix]*sz0[iz]*wqz);
+ }
+ }
+#else // (AMREX_SPACEDIM == 3)
+ for (int iz=0; iz<=depos_order; iz++){
+ for (int iy=0; iy<=depos_order; iy++){
+ for (int ix=0; ix<=depos_order; ix++){
+ amrex::Gpu::Atomic::Add(
+ &jx_arr(lo.x+j0+ix, lo.y+k +iy, lo.z+l +iz),
+ sx0[ix]*sy [iy]*sz [iz]*wqx);
+ amrex::Gpu::Atomic::Add(
+ &jy_arr(lo.x+j +ix, lo.y+k0+iy, lo.z+l +iz),
+ sx [ix]*sy0[iy]*sz [iz]*wqy);
+ amrex::Gpu::Atomic::Add(
+ &jz_arr(lo.x+j +ix, lo.y+k +iy, lo.z+l0+iz),
+ sx [ix]*sy [iy]*sz0[iz]*wqz);
+ }
+ }
+ }
+#endif
+ }
+ );
+}
+
+#endif // CURRENTDEPOSITION_H_
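[Note — illustrative sketch, not part of the patch] The new header specializes compute_shape_factor for deposition orders 1 to 3; at each order the returned weights must sum to 1 (a partition of unity) and the return value is the index of the leftmost cell the particle touches. A host-only sanity check, re-implementing the order-2 specialization outside AMReX purely for illustration, might be:

    #include <cassert>
    #include <cmath>
    #include <cstdio>

    // Standalone copy of the order-2 shape factor: weights of the three nearest
    // (node-centered) cells and the index of the leftmost one.
    static int shape_factor_order2 (double* sx, double xmid) {
        const int j = static_cast<int>(xmid + 0.5);
        const double xint = xmid - j;
        sx[0] = 0.5*(0.5 - xint)*(0.5 - xint);
        sx[1] = 0.75 - xint*xint;
        sx[2] = 0.5*(0.5 + xint)*(0.5 + xint);
        return j - 1;   // leftmost cell where the particle deposits
    }

    int main () {
        double sx[3];
        const double xmid = 3.3;                  // particle position in cell units
        const int j = shape_factor_order2(sx, xmid);
        const double sum = sx[0] + sx[1] + sx[2];
        assert(std::abs(sum - 1.0) < 1e-12);      // partition of unity
        std::printf("leftmost cell %d, weights %g %g %g\n", j, sx[0], sx[1], sx[2]);
        return 0;
    }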
diff --git a/Source/Particles/Deposition/Make.package b/Source/Particles/Deposition/Make.package
new file mode 100644
index 000000000..0d5ebe2a7
--- /dev/null
+++ b/Source/Particles/Deposition/Make.package
@@ -0,0 +1,3 @@
+CEXE_headers += CurrentDeposition.H
+INCLUDE_LOCATIONS += $(WARPX_HOME)/Source/Particles/Deposition
+VPATH_LOCATIONS += $(WARPX_HOME)/Source/Particles/Deposition
diff --git a/Source/Particles/Make.package b/Source/Particles/Make.package
index f33095738..2038472a1 100644
--- a/Source/Particles/Make.package
+++ b/Source/Particles/Make.package
@@ -11,6 +11,7 @@ CEXE_headers += RigidInjectedParticleContainer.H
CEXE_headers += PhysicalParticleContainer.H
include $(WARPX_HOME)/Source/Particles/Pusher/Make.package
+include $(WARPX_HOME)/Source/Particles/Deposition/Make.package
INCLUDE_LOCATIONS += $(WARPX_HOME)/Source/Particles
VPATH_LOCATIONS += $(WARPX_HOME)/Source/Particles
diff --git a/Source/Particles/PhysicalParticleContainer.H b/Source/Particles/PhysicalParticleContainer.H
index bd8cfb47e..d55764682 100644
--- a/Source/Particles/PhysicalParticleContainer.H
+++ b/Source/Particles/PhysicalParticleContainer.H
@@ -77,6 +77,9 @@ public:
const amrex::MultiFab& Bx,
const amrex::MultiFab& By,
const amrex::MultiFab& Bz) override;
+
+ void copy_attribs(WarpXParIter& pti,const amrex::Real* xp,
+ const amrex::Real* yp, const amrex::Real* zp);
virtual void PostRestart () final {}
diff --git a/Source/Particles/PhysicalParticleContainer.cpp b/Source/Particles/PhysicalParticleContainer.cpp
index 43b46ec49..d47a7b220 100644
--- a/Source/Particles/PhysicalParticleContainer.cpp
+++ b/Source/Particles/PhysicalParticleContainer.cpp
@@ -12,7 +12,7 @@ using namespace amrex;
long PhysicalParticleContainer::
NumParticlesToAdd(const Box& overlap_box, const RealBox& overlap_realbox,
- const RealBox& tile_realbox, const RealBox& particle_real_box)
+ const RealBox& tile_realbox, const RealBox& particle_real_box)
{
const int lev = 0;
const Geometry& geom = Geom(lev);
@@ -24,43 +24,43 @@ NumParticlesToAdd(const Box& overlap_box, const RealBox& overlap_realbox,
for (IntVect iv = overlap_box.smallEnd(); iv <= overlap_box.bigEnd(); overlap_box.next(iv))
{
int fac;
- if (do_continuous_injection) {
+ if (do_continuous_injection) {
#if ( AMREX_SPACEDIM == 3 )
- Real x = overlap_corner[0] + (iv[0] + 0.5)*dx[0];
- Real y = overlap_corner[1] + (iv[1] + 0.5)*dx[1];
- Real z = overlap_corner[2] + (iv[2] + 0.5)*dx[2];
+ Real x = overlap_corner[0] + (iv[0] + 0.5)*dx[0];
+ Real y = overlap_corner[1] + (iv[1] + 0.5)*dx[1];
+ Real z = overlap_corner[2] + (iv[2] + 0.5)*dx[2];
#elif ( AMREX_SPACEDIM == 2 )
- Real x = overlap_corner[0] + (iv[0] + 0.5)*dx[0];
- Real y = 0;
- Real z = overlap_corner[1] + (iv[1] + 0.5)*dx[1];
+ Real x = overlap_corner[0] + (iv[0] + 0.5)*dx[0];
+ Real y = 0;
+ Real z = overlap_corner[1] + (iv[1] + 0.5)*dx[1];
#endif
- fac = GetRefineFac(x, y, z);
- } else {
- fac = 1.0;
- }
+ fac = GetRefineFac(x, y, z);
+ } else {
+ fac = 1.0;
+ }
- int ref_num_ppc = num_ppc * AMREX_D_TERM(fac, *fac, *fac);
- for (int i_part=0; i_part<ref_num_ppc;i_part++) {
- std::array<Real, 3> r;
- plasma_injector->getPositionUnitBox(r, i_part, fac);
+ int ref_num_ppc = num_ppc * AMREX_D_TERM(fac, *fac, *fac);
+ for (int i_part=0; i_part<ref_num_ppc;i_part++) {
+ std::array<Real, 3> r;
+ plasma_injector->getPositionUnitBox(r, i_part, fac);
#if ( AMREX_SPACEDIM == 3 )
- Real x = overlap_corner[0] + (iv[0] + r[0])*dx[0];
- Real y = overlap_corner[1] + (iv[1] + r[1])*dx[1];
- Real z = overlap_corner[2] + (iv[2] + r[2])*dx[2];
+ Real x = overlap_corner[0] + (iv[0] + r[0])*dx[0];
+ Real y = overlap_corner[1] + (iv[1] + r[1])*dx[1];
+ Real z = overlap_corner[2] + (iv[2] + r[2])*dx[2];
#elif ( AMREX_SPACEDIM == 2 )
- Real x = overlap_corner[0] + (iv[0] + r[0])*dx[0];
- Real y = 0;
- Real z = overlap_corner[1] + (iv[1] + r[1])*dx[1];
+ Real x = overlap_corner[0] + (iv[0] + r[0])*dx[0];
+ Real y = 0;
+ Real z = overlap_corner[1] + (iv[1] + r[1])*dx[1];
#endif
- // If the new particle is not inside the tile box,
- // go to the next generated particle.
+ // If the new particle is not inside the tile box,
+ // go to the next generated particle.
#if ( AMREX_SPACEDIM == 3 )
- if(!tile_realbox.contains( RealVect{x, y, z} )) continue;
+ if(!tile_realbox.contains( RealVect{x, y, z} )) continue;
#elif ( AMREX_SPACEDIM == 2 )
- if(!tile_realbox.contains( RealVect{x, z} )) continue;
+ if(!tile_realbox.contains( RealVect{x, z} )) continue;
#endif
- ++np;
- }
+ ++np;
+ }
}
return np;
@@ -170,8 +170,8 @@ void PhysicalParticleContainer::MapParticletoBoostedFrame(Real& x, Real& y, Real
// Move the particles to where they will be at t = 0 in the boosted frame
if (boost_adjust_transverse_positions) {
- x = xpr - tpr*vxpr;
- y = ypr - tpr*vypr;
+ x = xpr - tpr*vxpr;
+ y = ypr - tpr*vypr;
}
z = zpr - tpr*vzpr;
@@ -323,9 +323,9 @@ void
PhysicalParticleContainer::AddPlasma (int lev, RealBox part_realbox)
{
#ifdef AMREX_USE_GPU
- AddPlasmaGPU(lev, part_realbox);
+ AddPlasmaGPU(lev, part_realbox);
#else
- AddPlasmaCPU(lev, part_realbox);
+ AddPlasmaCPU(lev, part_realbox);
#endif
}
@@ -416,7 +416,7 @@ PhysicalParticleContainer::AddPlasmaCPU (int lev, RealBox part_realbox)
// Count the number of cells in this direction in overlap_realbox
overlap_box.setSmall( dir, 0 );
overlap_box.setBig( dir,
- int( round((overlap_realbox.hi(dir)-overlap_realbox.lo(dir))/dx[dir] )) - 1);
+ int( round((overlap_realbox.hi(dir)-overlap_realbox.lo(dir))/dx[dir] )) - 1);
}
if (no_overlap == 1) {
continue; // Go to the next tile
@@ -483,54 +483,54 @@ PhysicalParticleContainer::AddPlasmaCPU (int lev, RealBox part_realbox)
Real dens;
std::array<Real, 3> u;
if (WarpX::gamma_boost == 1.){
- // Lab-frame simulation
- // If the particle is not within the species's
- // xmin, xmax, ymin, ymax, zmin, zmax, go to
- // the next generated particle.
- if (!plasma_injector->insideBounds(xb, yb, z)) continue;
- plasma_injector->getMomentum(u, x, y, z);
- dens = plasma_injector->getDensity(x, y, z);
+ // Lab-frame simulation
+ // If the particle is not within the species's
+ // xmin, xmax, ymin, ymax, zmin, zmax, go to
+ // the next generated particle.
+ if (!plasma_injector->insideBounds(xb, yb, z)) continue;
+ plasma_injector->getMomentum(u, x, y, z);
+ dens = plasma_injector->getDensity(x, y, z);
} else {
- // Boosted-frame simulation
- Real c = PhysConst::c;
- Real gamma_boost = WarpX::gamma_boost;
- Real beta_boost = WarpX::beta_boost;
- // Since the user provides the density distribution
- // at t_lab=0 and in the lab-frame coordinates,
- // we need to find the lab-frame position of this
- // particle at t_lab=0, from its boosted-frame coordinates
- // Assuming ballistic motion, this is given by:
- // z0_lab = gamma*( z_boost*(1-beta*betaz_lab) - ct_boost*(betaz_lab-beta) )
- // where betaz_lab is the speed of the particle in the lab frame
- //
- // In order for this equation to be solvable, betaz_lab
- // is explicitly assumed to have no dependency on z0_lab
- plasma_injector->getMomentum(u, x, y, 0.); // No z0_lab dependency
- // At this point u is the lab-frame momentum
- // => Apply the above formula for z0_lab
- Real gamma_lab = std::sqrt( 1 + (u[0]*u[0] + u[1]*u[1] + u[2]*u[2])/(c*c) );
- Real betaz_lab = u[2]/gamma_lab/c;
- Real t = WarpX::GetInstance().gett_new(lev);
- Real z0_lab = gamma_boost * ( z*(1-beta_boost*betaz_lab) - c*t*(betaz_lab-beta_boost) );
- // If the particle is not within the lab-frame zmin, zmax, etc.
- // go to the next generated particle.
- if (!plasma_injector->insideBounds(xb, yb, z0_lab)) continue;
- // call `getDensity` with lab-frame parameters
- dens = plasma_injector->getDensity(x, y, z0_lab);
- // At this point u and dens are the lab-frame quantities
- // => Perform Lorentz transform
- dens = gamma_boost * dens * ( 1 - beta_boost*betaz_lab );
- u[2] = gamma_boost * ( u[2] -beta_boost*c*gamma_lab );
+ // Boosted-frame simulation
+ Real c = PhysConst::c;
+ Real gamma_boost = WarpX::gamma_boost;
+ Real beta_boost = WarpX::beta_boost;
+ // Since the user provides the density distribution
+ // at t_lab=0 and in the lab-frame coordinates,
+ // we need to find the lab-frame position of this
+ // particle at t_lab=0, from its boosted-frame coordinates
+ // Assuming ballistic motion, this is given by:
+ // z0_lab = gamma*( z_boost*(1-beta*betaz_lab) - ct_boost*(betaz_lab-beta) )
+ // where betaz_lab is the speed of the particle in the lab frame
+ //
+ // In order for this equation to be solvable, betaz_lab
+ // is explicitly assumed to have no dependency on z0_lab
+ plasma_injector->getMomentum(u, x, y, 0.); // No z0_lab dependency
+ // At this point u is the lab-frame momentum
+ // => Apply the above formula for z0_lab
+ Real gamma_lab = std::sqrt( 1 + (u[0]*u[0] + u[1]*u[1] + u[2]*u[2])/(c*c) );
+ Real betaz_lab = u[2]/gamma_lab/c;
+ Real t = WarpX::GetInstance().gett_new(lev);
+ Real z0_lab = gamma_boost * ( z*(1-beta_boost*betaz_lab) - c*t*(betaz_lab-beta_boost) );
+ // If the particle is not within the lab-frame zmin, zmax, etc.
+ // go to the next generated particle.
+ if (!plasma_injector->insideBounds(xb, yb, z0_lab)) continue;
+ // call `getDensity` with lab-frame parameters
+ dens = plasma_injector->getDensity(x, y, z0_lab);
+ // At this point u and dens are the lab-frame quantities
+ // => Perform Lorentz transform
+ dens = gamma_boost * dens * ( 1 - beta_boost*betaz_lab );
+ u[2] = gamma_boost * ( u[2] -beta_boost*c*gamma_lab );
}
Real weight = dens * scale_fac / (AMREX_D_TERM(fac, *fac, *fac));
#ifdef WARPX_RZ
if (plasma_injector->radially_weighted) {
- weight *= 2*MathConst::pi*xb;
+ weight *= 2*MathConst::pi*xb;
} else {
- // This is not correct since it might shift the particle
- // out of the local grid
- x = std::sqrt(xb*rmax);
- weight *= dx[0];
+ // This is not correct since it might shift the particle
+ // out of the local grid
+ x = std::sqrt(xb*rmax);
+ weight *= dx[0];
}
#endif
attribs[PIdx::w ] = weight;
@@ -550,18 +550,18 @@ PhysicalParticleContainer::AddPlasmaCPU (int lev, RealBox part_realbox)
particle_tile.push_back_real(particle_comps["uzold"], u[2]);
}
- AddOneParticle(lev, grid_id, tile_id, x, y, z, attribs);
+ AddOneParticle(lev, grid_id, tile_id, x, y, z, attribs);
}
}
if (cost) {
- wt = (amrex::second() - wt) / tile_box.d_numPts();
+ wt = (amrex::second() - wt) / tile_box.d_numPts();
Array4<Real> const& costarr = cost->array(mfi);
amrex::ParallelFor(tile_box,
- [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept
- {
- costarr(i,j,k) += wt;
- });
+ [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept
+ {
+ costarr(i,j,k) += wt;
+ });
}
}
}
@@ -655,7 +655,7 @@ PhysicalParticleContainer::AddPlasmaGPU (int lev, RealBox part_realbox)
// Count the number of cells in this direction in overlap_realbox
overlap_box.setSmall( dir, 0 );
overlap_box.setBig( dir,
- int( round((overlap_realbox.hi(dir)-overlap_realbox.lo(dir))/dx[dir] )) - 1);
+ int( round((overlap_realbox.hi(dir)-overlap_realbox.lo(dir))/dx[dir] )) - 1);
}
if (no_overlap == 1) {
continue; // Go to the next tile
@@ -664,8 +664,8 @@ PhysicalParticleContainer::AddPlasmaGPU (int lev, RealBox part_realbox)
const int grid_id = mfi.index();
const int tile_id = mfi.LocalTileIndex();
- Cuda::HostVector<ParticleType> host_particles;
- std::array<Cuda::HostVector<Real>, PIdx::nattribs> host_attribs;
+ Cuda::HostVector<ParticleType> host_particles;
+ std::array<Cuda::HostVector<Real>, PIdx::nattribs> host_attribs;
// Loop through the cells of overlap_box and inject
// the corresponding particles
@@ -725,54 +725,54 @@ PhysicalParticleContainer::AddPlasmaGPU (int lev, RealBox part_realbox)
Real dens;
std::array<Real, 3> u;
if (WarpX::gamma_boost == 1.){
- // Lab-frame simulation
- // If the particle is not within the species's
- // xmin, xmax, ymin, ymax, zmin, zmax, go to
- // the next generated particle.
- if (!plasma_injector->insideBounds(xb, yb, z)) continue;
- plasma_injector->getMomentum(u, x, y, z);
- dens = plasma_injector->getDensity(x, y, z);
+ // Lab-frame simulation
+ // If the particle is not within the species's
+ // xmin, xmax, ymin, ymax, zmin, zmax, go to
+ // the next generated particle.
+ if (!plasma_injector->insideBounds(xb, yb, z)) continue;
+ plasma_injector->getMomentum(u, x, y, z);
+ dens = plasma_injector->getDensity(x, y, z);
} else {
- // Boosted-frame simulation
- Real c = PhysConst::c;
- Real gamma_boost = WarpX::gamma_boost;
- Real beta_boost = WarpX::beta_boost;
- // Since the user provides the density distribution
- // at t_lab=0 and in the lab-frame coordinates,
- // we need to find the lab-frame position of this
- // particle at t_lab=0, from its boosted-frame coordinates
- // Assuming ballistic motion, this is given by:
- // z0_lab = gamma*( z_boost*(1-beta*betaz_lab) - ct_boost*(betaz_lab-beta) )
- // where betaz_lab is the speed of the particle in the lab frame
- //
- // In order for this equation to be solvable, betaz_lab
- // is explicitly assumed to have no dependency on z0_lab
- plasma_injector->getMomentum(u, x, y, 0.); // No z0_lab dependency
- // At this point u is the lab-frame momentum
- // => Apply the above formula for z0_lab
- Real gamma_lab = std::sqrt( 1 + (u[0]*u[0] + u[1]*u[1] + u[2]*u[2])/(c*c) );
- Real betaz_lab = u[2]/gamma_lab/c;
- Real t = WarpX::GetInstance().gett_new(lev);
- Real z0_lab = gamma_boost * ( z*(1-beta_boost*betaz_lab) - c*t*(betaz_lab-beta_boost) );
- // If the particle is not within the lab-frame zmin, zmax, etc.
- // go to the next generated particle.
- if (!plasma_injector->insideBounds(xb, yb, z0_lab)) continue;
- // call `getDensity` with lab-frame parameters
- dens = plasma_injector->getDensity(x, y, z0_lab);
- // At this point u and dens are the lab-frame quantities
- // => Perform Lorentz transform
- dens = gamma_boost * dens * ( 1 - beta_boost*betaz_lab );
- u[2] = gamma_boost * ( u[2] -beta_boost*c*gamma_lab );
+ // Boosted-frame simulation
+ Real c = PhysConst::c;
+ Real gamma_boost = WarpX::gamma_boost;
+ Real beta_boost = WarpX::beta_boost;
+ // Since the user provides the density distribution
+ // at t_lab=0 and in the lab-frame coordinates,
+ // we need to find the lab-frame position of this
+ // particle at t_lab=0, from its boosted-frame coordinates
+ // Assuming ballistic motion, this is given by:
+ // z0_lab = gamma*( z_boost*(1-beta*betaz_lab) - ct_boost*(betaz_lab-beta) )
+ // where betaz_lab is the speed of the particle in the lab frame
+ //
+ // In order for this equation to be solvable, betaz_lab
+ // is explicitly assumed to have no dependency on z0_lab
+ plasma_injector->getMomentum(u, x, y, 0.); // No z0_lab dependency
+ // At this point u is the lab-frame momentum
+ // => Apply the above formula for z0_lab
+ Real gamma_lab = std::sqrt( 1 + (u[0]*u[0] + u[1]*u[1] + u[2]*u[2])/(c*c) );
+ Real betaz_lab = u[2]/gamma_lab/c;
+ Real t = WarpX::GetInstance().gett_new(lev);
+ Real z0_lab = gamma_boost * ( z*(1-beta_boost*betaz_lab) - c*t*(betaz_lab-beta_boost) );
+ // If the particle is not within the lab-frame zmin, zmax, etc.
+ // go to the next generated particle.
+ if (!plasma_injector->insideBounds(xb, yb, z0_lab)) continue;
+ // call `getDensity` with lab-frame parameters
+ dens = plasma_injector->getDensity(x, y, z0_lab);
+ // At this point u and dens are the lab-frame quantities
+ // => Perform Lorentz transform
+ dens = gamma_boost * dens * ( 1 - beta_boost*betaz_lab );
+ u[2] = gamma_boost * ( u[2] -beta_boost*c*gamma_lab );
}
Real weight = dens * scale_fac / (AMREX_D_TERM(fac, *fac, *fac));
#ifdef WARPX_RZ
if (plasma_injector->radially_weighted) {
- weight *= 2*MathConst::pi*xb;
+ weight *= 2*MathConst::pi*xb;
} else {
- // This is not correct since it might shift the particle
- // out of the local grid
- x = std::sqrt(xb*rmax);
- weight *= dx[0];
+ // This is not correct since it might shift the particle
+ // out of the local grid
+ x = std::sqrt(xb*rmax);
+ weight *= dx[0];
}
#endif
attribs[PIdx::w ] = weight;
@@ -793,50 +793,50 @@ PhysicalParticleContainer::AddPlasmaGPU (int lev, RealBox part_realbox)
particle_tile.push_back_real(particle_comps["uzold"], u[2]);
}
- ParticleType p;
- p.id() = ParticleType::NextID();
- p.cpu() = ParallelDescriptor::MyProc();
+ ParticleType p;
+ p.id() = ParticleType::NextID();
+ p.cpu() = ParallelDescriptor::MyProc();
#if (AMREX_SPACEDIM == 3)
- p.pos(0) = x;
- p.pos(1) = y;
- p.pos(2) = z;
+ p.pos(0) = x;
+ p.pos(1) = y;
+ p.pos(2) = z;
#elif (AMREX_SPACEDIM == 2)
#ifdef WARPX_RZ
attribs[PIdx::theta] = theta;
#endif
- p.pos(0) = xb;
- p.pos(1) = z;
+ p.pos(0) = xb;
+ p.pos(1) = z;
#endif
- host_particles.push_back(p);
- for (int kk = 0; kk < PIdx::nattribs; ++kk)
- host_attribs[kk].push_back(attribs[kk]);
+ host_particles.push_back(p);
+ for (int kk = 0; kk < PIdx::nattribs; ++kk)
+ host_attribs[kk].push_back(attribs[kk]);
}
}
- auto& particle_tile = GetParticles(lev)[std::make_pair(grid_id,tile_id)];
+ auto& particle_tile = GetParticles(lev)[std::make_pair(grid_id,tile_id)];
auto old_size = particle_tile.GetArrayOfStructs().size();
auto new_size = old_size + host_particles.size();
- particle_tile.resize(new_size);
+ particle_tile.resize(new_size);
Cuda::thrust_copy(host_particles.begin(),
host_particles.end(),
particle_tile.GetArrayOfStructs().begin() + old_size);
- for (int kk = 0; kk < PIdx::nattribs; ++kk) {
+ for (int kk = 0; kk < PIdx::nattribs; ++kk) {
Cuda::thrust_copy(host_attribs[kk].begin(),
host_attribs[kk].end(),
particle_tile.GetStructOfArrays().GetRealData(kk).begin() + old_size);
- }
+ }
if (cost) {
- wt = (amrex::second() - wt) / tile_box.d_numPts();
+ wt = (amrex::second() - wt) / tile_box.d_numPts();
Array4<Real> const& costarr = cost->array(mfi);
amrex::ParallelFor(tile_box,
- [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept
- {
- costarr(i,j,k) += wt;
- });
+ [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept
+ {
+ costarr(i,j,k) += wt;
+ });
}
}
}
@@ -963,13 +963,13 @@ FieldGatherES (const amrex::Vector<std::array<std::unique_ptr<amrex::MultiFab>,
WRPX_INTERPOLATE_CIC(particles.dataPtr(), nstride, np,
Exp.dataPtr(), Eyp.dataPtr(),
#if AMREX_SPACEDIM == 3
- Ezp.dataPtr(),
+ Ezp.dataPtr(),
#endif
- exfab.dataPtr(), eyfab.dataPtr(),
+ exfab.dataPtr(), eyfab.dataPtr(),
#if AMREX_SPACEDIM == 3
- ezfab.dataPtr(),
+ ezfab.dataPtr(),
#endif
- box.loVect(), box.hiVect(), plo, dx, &ng);
+ box.loVect(), box.hiVect(), plo, dx, &ng);
} else {
const FArrayBox& exfab_coarse = coarse_Ex[pti];
@@ -1004,7 +1004,7 @@ FieldGatherES (const amrex::Vector<std::array<std::unique_ptr<amrex::MultiFab>,
void
PhysicalParticleContainer::EvolveES (const Vector<std::array<std::unique_ptr<MultiFab>, 3> >& E,
- Vector<std::unique_ptr<MultiFab> >& rho,
+ Vector<std::unique_ptr<MultiFab> >& rho,
Real t, Real dt)
{
BL_PROFILE("PPC::EvolveES()");
@@ -1014,7 +1014,7 @@ PhysicalParticleContainer::EvolveES (const Vector<std::array<std::unique_ptr<Mul
BL_ASSERT(OnSameGrids(lev, *rho[lev]));
const auto& gm = m_gdb->Geom(lev);
const RealBox& prob_domain = gm.ProbDomain();
- for (WarpXParIter pti(*this, lev); pti.isValid(); ++pti) {
+ for (WarpXParIter pti(*this, lev); pti.isValid(); ++pti) {
// Particle structs
auto& particles = pti.GetArrayOfStructs();
int nstride = particles.dataShape().first;
@@ -1071,11 +1071,11 @@ PhysicalParticleContainer::FieldGather (int lev,
{
Cuda::ManagedDeviceVector<Real> xp, yp, zp;
- for (WarpXParIter pti(*this, lev); pti.isValid(); ++pti)
- {
+ for (WarpXParIter pti(*this, lev); pti.isValid(); ++pti)
+ {
Real wt = amrex::second();
- const Box& box = pti.validbox();
+ const Box& box = pti.validbox();
auto& attribs = pti.GetAttribs();
@@ -1088,63 +1088,63 @@ PhysicalParticleContainer::FieldGather (int lev,
const long np = pti.numParticles();
- // Data on the grid
- const FArrayBox& exfab = Ex[pti];
- const FArrayBox& eyfab = Ey[pti];
- const FArrayBox& ezfab = Ez[pti];
- const FArrayBox& bxfab = Bx[pti];
- const FArrayBox& byfab = By[pti];
- const FArrayBox& bzfab = Bz[pti];
-
- Exp.assign(np,0.0);
- Eyp.assign(np,0.0);
- Ezp.assign(np,0.0);
- Bxp.assign(np,0.0);
- Byp.assign(np,0.0);
- Bzp.assign(np,0.0);
-
- //
- // copy data from particle container to temp arrays
- //
+ // Data on the grid
+ const FArrayBox& exfab = Ex[pti];
+ const FArrayBox& eyfab = Ey[pti];
+ const FArrayBox& ezfab = Ez[pti];
+ const FArrayBox& bxfab = Bx[pti];
+ const FArrayBox& byfab = By[pti];
+ const FArrayBox& bzfab = Bz[pti];
+
+ Exp.assign(np,0.0);
+ Eyp.assign(np,0.0);
+ Ezp.assign(np,0.0);
+ Bxp.assign(np,0.0);
+ Byp.assign(np,0.0);
+ Bzp.assign(np,0.0);
+
+ //
+ // copy data from particle container to temp arrays
+ //
pti.GetPosition(xp, yp, zp);
const std::array<Real,3>& xyzmin = WarpX::LowerCorner(box, lev);
const int* ixyzmin = box.loVect();
- //
- // Field Gather
- //
- const int ll4symtry = false;
+ //
+ // Field Gather
+ //
+ const int ll4symtry = false;
long lvect_fieldgathe = 64;
- warpx_geteb_energy_conserving(
- &np,
- xp.dataPtr(),
- yp.dataPtr(),
- zp.dataPtr(),
- Exp.dataPtr(),Eyp.dataPtr(),Ezp.dataPtr(),
- Bxp.dataPtr(),Byp.dataPtr(),Bzp.dataPtr(),
- ixyzmin,
- &xyzmin[0], &xyzmin[1], &xyzmin[2],
- &dx[0], &dx[1], &dx[2],
- &WarpX::nox, &WarpX::noy, &WarpX::noz,
- BL_TO_FORTRAN_ANYD(exfab),
- BL_TO_FORTRAN_ANYD(eyfab),
- BL_TO_FORTRAN_ANYD(ezfab),
- BL_TO_FORTRAN_ANYD(bxfab),
- BL_TO_FORTRAN_ANYD(byfab),
- BL_TO_FORTRAN_ANYD(bzfab),
- &ll4symtry, &WarpX::l_lower_order_in_v, &WarpX::do_nodal,
- &lvect_fieldgathe, &WarpX::field_gathering_algo);
+ warpx_geteb_energy_conserving(
+ &np,
+ xp.dataPtr(),
+ yp.dataPtr(),
+ zp.dataPtr(),
+ Exp.dataPtr(),Eyp.dataPtr(),Ezp.dataPtr(),
+ Bxp.dataPtr(),Byp.dataPtr(),Bzp.dataPtr(),
+ ixyzmin,
+ &xyzmin[0], &xyzmin[1], &xyzmin[2],
+ &dx[0], &dx[1], &dx[2],
+ &WarpX::nox, &WarpX::noy, &WarpX::noz,
+ BL_TO_FORTRAN_ANYD(exfab),
+ BL_TO_FORTRAN_ANYD(eyfab),
+ BL_TO_FORTRAN_ANYD(ezfab),
+ BL_TO_FORTRAN_ANYD(bxfab),
+ BL_TO_FORTRAN_ANYD(byfab),
+ BL_TO_FORTRAN_ANYD(bzfab),
+ &ll4symtry, &WarpX::l_lower_order_in_v, &WarpX::do_nodal,
+ &lvect_fieldgathe, &WarpX::field_gathering_algo);
if (cost) {
const Box& tbx = pti.tilebox();
wt = (amrex::second() - wt) / tbx.d_numPts();
Array4<Real> const& costarr = cost->array(pti);
amrex::ParallelFor(tbx,
- [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept
- {
- costarr(i,j,k) += wt;
- });
+ [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept
+ {
+ costarr(i,j,k) += wt;
+ });
}
}
}
@@ -1152,9 +1152,9 @@ PhysicalParticleContainer::FieldGather (int lev,
void
PhysicalParticleContainer::Evolve (int lev,
- const MultiFab& Ex, const MultiFab& Ey, const MultiFab& Ez,
- const MultiFab& Bx, const MultiFab& By, const MultiFab& Bz,
- MultiFab& jx, MultiFab& jy, MultiFab& jz,
+ const MultiFab& Ex, const MultiFab& Ey, const MultiFab& Ez,
+ const MultiFab& Bx, const MultiFab& By, const MultiFab& Bz,
+ MultiFab& jx, MultiFab& jy, MultiFab& jz,
MultiFab* cjx, MultiFab* cjy, MultiFab* cjz,
MultiFab* rho, MultiFab* crho,
const MultiFab* cEx, const MultiFab* cEy, const MultiFab* cEz,
@@ -1200,8 +1200,8 @@ PhysicalParticleContainer::Evolve (int lev,
RealVector tmp;
ParticleVector particle_tmp;
- for (WarpXParIter pti(*this, lev); pti.isValid(); ++pti)
- {
+ for (WarpXParIter pti(*this, lev); pti.isValid(); ++pti)
+ {
Real wt = amrex::second();
const Box& box = pti.validbox();
@@ -1282,14 +1282,14 @@ PhysicalParticleContainer::Evolve (int lev,
#endif
}
- Exp.assign(np,0.0);
- Eyp.assign(np,0.0);
- Ezp.assign(np,0.0);
- Bxp.assign(np,WarpX::B_external[0]);
- Byp.assign(np,WarpX::B_external[1]);
- Bzp.assign(np,WarpX::B_external[2]);
+ Exp.assign(np,0.0);
+ Eyp.assign(np,0.0);
+ Ezp.assign(np,0.0);
+ Bxp.assign(np,WarpX::B_external[0]);
+ Byp.assign(np,WarpX::B_external[1]);
+ Bzp.assign(np,WarpX::B_external[2]);
- m_giv[thread_num].resize(np);
+ m_giv[thread_num].resize(np);
long nfine_current = np;
long nfine_gather = np;
@@ -1384,12 +1384,12 @@ PhysicalParticleContainer::Evolve (int lev,
const long np_current = (cjx) ? nfine_current : np;
- //
- // copy data from particle container to temp arrays
- //
- BL_PROFILE_VAR_START(blp_copy);
+ //
+ // copy data from particle container to temp arrays
+ //
+ BL_PROFILE_VAR_START(blp_copy);
pti.GetPosition(m_xp[thread_num], m_yp[thread_num], m_zp[thread_num]);
- BL_PROFILE_VAR_STOP(blp_copy);
+ BL_PROFILE_VAR_STOP(blp_copy);
if (rho) DepositCharge(pti, wp, rho, crho, 0, np_current, np, thread_num, lev);
@@ -1529,17 +1529,30 @@ PhysicalParticleContainer::Evolve (int lev,
//
// Current Deposition
//
- // Deposit inside domains
- DepositCurrent(pti, wp, uxp, uyp, uzp, &jx, &jy, &jz,
- 0, np_current, thread_num,
- lev, lev, dt);
- if (has_buffer){
- // Deposit in buffers
- DepositCurrent(pti, wp, uxp, uyp, uzp, cjx, cjy, cjz,
- np_current, np-np_current, thread_num,
- lev, lev-1, dt);
+ if (WarpX::use_picsar_deposition) {
+ // Deposit inside domains
+ DepositCurrentFortran(pti, wp, uxp, uyp, uzp, &jx, &jy, &jz,
+ 0, np_current, thread_num,
+ lev, lev, dt);
+ if (has_buffer){
+ // Deposit in buffers
+ DepositCurrentFortran(pti, wp, uxp, uyp, uzp, cjx, cjy, cjz,
+ np_current, np-np_current, thread_num,
+ lev, lev-1, dt);
+ }
+ } else {
+ // Deposit inside domains
+ DepositCurrent(pti, wp, uxp, uyp, uzp, &jx, &jy, &jz,
+ 0, np_current, thread_num,
+ lev, lev, dt);
+ if (has_buffer){
+ // Deposit in buffers
+ DepositCurrent(pti, wp, uxp, uyp, uzp, cjx, cjy, cjz,
+ np_current, np-np_current, thread_num,
+ lev, lev-1, dt);
+ }
}
-
+
//
// copy particle data back
//
@@ -1555,10 +1568,10 @@ PhysicalParticleContainer::Evolve (int lev,
wt = (amrex::second() - wt) / tbx.d_numPts();
Array4<Real> const& costarr = cost->array(pti);
amrex::ParallelFor(tbx,
- [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept
- {
- costarr(i,j,k) += wt;
- });
+ [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept
+ {
+ costarr(i,j,k) += wt;
+ });
}
}
}
@@ -1590,9 +1603,9 @@ PhysicalParticleContainer::SplitParticles(int lev)
{
pti.GetPosition(xp, yp, zp);
const std::array<Real,3>& dx = WarpX::CellSize(lev);
- // particle Array Of Structs data
+ // particle Array Of Structs data
auto& particles = pti.GetArrayOfStructs();
- // particle Struct Of Arrays data
+ // particle Struct Of Arrays data
auto& attribs = pti.GetAttribs();
auto& wp = attribs[PIdx::w ];
auto& uxp = attribs[PIdx::ux];
@@ -1602,13 +1615,13 @@ PhysicalParticleContainer::SplitParticles(int lev)
for(int i=0; i<np; i++){
auto& p = particles[i];
if (p.id() == DoSplitParticleID){
- // If particle is tagged, split it and put the
- // split particles in local arrays psplit_x etc.
+ // If particle is tagged, split it and put the
+ // split particles in local arrays psplit_x etc.
np_split_to_add += np_split;
#if (AMREX_SPACEDIM==2)
if (split_type==0){
- // Split particle in two along each axis
- // 4 particles in 2d
+ // Split particle in two along each axis
+ // 4 particles in 2d
for (int ishift = -1; ishift < 2; ishift +=2 ){
for (int kshift = -1; kshift < 2; kshift +=2 ){
// Add one particle with offset in x and z
@@ -1622,8 +1635,8 @@ PhysicalParticleContainer::SplitParticles(int lev)
}
}
} else {
- // Split particle in two along each diagonal
- // 4 particles in 2d
+ // Split particle in two along each diagonal
+ // 4 particles in 2d
for (int ishift = -1; ishift < 2; ishift +=2 ){
// Add one particle with offset in x
psplit_x.push_back( xp[i] + ishift*dx[0]/2 );
@@ -1644,26 +1657,26 @@ PhysicalParticleContainer::SplitParticles(int lev)
}
}
#elif (AMREX_SPACEDIM==3)
- if (split_type==0){
- // Split particle in two along each axis
- // 6 particles in 2d
- for (int ishift = -1; ishift < 2; ishift +=2 ){
- for (int jshift = -1; jshift < 2; jshift +=2 ){
- for (int kshift = -1; kshift < 2; kshift +=2 ){
- // Add one particle with offset in x, y and z
- psplit_x.push_back( xp[i] + ishift*dx[0]/2 );
- psplit_y.push_back( yp[i] + jshift*dx[1]/2 );
- psplit_z.push_back( zp[i] + jshift*dx[2]/2 );
- psplit_ux.push_back( uxp[i] );
- psplit_uy.push_back( uyp[i] );
- psplit_uz.push_back( uzp[i] );
- psplit_w.push_back( wp[i]/np_split );
- }
- }
- }
- } else {
- // Split particle in two along each diagonal
- // 8 particles in 3d
+ if (split_type==0){
+ // Split particle in two along each axis
+            // 6 particles in 3d
+ for (int ishift = -1; ishift < 2; ishift +=2 ){
+ for (int jshift = -1; jshift < 2; jshift +=2 ){
+ for (int kshift = -1; kshift < 2; kshift +=2 ){
+ // Add one particle with offset in x, y and z
+ psplit_x.push_back( xp[i] + ishift*dx[0]/2 );
+ psplit_y.push_back( yp[i] + jshift*dx[1]/2 );
+ psplit_z.push_back( zp[i] + jshift*dx[2]/2 );
+ psplit_ux.push_back( uxp[i] );
+ psplit_uy.push_back( uyp[i] );
+ psplit_uz.push_back( uzp[i] );
+ psplit_w.push_back( wp[i]/np_split );
+ }
+ }
+ }
+ } else {
+ // Split particle in two along each diagonal
+ // 8 particles in 3d
for (int ishift = -1; ishift < 2; ishift +=2 ){
// Add one particle with offset in x
psplit_x.push_back( xp[i] + ishift*dx[0]/2 );
@@ -1690,9 +1703,9 @@ PhysicalParticleContainer::SplitParticles(int lev)
psplit_uz.push_back( uzp[i] );
psplit_w.push_back( wp[i]/np_split );
}
- }
+ }
#endif
- // invalidate the particle
+ // invalidate the particle
p.m_idata.id = -p.m_idata.id;
}
}
@@ -1704,16 +1717,16 @@ PhysicalParticleContainer::SplitParticles(int lev)
// AddNParticles calls Redistribute, so that particles
// in pctmp_split are in the proper grids and tiles
pctmp_split.AddNParticles(lev,
- np_split_to_add,
- psplit_x.dataPtr(),
- psplit_y.dataPtr(),
- psplit_z.dataPtr(),
- psplit_ux.dataPtr(),
- psplit_uy.dataPtr(),
- psplit_uz.dataPtr(),
- 1,
- psplit_w.dataPtr(),
- 1, NoSplitParticleID);
+ np_split_to_add,
+ psplit_x.dataPtr(),
+ psplit_y.dataPtr(),
+ psplit_z.dataPtr(),
+ psplit_ux.dataPtr(),
+ psplit_uy.dataPtr(),
+ psplit_uz.dataPtr(),
+ 1,
+ psplit_w.dataPtr(),
+ 1, NoSplitParticleID);
// Copy particles from tmp to current particle container
addParticles(pctmp_split,1);
// Clear tmp container
@@ -1722,14 +1735,20 @@ PhysicalParticleContainer::SplitParticles(int lev)
void
PhysicalParticleContainer::PushPX(WarpXParIter& pti,
- Cuda::ManagedDeviceVector<Real>& xp,
+ Cuda::ManagedDeviceVector<Real>& xp,
Cuda::ManagedDeviceVector<Real>& yp,
Cuda::ManagedDeviceVector<Real>& zp,
Cuda::ManagedDeviceVector<Real>& giv,
Real dt)
{
- // This wraps the call to warpx_particle_pusher so that inheritors can modify the call.
+ if (WarpX::do_boosted_frame_diagnostic && do_boosted_frame_diags)
+ {
+ copy_attribs(pti, xp.dataPtr(), yp.dataPtr(), zp.dataPtr());
+ }
+
+    // The following attributes should be included in the C++ version of warpx_particle_pusher
+ // This wraps the call to warpx_particle_pusher so that inheritors can modify the call.
auto& attribs = pti.GetAttribs();
auto& uxp = attribs[PIdx::ux];
auto& uyp = attribs[PIdx::uy];
@@ -1741,22 +1760,7 @@ PhysicalParticleContainer::PushPX(WarpXParIter& pti,
auto& Byp = attribs[PIdx::By];
auto& Bzp = attribs[PIdx::Bz];
const long np = pti.numParticles();
-
- if (WarpX::do_boosted_frame_diagnostic && do_boosted_frame_diags)
- {
- auto& xpold = pti.GetAttribs(particle_comps["xold"]);
- auto& ypold = pti.GetAttribs(particle_comps["yold"]);
- auto& zpold = pti.GetAttribs(particle_comps["zold"]);
- auto& uxpold = pti.GetAttribs(particle_comps["uxold"]);
- auto& uypold = pti.GetAttribs(particle_comps["uyold"]);
- auto& uzpold = pti.GetAttribs(particle_comps["uzold"]);
-
- warpx_copy_attribs(&np, xp.dataPtr(), yp.dataPtr(), zp.dataPtr(),
- uxp.dataPtr(), uyp.dataPtr(), uzp.dataPtr(),
- xpold.dataPtr(), ypold.dataPtr(), zpold.dataPtr(),
- uxpold.dataPtr(), uypold.dataPtr(), uzpold.dataPtr());
- }
-
+
warpx_particle_pusher(&np,
xp.dataPtr(),
yp.dataPtr(),
@@ -1791,8 +1795,8 @@ PhysicalParticleContainer::PushP (int lev, Real dt,
int thread_num = 0;
#endif
for (WarpXParIter pti(*this, lev); pti.isValid(); ++pti)
- {
- const Box& box = pti.validbox();
+ {
+ const Box& box = pti.validbox();
auto& attribs = pti.GetAttribs();
@@ -1808,26 +1812,26 @@ PhysicalParticleContainer::PushP (int lev, Real dt,
const long np = pti.numParticles();
- // Data on the grid
- const FArrayBox& exfab = Ex[pti];
- const FArrayBox& eyfab = Ey[pti];
- const FArrayBox& ezfab = Ez[pti];
- const FArrayBox& bxfab = Bx[pti];
- const FArrayBox& byfab = By[pti];
- const FArrayBox& bzfab = Bz[pti];
-
- Exp.assign(np,0.0);
- Eyp.assign(np,0.0);
- Ezp.assign(np,0.0);
- Bxp.assign(np,WarpX::B_external[0]);
- Byp.assign(np,WarpX::B_external[1]);
- Bzp.assign(np,WarpX::B_external[2]);
-
- m_giv[thread_num].resize(np);
-
- //
- // copy data from particle container to temp arrays
- //
+ // Data on the grid
+ const FArrayBox& exfab = Ex[pti];
+ const FArrayBox& eyfab = Ey[pti];
+ const FArrayBox& ezfab = Ez[pti];
+ const FArrayBox& bxfab = Bx[pti];
+ const FArrayBox& byfab = By[pti];
+ const FArrayBox& bzfab = Bz[pti];
+
+ Exp.assign(np,0.0);
+ Eyp.assign(np,0.0);
+ Ezp.assign(np,0.0);
+ Bxp.assign(np,WarpX::B_external[0]);
+ Byp.assign(np,WarpX::B_external[1]);
+ Bzp.assign(np,WarpX::B_external[2]);
+
+ m_giv[thread_num].resize(np);
+
+ //
+ // copy data from particle container to temp arrays
+ //
pti.GetPosition(m_xp[thread_num], m_yp[thread_num], m_zp[thread_num]);
const std::array<Real,3>& xyzmin_grid = WarpX::LowerCorner(box, lev);
@@ -1870,6 +1874,38 @@ PhysicalParticleContainer::PushP (int lev, Real dt,
}
}
+void PhysicalParticleContainer::copy_attribs(WarpXParIter& pti,const Real* xp,
+ const Real* yp, const Real* zp)
+{
+
+ auto& attribs = pti.GetAttribs();
+
+ Real* AMREX_RESTRICT uxp = attribs[PIdx::ux].dataPtr();
+ Real* AMREX_RESTRICT uyp = attribs[PIdx::uy].dataPtr();
+ Real* AMREX_RESTRICT uzp = attribs[PIdx::uz].dataPtr();
+
+ Real* AMREX_RESTRICT xpold = pti.GetAttribs(particle_comps["xold"]).dataPtr();
+ Real* AMREX_RESTRICT ypold = pti.GetAttribs(particle_comps["yold"]).dataPtr();
+ Real* AMREX_RESTRICT zpold = pti.GetAttribs(particle_comps["zold"]).dataPtr();
+ Real* AMREX_RESTRICT uxpold = pti.GetAttribs(particle_comps["uxold"]).dataPtr();
+ Real* AMREX_RESTRICT uypold = pti.GetAttribs(particle_comps["uyold"]).dataPtr();
+ Real* AMREX_RESTRICT uzpold = pti.GetAttribs(particle_comps["uzold"]).dataPtr();
+
+ const long np = pti.numParticles();
+
+ ParallelFor( np,
+ [=] AMREX_GPU_DEVICE (long i) {
+ xpold[i]=xp[i];
+ ypold[i]=yp[i];
+ zpold[i]=zp[i];
+
+ uxpold[i]=uxp[i];
+ uypold[i]=uyp[i];
+ uzpold[i]=uzp[i];
+ }
+ );
+}
+
void PhysicalParticleContainer::GetParticleSlice(const int direction, const Real z_old,
const Real z_new, const Real t_boost,
const Real t_lab, const Real dt,
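
The new copy_attribs replaces the Fortran warpx_copy_attribs call with an amrex::ParallelFor over the particle index, so the copy runs on the device when WarpX is built for GPU. Below is a minimal, self-contained sketch of that pattern; the function and array names (copy_one_attrib, src, dst) are hypothetical stand-ins, not WarpX symbols.

#include <AMReX_GpuLaunch.H>
#include <AMReX_REAL.H>

// Copy one particle attribute into its "old" counterpart, element by element.
// 'src' and 'dst' stand in for e.g. xp and xpold; np is the number of particles.
void copy_one_attrib (long np, const amrex::Real* src, amrex::Real* dst)
{
    amrex::ParallelFor( np,
        [=] AMREX_GPU_DEVICE (long i) {
            dst[i] = src[i];   // device lambda on GPU builds, plain loop on CPU
        });
}
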
diff --git a/Source/Particles/RigidInjectedParticleContainer.cpp b/Source/Particles/RigidInjectedParticleContainer.cpp
index 2a3e8dd0d..9bd4cb4fc 100644
--- a/Source/Particles/RigidInjectedParticleContainer.cpp
+++ b/Source/Particles/RigidInjectedParticleContainer.cpp
@@ -211,6 +211,11 @@ RigidInjectedParticleContainer::PushPX(WarpXParIter& pti,
Real dt)
{
+ if (WarpX::do_boosted_frame_diagnostic && do_boosted_frame_diags)
+ {
+ copy_attribs(pti, xp.dataPtr(), yp.dataPtr(), zp.dataPtr());
+ }
+
// This wraps the call to warpx_particle_pusher so that inheritors can modify the call.
auto& attribs = pti.GetAttribs();
auto& uxp = attribs[PIdx::ux];
@@ -224,21 +229,6 @@ RigidInjectedParticleContainer::PushPX(WarpXParIter& pti,
auto& Bzp = attribs[PIdx::Bz];
const long np = pti.numParticles();
- if (WarpX::do_boosted_frame_diagnostic && do_boosted_frame_diags)
- {
- auto& xpold = pti.GetAttribs(particle_comps["xold"]);
- auto& ypold = pti.GetAttribs(particle_comps["yold"]);
- auto& zpold = pti.GetAttribs(particle_comps["zold"]);
- auto& uxpold = pti.GetAttribs(particle_comps["uxold"]);
- auto& uypold = pti.GetAttribs(particle_comps["uyold"]);
- auto& uzpold = pti.GetAttribs(particle_comps["uzold"]);
-
- warpx_copy_attribs(&np, xp.dataPtr(), yp.dataPtr(), zp.dataPtr(),
- uxp.dataPtr(), uyp.dataPtr(), uzp.dataPtr(),
- xpold.dataPtr(), ypold.dataPtr(), zpold.dataPtr(),
- uxpold.dataPtr(), uypold.dataPtr(), uzpold.dataPtr());
- }
-
// Save the position and momenta, making copies
Cuda::ManagedDeviceVector<Real> xp_save, yp_save, zp_save;
RealVector uxp_save, uyp_save, uzp_save;
diff --git a/Source/Particles/WarpXParticleContainer.H b/Source/Particles/WarpXParticleContainer.H
index 0e800bf1d..662b2e1b8 100644
--- a/Source/Particles/WarpXParticleContainer.H
+++ b/Source/Particles/WarpXParticleContainer.H
@@ -189,6 +189,21 @@ public:
int depos_lev,
amrex::Real dt);
+ virtual void DepositCurrentFortran(WarpXParIter& pti,
+ RealVector& wp,
+ RealVector& uxp,
+ RealVector& uyp,
+ RealVector& uzp,
+ amrex::MultiFab* jx,
+ amrex::MultiFab* jy,
+ amrex::MultiFab* jz,
+ const long offset,
+ const long np_to_depose,
+ int thread_num,
+ int lev,
+ int depos_lev,
+ amrex::Real dt);
+
// If particles start outside of the domain, ContinuousInjection
// makes sure that they are initialized when they enter the domain, and
// NOT before. Virtual function, overriden by derived classes.
@@ -253,8 +268,8 @@ public:
AddIntComp(comm);
}
- int DoBoostedFrameDiags () const { return do_boosted_frame_diags; }
-
+ int DoBoostedFrameDiags () const { return do_boosted_frame_diags; }
+
protected:
std::map<std::string, int> particle_comps;
diff --git a/Source/Particles/WarpXParticleContainer.cpp b/Source/Particles/WarpXParticleContainer.cpp
index 317f46fd4..7316dcc95 100644
--- a/Source/Particles/WarpXParticleContainer.cpp
+++ b/Source/Particles/WarpXParticleContainer.cpp
@@ -10,6 +10,7 @@
// Import low-level single-particle kernels
#include <GetAndSetPosition.H>
#include <UpdatePosition.H>
+#include <CurrentDeposition.H>
using namespace amrex;
@@ -279,7 +280,7 @@ WarpXParticleContainer::AddNParticles (int lev,
Redistribute();
}
-/* \brief Current Deposition for thread thread_num
+/* \brief Current Deposition for thread thread_num using PICSAR
* \param pti : Particle iterator
* \param wp : Array of particle weights
* \param uxp uyp uzp : Array of particle
@@ -292,16 +293,15 @@ WarpXParticleContainer::AddNParticles (int lev,
* \param lev : Level of box that contains particles
* \param depos_lev : Level on which particles deposit (if buffers are used)
* \param dt : Time step for particle level
- * \param ngJ : Number of ghosts cells (of lev)
*/
void
-WarpXParticleContainer::DepositCurrent(WarpXParIter& pti,
- RealVector& wp, RealVector& uxp,
- RealVector& uyp, RealVector& uzp,
- MultiFab* jx, MultiFab* jy, MultiFab* jz,
- const long offset, const long np_to_depose,
- int thread_num, int lev, int depos_lev,
- Real dt)
+WarpXParticleContainer::DepositCurrentFortran(WarpXParIter& pti,
+ RealVector& wp, RealVector& uxp,
+ RealVector& uyp, RealVector& uzp,
+ MultiFab* jx, MultiFab* jy, MultiFab* jz,
+ const long offset, const long np_to_depose,
+ int thread_num, int lev, int depos_lev,
+ Real dt)
{
AMREX_ALWAYS_ASSERT_WITH_MESSAGE((depos_lev==(lev-1)) ||
(depos_lev==(lev )),
@@ -414,6 +414,130 @@ WarpXParticleContainer::DepositCurrent(WarpXParIter& pti,
#endif
}
+/* \brief Current Deposition for thread thread_num
+ * \param pti : Particle iterator
+ * \param wp : Array of particle weights
+ * \param uxp uyp uzp : Arrays of particle momenta
+ * \param jx jy jz : Full array of current density
+ * \param offset : Index of first particle for which current is deposited
+ * \param np_to_depose: Number of particles for which current is deposited.
+                        Particles [offset,offset+np_to_depose] deposit current
+ * \param thread_num : Thread number (if tiling)
+ * \param lev : Level of box that contains particles
+ * \param depos_lev : Level on which particles deposit (if buffers are used)
+ * \param dt : Time step for particle level
+ */
+void
+WarpXParticleContainer::DepositCurrent(WarpXParIter& pti,
+ RealVector& wp, RealVector& uxp,
+ RealVector& uyp, RealVector& uzp,
+ MultiFab* jx, MultiFab* jy, MultiFab* jz,
+ const long offset, const long np_to_depose,
+ int thread_num, int lev, int depos_lev,
+ Real dt)
+{
+ AMREX_ALWAYS_ASSERT_WITH_MESSAGE((depos_lev==(lev-1)) ||
+ (depos_lev==(lev )),
+ "Deposition buffers only work for lev-1");
+ // If no particles, do not do anything
+ if (np_to_depose == 0) return;
+
+ const long ngJ = jx->nGrow();
+ const std::array<Real,3>& dx = WarpX::CellSize(std::max(depos_lev,0));
+ int j_is_nodal = jx->is_nodal() and jy->is_nodal() and jz->is_nodal();
+ Real q = this->charge;
+ const Real stagger_shift = j_is_nodal ? 0.0 : 0.5;
+
+ BL_PROFILE_VAR_NS("PPC::Evolve::Accumulate", blp_accumulate);
+
+ // Get tile box where current is deposited.
+ // The tile box is different when depositing in the buffers (depos_lev<lev)
+ // or when depositing inside the level (depos_lev=lev)
+ Box tilebox;
+ if (lev == depos_lev) {
+ tilebox = pti.tilebox();
+ } else {
+ const IntVect& ref_ratio = WarpX::RefRatio(depos_lev);
+ tilebox = amrex::coarsen(pti.tilebox(),ref_ratio);
+ }
+
+ // Staggered tile boxes (different in each direction)
+ Box tbx = convert(tilebox, WarpX::jx_nodal_flag);
+ Box tby = convert(tilebox, WarpX::jy_nodal_flag);
+ Box tbz = convert(tilebox, WarpX::jz_nodal_flag);
+ tilebox.grow(ngJ);
+
+#ifdef AMREX_USE_GPU
+    // No tiling on GPU: jx_arr points to the full
+    // jx array (same for jy_arr and jz_arr).
+ Array4<Real> const& jx_arr = jx->array(pti);
+ Array4<Real> const& jy_arr = jy->array(pti);
+ Array4<Real> const& jz_arr = jz->array(pti);
+#else
+    // Tiling is on: jx_arr points to local_jx[thread_num]
+    // (same for jy_arr and jz_arr)
+ tbx.grow(ngJ);
+ tby.grow(ngJ);
+ tbz.grow(ngJ);
+
+ local_jx[thread_num].resize(tbx);
+ local_jy[thread_num].resize(tby);
+ local_jz[thread_num].resize(tbz);
+
+ // local_jx[thread_num] is set to zero
+ local_jx[thread_num].setVal(0.0);
+ local_jy[thread_num].setVal(0.0);
+ local_jz[thread_num].setVal(0.0);
+
+ Array4<Real> const& jx_arr = local_jx[thread_num].array();
+ Array4<Real> const& jy_arr = local_jy[thread_num].array();
+ Array4<Real> const& jz_arr = local_jz[thread_num].array();
+#endif
+    // GPU, no tiling: deposit directly in jx
+    // CPU, tiling: deposit into local_jx
+    // (same for jy and jz)
+
+ Real* AMREX_RESTRICT xp = m_xp[thread_num].dataPtr() + offset;
+ Real* AMREX_RESTRICT zp = m_zp[thread_num].dataPtr() + offset;
+ Real* AMREX_RESTRICT yp = m_yp[thread_num].dataPtr() + offset;
+
+ // Lower corner of tile box physical domain
+    const std::array<Real, 3>& xyzmin = WarpX::LowerCorner(tilebox, depos_lev);
+    // xyzmin is built from pti.tilebox(), so it does not include staggering;
+    // the stagger_shift therefore has to be applied by hand.
+    // Alternatively, we could define one xyzmin per direction, from tbx, tby
+    // and tbz (for jx, jy and jz respectively); sx0 would then not be needed.
+    // Better for memory? Worth trying?
+ const Dim3 lo = lbound(tilebox);
+
+ if (WarpX::nox == 1){
+ doDepositionShapeN<1>(xp, yp, zp, wp.dataPtr(), uxp.dataPtr(),
+ uyp.dataPtr(), uzp.dataPtr(), jx_arr, jy_arr,
+ jz_arr, offset, np_to_depose, dt, dx,
+ xyzmin, lo, stagger_shift, q);
+ } else if (WarpX::nox == 2){
+ doDepositionShapeN<2>(xp, yp, zp, wp.dataPtr(), uxp.dataPtr(),
+ uyp.dataPtr(), uzp.dataPtr(), jx_arr, jy_arr,
+ jz_arr, offset, np_to_depose, dt, dx,
+ xyzmin, lo, stagger_shift, q);
+ } else if (WarpX::nox == 3){
+ doDepositionShapeN<3>(xp, yp, zp, wp.dataPtr(), uxp.dataPtr(),
+ uyp.dataPtr(), uzp.dataPtr(), jx_arr, jy_arr,
+ jz_arr, offset, np_to_depose, dt, dx,
+ xyzmin, lo, stagger_shift, q);
+ }
+
+#ifndef AMREX_USE_GPU
+ BL_PROFILE_VAR_START(blp_accumulate);
+    // CPU, tiling: atomicAdd local_jx into jx
+    // (same for jy and jz)
+ (*jx)[pti].atomicAdd(local_jx[thread_num], tbx, tbx, 0, 0, 1);
+ (*jy)[pti].atomicAdd(local_jy[thread_num], tby, tby, 0, 0, 1);
+ (*jz)[pti].atomicAdd(local_jz[thread_num], tbz, tbz, 0, 0, 1);
+ BL_PROFILE_VAR_STOP(blp_accumulate);
+#endif
+}
+
void
WarpXParticleContainer::DepositCharge ( WarpXParIter& pti, RealVector& wp,
MultiFab* rhomf, MultiFab* crhomf, int icomp,
@@ -932,3 +1056,5 @@ WarpXParticleContainer::particlePostLocate(ParticleType& p,
if (pld.m_lev == lev-1){
}
}
+
+
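
With this change the container exposes two deposition back-ends: DepositCurrentFortran, which keeps the PICSAR kernels, and the new pure-C++ DepositCurrent built on the templated doDepositionShapeN. A hedged sketch of how a caller can pick between them through the new WarpX::use_picsar_deposition flag follows; the actual call site is elsewhere in this commit and not shown in this section, so the variables are assumed to be in scope as in the signatures above.

// Sketch only: select the current-deposition back-end at run time.
if (WarpX::use_picsar_deposition) {
    DepositCurrentFortran(pti, wp, uxp, uyp, uzp, jx, jy, jz,
                          offset, np_to_depose, thread_num, lev, depos_lev, dt);
} else {
    DepositCurrent(pti, wp, uxp, uyp, uzp, jx, jy, jz,
                   offset, np_to_depose, thread_num, lev, depos_lev, dt);
}
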
diff --git a/Source/Python/WarpXWrappers.cpp b/Source/Python/WarpXWrappers.cpp
index 3c1a930b3..3ed4830f5 100644
--- a/Source/Python/WarpXWrappers.cpp
+++ b/Source/Python/WarpXWrappers.cpp
@@ -10,22 +10,26 @@
namespace
{
- double** getMultiFabPointers(const amrex::MultiFab& mf, int *num_boxes, int *ngrow, int **shapes)
+ double** getMultiFabPointers(const amrex::MultiFab& mf, int *num_boxes, int *ncomps, int *ngrow, int **shapes)
{
+ *ncomps = mf.nComp();
*ngrow = mf.nGrow();
*num_boxes = mf.local_size();
- *shapes = (int*) malloc(AMREX_SPACEDIM * (*num_boxes) * sizeof(int));
+ int shapesize = AMREX_SPACEDIM;
+ if (mf.nComp() > 1) shapesize += 1;
+ *shapes = (int*) malloc(shapesize * (*num_boxes) * sizeof(int));
double** data = (double**) malloc((*num_boxes) * sizeof(double*));
- int i = 0;
#ifdef _OPENMP
#pragma omp parallel
#endif
- for ( amrex::MFIter mfi(mf, false); mfi.isValid(); ++mfi, ++i ) {
+ for ( amrex::MFIter mfi(mf, false); mfi.isValid(); ++mfi ) {
+ int i = mfi.LocalIndex();
data[i] = (double*) mf[mfi].dataPtr();
for (int j = 0; j < AMREX_SPACEDIM; ++j) {
- (*shapes)[AMREX_SPACEDIM*i+j] = mf[mfi].box().length(j);
+ (*shapes)[shapesize*i+j] = mf[mfi].box().length(j);
}
+            if (mf.nComp() > 1) (*shapes)[shapesize*i+AMREX_SPACEDIM] = mf.nComp();
}
return data;
}
@@ -197,9 +201,9 @@ extern "C"
}
double** warpx_getEfield(int lev, int direction,
- int *return_size, int *ngrow, int **shapes) {
+ int *return_size, int *ncomps, int *ngrow, int **shapes) {
auto & mf = WarpX::GetInstance().getEfield(lev, direction);
- return getMultiFabPointers(mf, return_size, ngrow, shapes);
+ return getMultiFabPointers(mf, return_size, ncomps, ngrow, shapes);
}
int* warpx_getEfieldLoVects(int lev, int direction,
@@ -209,9 +213,9 @@ extern "C"
}
double** warpx_getEfieldCP(int lev, int direction,
- int *return_size, int *ngrow, int **shapes) {
+ int *return_size, int *ncomps, int *ngrow, int **shapes) {
auto & mf = WarpX::GetInstance().getEfield_cp(lev, direction);
- return getMultiFabPointers(mf, return_size, ngrow, shapes);
+ return getMultiFabPointers(mf, return_size, ncomps, ngrow, shapes);
}
int* warpx_getEfieldCPLoVects(int lev, int direction,
@@ -221,9 +225,9 @@ extern "C"
}
double** warpx_getEfieldFP(int lev, int direction,
- int *return_size, int *ngrow, int **shapes) {
+ int *return_size, int *ncomps, int *ngrow, int **shapes) {
auto & mf = WarpX::GetInstance().getEfield_fp(lev, direction);
- return getMultiFabPointers(mf, return_size, ngrow, shapes);
+ return getMultiFabPointers(mf, return_size, ncomps, ngrow, shapes);
}
int* warpx_getEfieldFPLoVects(int lev, int direction,
@@ -233,9 +237,9 @@ extern "C"
}
double** warpx_getBfield(int lev, int direction,
- int *return_size, int *ngrow, int **shapes) {
+ int *return_size, int *ncomps, int *ngrow, int **shapes) {
auto & mf = WarpX::GetInstance().getBfield(lev, direction);
- return getMultiFabPointers(mf, return_size, ngrow, shapes);
+ return getMultiFabPointers(mf, return_size, ncomps, ngrow, shapes);
}
int* warpx_getBfieldLoVects(int lev, int direction,
@@ -245,9 +249,9 @@ extern "C"
}
double** warpx_getBfieldCP(int lev, int direction,
- int *return_size, int *ngrow, int **shapes) {
+ int *return_size, int *ncomps, int *ngrow, int **shapes) {
auto & mf = WarpX::GetInstance().getBfield_cp(lev, direction);
- return getMultiFabPointers(mf, return_size, ngrow, shapes);
+ return getMultiFabPointers(mf, return_size, ncomps, ngrow, shapes);
}
int* warpx_getBfieldCPLoVects(int lev, int direction,
@@ -257,9 +261,9 @@ extern "C"
}
double** warpx_getBfieldFP(int lev, int direction,
- int *return_size, int *ngrow, int **shapes) {
+ int *return_size, int *ncomps, int *ngrow, int **shapes) {
auto & mf = WarpX::GetInstance().getBfield_fp(lev, direction);
- return getMultiFabPointers(mf, return_size, ngrow, shapes);
+ return getMultiFabPointers(mf, return_size, ncomps, ngrow, shapes);
}
int* warpx_getBfieldFPLoVects(int lev, int direction,
@@ -269,9 +273,9 @@ extern "C"
}
double** warpx_getCurrentDensity(int lev, int direction,
- int *return_size, int *ngrow, int **shapes) {
+ int *return_size, int *ncomps, int *ngrow, int **shapes) {
auto & mf = WarpX::GetInstance().getcurrent(lev, direction);
- return getMultiFabPointers(mf, return_size, ngrow, shapes);
+ return getMultiFabPointers(mf, return_size, ncomps, ngrow, shapes);
}
int* warpx_getCurrentDensityLoVects(int lev, int direction,
@@ -281,9 +285,9 @@ extern "C"
}
double** warpx_getCurrentDensityCP(int lev, int direction,
- int *return_size, int *ngrow, int **shapes) {
+ int *return_size, int *ncomps, int *ngrow, int **shapes) {
auto & mf = WarpX::GetInstance().getcurrent_cp(lev, direction);
- return getMultiFabPointers(mf, return_size, ngrow, shapes);
+ return getMultiFabPointers(mf, return_size, ncomps, ngrow, shapes);
}
int* warpx_getCurrentDensityCPLoVects(int lev, int direction,
@@ -293,9 +297,9 @@ extern "C"
}
double** warpx_getCurrentDensityFP(int lev, int direction,
- int *return_size, int *ngrow, int **shapes) {
+ int *return_size, int *ncomps, int *ngrow, int **shapes) {
auto & mf = WarpX::GetInstance().getcurrent_fp(lev, direction);
- return getMultiFabPointers(mf, return_size, ngrow, shapes);
+ return getMultiFabPointers(mf, return_size, ncomps, ngrow, shapes);
}
int* warpx_getCurrentDensityFPLoVects(int lev, int direction,
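
The added ncomps output changes how callers must read the shapes array: each local box contributes AMREX_SPACEDIM extents, followed by one extra entry holding the component count whenever ncomps > 1. A hedged caller sketch (illustrative level/direction values, 3D build assumed, error handling omitted):

#include <cstdlib>
#include "WarpXWrappers.h"

void probe_efield ()
{
    int nboxes, ncomps, ngrow;
    int* shapes;
    double** data = warpx_getEfield(0 /*lev*/, 0 /*direction*/,
                                    &nboxes, &ncomps, &ngrow, &shapes);
    const int ndims = 3;                                  // AMREX_SPACEDIM of this build (assumed)
    const int shapesize = ndims + (ncomps > 1 ? 1 : 0);   // extra slot only when multi-component
    for (int i = 0; i < nboxes; ++i) {
        // extents of box i: shapes[shapesize*i + 0] .. shapes[shapesize*i + ndims - 1]
        // component count (when ncomps > 1): shapes[shapesize*i + ndims]
        double* box_data = data[i];   // raw field data of box i
        (void) box_data;
    }
    free(shapes);   // both arrays are malloc'ed inside the wrapper
    free(data);
}
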
diff --git a/Source/Python/WarpXWrappers.h b/Source/Python/WarpXWrappers.h
index 94fbb0d30..44e0ed4e1 100644
--- a/Source/Python/WarpXWrappers.h
+++ b/Source/Python/WarpXWrappers.h
@@ -62,19 +62,19 @@ extern "C" {
long warpx_getNumParticles(int speciesnumber);
double** warpx_getEfield(int lev, int direction,
- int *return_size, int* ngrow, int **shapes);
+ int *return_size, int* ncomps, int* ngrow, int **shapes);
int* warpx_getEfieldLoVects(int lev, int direction,
int *return_size, int* ngrow);
double** warpx_getBfield(int lev, int direction,
- int *return_size, int* ngrow, int **shapes);
+ int *return_size, int* ncomps, int* ngrow, int **shapes);
int* warpx_getBfieldLoVects(int lev, int direction,
int *return_size, int* ngrow);
double** warpx_getCurrentDensity(int lev, int direction,
- int *return_size, int* ngrow, int **shapes);
+ int *return_size, int* ncomps, int* ngrow, int **shapes);
int* warpx_getCurrentDensityLoVects(int lev, int direction,
int *return_size, int* ngrow);
diff --git a/Source/WarpX.H b/Source/WarpX.H
index 4ad3d119f..ca7596589 100644
--- a/Source/WarpX.H
+++ b/Source/WarpX.H
@@ -81,6 +81,7 @@ public:
static amrex::Vector<amrex::Real> B_external;
// Algorithms
+ static long use_picsar_deposition;
static long current_deposition_algo;
static long charge_deposition_algo;
static long field_gathering_algo;
@@ -656,7 +657,8 @@ private:
void EvolvePSATD (int numsteps);
void PushPSATD (amrex::Real dt);
- void PushPSATD (int lev, amrex::Real dt);
+ void PushPSATD_localFFT (int lev, amrex::Real dt);
+ void PushPSATD_hybridFFT (int lev, amrex::Real dt);
#endif
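
The single-level PushPSATD(lev, dt) is replaced by two variants: PushPSATD_localFFT, which presumably drives the new per-box SpectralSolver, and PushPSATD_hybridFFT, which keeps the hybrid MPI FFT decomposition. A plausible sketch of the per-level dispatch, assuming it keys off fft_hybrid_mpi_decomposition as elsewhere in this change (the real definition lives in the field-solver sources, not shown here):

// Sketch only: per-level selection between the two PSATD back-ends.
void WarpX::PushPSATD (amrex::Real dt)
{
    for (int lev = 0; lev <= finest_level; ++lev) {
        if (fft_hybrid_mpi_decomposition) {
            PushPSATD_hybridFFT(lev, dt);   // global FFTs over an MPI group
        } else {
            PushPSATD_localFFT(lev, dt);    // per-box FFTs via SpectralSolver
        }
    }
}
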
diff --git a/Source/WarpX.cpp b/Source/WarpX.cpp
index 877882037..08948227c 100644
--- a/Source/WarpX.cpp
+++ b/Source/WarpX.cpp
@@ -37,6 +37,7 @@ Vector<int> WarpX::boost_direction = {0,0,0};
int WarpX::do_compute_max_step_from_zmax = 0;
Real WarpX::zmax_plasma_to_compute_max_step = 0.;
+long WarpX::use_picsar_deposition = 1;
long WarpX::current_deposition_algo;
long WarpX::charge_deposition_algo;
long WarpX::field_gathering_algo;
@@ -484,7 +485,17 @@ WarpX::ReadParameters ()
{
ParmParse pp("algo");
+ // If not in RZ mode, read use_picsar_deposition
+ // In RZ mode, use_picsar_deposition is on, as the C++ version
+ // of the deposition does not support RZ
+#ifndef WARPX_RZ
+ pp.query("use_picsar_deposition", use_picsar_deposition);
+#endif
current_deposition_algo = GetAlgorithmInteger(pp, "current_deposition");
+ if (!use_picsar_deposition){
+ AMREX_ALWAYS_ASSERT_WITH_MESSAGE( current_deposition_algo >= 2,
+            "The C++ deposition (algo.use_picsar_deposition = 0) does not support Esirkepov deposition.");
+ }
charge_deposition_algo = GetAlgorithmInteger(pp, "charge_deposition");
field_gathering_algo = GetAlgorithmInteger(pp, "field_gathering");
particle_pusher_algo = GetAlgorithmInteger(pp, "particle_pusher");
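
From the user side, the new switch sits in the algo block of the inputs file. A minimal, illustrative fragment (the value shown is not a default; RZ builds ignore it and keep the PICSAR path):

# Illustrative inputs fragment: disable the PICSAR (Fortran) current deposition
# and use the new C++ deposition instead.
algo.use_picsar_deposition = 0
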
@@ -556,8 +567,10 @@ WarpX::MakeNewLevelFromScratch (int lev, Real time, const BoxArray& new_grids,
InitLevelData(lev, time);
#ifdef WARPX_USE_PSATD
- AllocLevelDataFFT(lev);
- InitLevelDataFFT(lev, time);
+ if (fft_hybrid_mpi_decomposition){
+ AllocLevelDataFFT(lev);
+ InitLevelDataFFT(lev, time);
+ }
#endif
}
@@ -693,6 +706,37 @@ WarpX::AllocLevelData (int lev, const BoxArray& ba, const DistributionMapping& d
// CKC solver requires one additional guard cell
if (maxwell_fdtd_solver_id == 1) ngF = std::max( ngF, 1 );
+#ifdef WARPX_USE_PSATD
+ if (fft_hybrid_mpi_decomposition == false){
+ // All boxes should have the same number of guard cells
+ // (to avoid temporary parallel copies)
+ // Thus take the max of the required number of guards for each field
+ // Also: the number of guard cell should be enough to contain
+        // Also: the number of guard cells should be enough to contain
+ // is determined *empirically* to be the order of the solver
+ // for nodal, and half the order of the solver for staggered.
+ IntVect ngFFT;
+ if (do_nodal) {
+ ngFFT = IntVect(AMREX_D_DECL(nox_fft, noy_fft, noz_fft));
+ } else {
+ ngFFT = IntVect(AMREX_D_DECL(nox_fft/2, noy_fft/2, noz_fft/2));
+ }
+ for (int i_dim=0; i_dim<AMREX_SPACEDIM; i_dim++ ){
+ int ng_required = ngFFT[i_dim];
+ // Get the max
+ ng_required = std::max( ng_required, ngE[i_dim] );
+ ng_required = std::max( ng_required, ngJ[i_dim] );
+ ng_required = std::max( ng_required, ngRho[i_dim] );
+ ng_required = std::max( ng_required, ngF );
+ // Set the guard cells to this max
+ ngE[i_dim] = ng_required;
+ ngJ[i_dim] = ng_required;
+ ngRho[i_dim] = ng_required;
+ ngF = ng_required;
+ }
+ }
+#endif
+
AllocLevelMFs(lev, ba, dm, ngE, ngJ, ngRho, ngF);
}
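
As a worked example of the guard-cell rule above (illustrative numbers, not defaults): a 16th-order staggered solver, nox_fft = noy_fft = noz_fft = 16 with do_nodal = 0, gives ngFFT = (8,8,8); if ngE, ngJ and ngRho were (2,2,2) and ngF were 1, every field would be raised to 8 guard cells per direction before AllocLevelMFs is called.

// Illustrative only: 16th-order staggered solver.
IntVect ngFFT(AMREX_D_DECL(16/2, 16/2, 16/2));   // = (8,8,8)
// Per direction, max over {ngFFT, ngE, ngJ, ngRho, ngF} = 8,
// so ngE, ngJ, ngRho and ngF all become 8 before allocation.
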
@@ -743,6 +787,21 @@ WarpX::AllocLevelMFs (int lev, const BoxArray& ba, const DistributionMapping& dm
rho_fp[lev].reset(new MultiFab(amrex::convert(ba,IntVect::TheUnitVector()),dm,2,ngRho));
rho_fp_owner_masks[lev] = std::move(rho_fp[lev]->OwnerMask(period));
}
+ if (fft_hybrid_mpi_decomposition == false){
+ // Allocate and initialize the spectral solver
+ std::array<Real,3> dx = CellSize(lev);
+#if (AMREX_SPACEDIM == 3)
+ RealVect dx_vect(dx[0], dx[1], dx[2]);
+#elif (AMREX_SPACEDIM == 2)
+ RealVect dx_vect(dx[0], dx[2]);
+#endif
+ // Get the cell-centered box, with guard cells
+ BoxArray realspace_ba = ba; // Copy box
+ realspace_ba.enclosedCells().grow(ngE); // cell-centered + guard cells
+ // Define spectral solver
+ spectral_solver_fp[lev].reset( new SpectralSolver( realspace_ba, dm,
+ nox_fft, noy_fft, noz_fft, do_nodal, dx_vect, dt[lev] ) );
+ }
#endif
//