diff options
Diffstat (limited to 'Source')
64 files changed, 3765 insertions, 2658 deletions
diff --git a/Source/.DS_Store b/Source/.DS_Store Binary files differdeleted file mode 100644 index 01640e062..000000000 --- a/Source/.DS_Store +++ /dev/null diff --git a/Source/BoundaryConditions/PML.H b/Source/BoundaryConditions/PML.H index 0cf367284..b34cbe88b 100644 --- a/Source/BoundaryConditions/PML.H +++ b/Source/BoundaryConditions/PML.H @@ -6,6 +6,10 @@ #include <AMReX_MultiFab.H> #include <AMReX_Geometry.H> +#ifdef WARPX_USE_PSATD +#include <SpectralSolver.H> +#endif + #if (AMREX_SPACEDIM == 3) #define WRPX_PML_TO_FORTRAN(x) \ @@ -93,7 +97,12 @@ class PML public: PML (const amrex::BoxArray& ba, const amrex::DistributionMapping& dm, const amrex::Geometry* geom, const amrex::Geometry* cgeom, - int ncell, int delta, int ref_ratio, int do_dive_cleaning, int do_moving_window); + int ncell, int delta, int ref_ratio, +#ifdef WARPX_USE_PSATD + amrex::Real dt, int nox_fft, int noy_fft, int noz_fft, bool do_nodal, +#endif + int do_dive_cleaning, int do_moving_window, + const amrex::IntVect do_pml_Lo, const amrex::IntVect do_pml_Hi); void ComputePMLFactors (amrex::Real dt); @@ -111,6 +120,10 @@ public: const MultiSigmaBox& GetMultiSigmaBox_cp () const { return *sigba_cp; } +#ifdef WARPX_USE_PSATD + void PushPSATD (); +#endif + void ExchangeB (const std::array<amrex::MultiFab*,3>& B_fp, const std::array<amrex::MultiFab*,3>& B_cp); void ExchangeE (const std::array<amrex::MultiFab*,3>& E_fp, @@ -154,10 +167,23 @@ private: std::unique_ptr<MultiSigmaBox> sigba_fp; std::unique_ptr<MultiSigmaBox> sigba_cp; +#ifdef WARPX_USE_PSATD + std::unique_ptr<SpectralSolver> spectral_solver_fp; + std::unique_ptr<SpectralSolver> spectral_solver_cp; +#endif + static amrex::BoxArray MakeBoxArray (const amrex::Geometry& geom, - const amrex::BoxArray& grid_ba, int ncell); + const amrex::BoxArray& grid_ba, int ncell, + const amrex::IntVect do_pml_Lo, + const amrex::IntVect do_pml_Hi); static void Exchange (amrex::MultiFab& pml, amrex::MultiFab& reg, const amrex::Geometry& geom); }; +#ifdef 
WARPX_USE_PSATD +void PushPMLPSATDSinglePatch( SpectralSolver& solver, + std::array<std::unique_ptr<amrex::MultiFab>,3>& pml_E, + std::array<std::unique_ptr<amrex::MultiFab>,3>& pml_B ); +#endif + #endif diff --git a/Source/BoundaryConditions/PML.cpp b/Source/BoundaryConditions/PML.cpp index f780f335c..21d348482 100644 --- a/Source/BoundaryConditions/PML.cpp +++ b/Source/BoundaryConditions/PML.cpp @@ -258,14 +258,7 @@ SigmaBox::ComputePMLFactorsB (const Real* dx, Real dt) { for (int i = 0, N = sigma_star[idim].size(); i < N; ++i) { - if (sigma_star[idim][i] == 0.0) - { - sigma_star_fac[idim][i] = 1.0; - } - else - { - sigma_star_fac[idim][i] = std::exp(-sigma_star[idim][i]*dt); - } + sigma_star_fac[idim][i] = std::exp(-sigma_star[idim][i]*dt); } } } @@ -277,14 +270,7 @@ SigmaBox::ComputePMLFactorsE (const Real* dx, Real dt) { for (int i = 0, N = sigma[idim].size(); i < N; ++i) { - if (sigma[idim][i] == 0.0) - { - sigma_fac[idim][i] = 1.0; - } - else - { - sigma_fac[idim][i] = std::exp(-sigma[idim][i]*dt); - } + sigma_fac[idim][i] = std::exp(-sigma[idim][i]*dt); } } } @@ -329,11 +315,16 @@ MultiSigmaBox::ComputePMLFactorsE (const Real* dx, Real dt) PML::PML (const BoxArray& grid_ba, const DistributionMapping& grid_dm, const Geometry* geom, const Geometry* cgeom, - int ncell, int delta, int ref_ratio, int do_dive_cleaning, int do_moving_window) + int ncell, int delta, int ref_ratio, +#ifdef WARPX_USE_PSATD + Real dt, int nox_fft, int noy_fft, int noz_fft, bool do_nodal, +#endif + int do_dive_cleaning, int do_moving_window, + const amrex::IntVect do_pml_Lo, const amrex::IntVect do_pml_Hi) : m_geom(geom), m_cgeom(cgeom) { - const BoxArray& ba = MakeBoxArray(*geom, grid_ba, ncell); + const BoxArray& ba = MakeBoxArray(*geom, grid_ba, ncell, do_pml_Lo, do_pml_Hi); if (ba.size() == 0) { m_ok = false; return; @@ -343,10 +334,30 @@ PML::PML (const BoxArray& grid_ba, const DistributionMapping& grid_dm, DistributionMapping dm{ba}; - int nge = 2; - int ngb = 2; - int ngf = 
(do_moving_window) ? 2 : 0; - if (WarpX::maxwell_fdtd_solver_id == 1) ngf = std::max( ngf, 1 ); + // Define the number of guard cells in each direction, for E, B, and F + IntVect nge = IntVect(AMREX_D_DECL(2, 2, 2)); + IntVect ngb = IntVect(AMREX_D_DECL(2, 2, 2)); + int ngf_int = (do_moving_window) ? 2 : 0; + if (WarpX::maxwell_fdtd_solver_id == 1) ngf_int = std::max( ngf_int, 1 ); + IntVect ngf = IntVect(AMREX_D_DECL(ngf_int, ngf_int, ngf_int)); +#ifdef WARPX_USE_PSATD + // Increase the number of guard cells, in order to fit the extent + // of the stencil for the spectral solver + IntVect ngFFT; + if (do_nodal) { + ngFFT = IntVect(AMREX_D_DECL(nox_fft, noy_fft, noz_fft)); + } else { + ngFFT = IntVect(AMREX_D_DECL(nox_fft/2, noy_fft/2, noz_fft/2)); + } + // Set the number of guard cells to the maximum of each field + // (all fields should have the same number of guard cells) + ngFFT = ngFFT.max(nge); + ngFFT = ngFFT.max(ngb); + ngFFT = ngFFT.max(ngf); + nge = ngFFT; + ngb = ngFFT; + ngf = ngFFT; + #endif pml_E_fp[0].reset(new MultiFab(amrex::convert(ba,WarpX::Ex_nodal_flag), dm, 3, nge)); pml_E_fp[1].reset(new MultiFab(amrex::convert(ba,WarpX::Ey_nodal_flag), dm, 3, nge)); @@ -370,15 +381,26 @@ PML::PML (const BoxArray& grid_ba, const DistributionMapping& grid_dm, sigba_fp.reset(new MultiSigmaBox(ba, dm, grid_ba, geom->CellSize(), ncell, delta)); +#ifdef WARPX_USE_PSATD + const bool in_pml = true; // Tells spectral solver to use split-PML equations + const RealVect dx{AMREX_D_DECL(geom->CellSize(0), geom->CellSize(1), geom->CellSize(2))}; + // Get the cell-centered box, with guard cells + BoxArray realspace_ba = ba; // Copy box + realspace_ba.enclosedCells().grow(nge); // cell-centered + guard cells + spectral_solver_fp.reset( new SpectralSolver( realspace_ba, dm, + nox_fft, noy_fft, noz_fft, do_nodal, dx, dt, in_pml ) ); +#endif + if (cgeom) { - - nge = 1; - ngb = 1; +#ifndef WARPX_USE_PSATD + nge = IntVect(AMREX_D_DECL(1, 1, 1)); + ngb = IntVect(AMREX_D_DECL(1, 
1, 1)); +#endif BoxArray grid_cba = grid_ba; grid_cba.coarsen(ref_ratio); - const BoxArray& cba = MakeBoxArray(*cgeom, grid_cba, ncell); + const BoxArray& cba = MakeBoxArray(*cgeom, grid_cba, ncell, do_pml_Lo, do_pml_Hi); DistributionMapping cdm{cba}; @@ -403,17 +425,32 @@ PML::PML (const BoxArray& grid_ba, const DistributionMapping& grid_dm, } sigba_cp.reset(new MultiSigmaBox(cba, cdm, grid_cba, cgeom->CellSize(), ncell, delta)); - } +#ifdef WARPX_USE_PSATD + const bool in_pml = true; // Tells spectral solver to use split-PML equations + const RealVect cdx{AMREX_D_DECL(cgeom->CellSize(0), cgeom->CellSize(1), cgeom->CellSize(2))}; + // Get the cell-centered box, with guard cells + BoxArray realspace_cba = cba; // Copy box + realspace_cba.enclosedCells().grow(nge); // cell-centered + guard cells + spectral_solver_cp.reset( new SpectralSolver( realspace_cba, cdm, + nox_fft, noy_fft, noz_fft, do_nodal, cdx, dt, in_pml ) ); +#endif + } } BoxArray -PML::MakeBoxArray (const amrex::Geometry& geom, const amrex::BoxArray& grid_ba, int ncell) +PML::MakeBoxArray (const amrex::Geometry& geom, const amrex::BoxArray& grid_ba, int ncell, + const amrex::IntVect do_pml_Lo, const amrex::IntVect do_pml_Hi) { Box domain = geom.Domain(); for (int idim = 0; idim < AMREX_SPACEDIM; ++idim) { if ( ! 
geom.isPeriodic(idim) ) { - domain.grow(idim, ncell); + if (do_pml_Lo[idim]){ + domain.growLo(idim, ncell); + } + if (do_pml_Hi[idim]){ + domain.growHi(idim, ncell); + } } } @@ -753,3 +790,57 @@ PML::Restart (const std::string& dir) VisMF::Read(*pml_B_cp[2], dir+"_Bz_cp"); } } + +#ifdef WARPX_USE_PSATD +void +PML::PushPSATD () { + + // Update the fields on the fine and coarse patch + PushPMLPSATDSinglePatch( *spectral_solver_fp, pml_E_fp, pml_B_fp ); + if (spectral_solver_cp) { + PushPMLPSATDSinglePatch( *spectral_solver_cp, pml_E_cp, pml_B_cp ); + } +} + +void +PushPMLPSATDSinglePatch ( + SpectralSolver& solver, + std::array<std::unique_ptr<amrex::MultiFab>,3>& pml_E, + std::array<std::unique_ptr<amrex::MultiFab>,3>& pml_B ) { + + using Idx = SpectralPMLIndex; + + // Perform forward Fourier transform + // Note: the correspondence between the spectral PML index + // (Exy, Ezx, etc.) and the component (0 or 1) of the + // MultiFabs (e.g. pml_E) is dictated by the + // function that damps the PML + solver.ForwardTransform(*pml_E[0], Idx::Exy, 0); + solver.ForwardTransform(*pml_E[0], Idx::Exz, 1); + solver.ForwardTransform(*pml_E[1], Idx::Eyz, 0); + solver.ForwardTransform(*pml_E[1], Idx::Eyx, 1); + solver.ForwardTransform(*pml_E[2], Idx::Ezx, 0); + solver.ForwardTransform(*pml_E[2], Idx::Ezy, 1); + solver.ForwardTransform(*pml_B[0], Idx::Bxy, 0); + solver.ForwardTransform(*pml_B[0], Idx::Bxz, 1); + solver.ForwardTransform(*pml_B[1], Idx::Byz, 0); + solver.ForwardTransform(*pml_B[1], Idx::Byx, 1); + solver.ForwardTransform(*pml_B[2], Idx::Bzx, 0); + solver.ForwardTransform(*pml_B[2], Idx::Bzy, 1); + // Advance fields in spectral space + solver.pushSpectralFields(); + // Perform backward Fourier Transform + solver.BackwardTransform(*pml_E[0], Idx::Exy, 0); + solver.BackwardTransform(*pml_E[0], Idx::Exz, 1); + solver.BackwardTransform(*pml_E[1], Idx::Eyz, 0); + solver.BackwardTransform(*pml_E[1], Idx::Eyx, 1); + solver.BackwardTransform(*pml_E[2], Idx::Ezx, 0); +
solver.BackwardTransform(*pml_E[2], Idx::Ezy, 1); + solver.BackwardTransform(*pml_B[0], Idx::Bxy, 0); + solver.BackwardTransform(*pml_B[0], Idx::Bxz, 1); + solver.BackwardTransform(*pml_B[1], Idx::Byz, 0); + solver.BackwardTransform(*pml_B[1], Idx::Byx, 1); + solver.BackwardTransform(*pml_B[2], Idx::Bzx, 0); + solver.BackwardTransform(*pml_B[2], Idx::Bzy, 1); +} +#endif diff --git a/Source/Diagnostics/ParticleIO.cpp b/Source/Diagnostics/ParticleIO.cpp index f2a543ed5..f159e5302 100644 --- a/Source/Diagnostics/ParticleIO.cpp +++ b/Source/Diagnostics/ParticleIO.cpp @@ -98,7 +98,7 @@ MultiParticleContainer::WritePlotFile (const std::string& dir) const real_names.push_back("By"); real_names.push_back("Bz"); -#ifdef WARPX_RZ +#ifdef WARPX_DIM_RZ real_names.push_back("theta"); #endif diff --git a/Source/Evolve/WarpXEvolveEM.cpp b/Source/Evolve/WarpXEvolveEM.cpp index 32a4747db..16b5905d1 100644 --- a/Source/Evolve/WarpXEvolveEM.cpp +++ b/Source/Evolve/WarpXEvolveEM.cpp @@ -299,6 +299,7 @@ WarpX::OneStep_nosub (Real cur_time) // (And update guard cells immediately afterwards) #ifdef WARPX_USE_PSATD PushPSATD(dt[0]); + if (do_pml) DampPML(); FillBoundaryE(); FillBoundaryB(); #else @@ -481,6 +482,19 @@ WarpX::PushParticlesandDepose (int lev, Real cur_time) Efield_cax[lev][0].get(), Efield_cax[lev][1].get(), Efield_cax[lev][2].get(), Bfield_cax[lev][0].get(), Bfield_cax[lev][1].get(), Bfield_cax[lev][2].get(), cur_time, dt[lev]); +#ifdef WARPX_DIM_RZ + // This is called after all particles have deposited their current and charge. 
+ ApplyInverseVolumeScalingToCurrentDensity(current_fp[lev][0].get(), current_fp[lev][1].get(), current_fp[lev][2].get(), lev); + if (current_buf[lev][0].get()) { + ApplyInverseVolumeScalingToCurrentDensity(current_buf[lev][0].get(), current_buf[lev][1].get(), current_buf[lev][2].get(), lev-1); + } + if (rho_fp[lev].get()) { + ApplyInverseVolumeScalingToChargeDensity(rho_fp[lev].get(), lev); + if (charge_buf[lev].get()) { + ApplyInverseVolumeScalingToChargeDensity(charge_buf[lev].get(), lev-1); + } + } +#endif } void @@ -491,7 +505,7 @@ WarpX::ComputeDt () if (maxwell_fdtd_solver_id == 0) { // CFL time step Yee solver -#ifdef WARPX_RZ +#ifdef WARPX_DIM_RZ // Derived semi-analytically by R. Lehe deltat = cfl * 1./( std::sqrt((1+0.2105)/(dx[0]*dx[0]) + 1./(dx[1]*dx[1])) * PhysConst::c ); #else @@ -536,10 +550,7 @@ WarpX::computeMaxStepBoostAccelerator(amrex::Geometry a_geom){ WarpX::moving_window_dir == AMREX_SPACEDIM-1, "Can use zmax_plasma_to_compute_max_step only if " + "moving window along z. TODO: all directions."); - AMREX_ALWAYS_ASSERT_WITH_MESSAGE( - maxLevel() == 0, - "Can use zmax_plasma_to_compute_max_step only if " + - "max level = 0."); + AMREX_ALWAYS_ASSERT_WITH_MESSAGE( (WarpX::boost_direction[0]-0)*(WarpX::boost_direction[0]-0) + (WarpX::boost_direction[1]-0)*(WarpX::boost_direction[1]-0) + @@ -560,7 +571,12 @@ WarpX::computeMaxStepBoostAccelerator(amrex::Geometry a_geom){ const Real interaction_time_boost = (len_plasma_boost-zmin_domain_boost)/ (moving_window_v-v_plasma_boost); // Divide by dt, and update value of max_step. 
- const int computed_max_step = interaction_time_boost/dt[0]; + int computed_max_step; + if (do_subcycling){ + computed_max_step = interaction_time_boost/dt[0]; + } else { + computed_max_step = interaction_time_boost/dt[maxLevel()]; + } max_step = computed_max_step; Print()<<"max_step computed in computeMaxStepBoostAccelerator: " <<computed_max_step<<std::endl; diff --git a/Source/FieldSolver/SpectralSolver/SpectralAlgorithms/Make.package b/Source/FieldSolver/SpectralSolver/SpectralAlgorithms/Make.package index c62c21f44..ee8376865 100644 --- a/Source/FieldSolver/SpectralSolver/SpectralAlgorithms/Make.package +++ b/Source/FieldSolver/SpectralSolver/SpectralAlgorithms/Make.package @@ -1,6 +1,8 @@ CEXE_headers += SpectralBaseAlgorithm.H CEXE_headers += PsatdAlgorithm.H CEXE_sources += PsatdAlgorithm.cpp +CEXE_headers += PMLPsatdAlgorithm.H +CEXE_sources += PMLPsatdAlgorithm.cpp INCLUDE_LOCATIONS += $(WARPX_HOME)/Source/FieldSolver/SpectralSolver/SpectralAlgorithms VPATH_LOCATIONS += $(WARPX_HOME)/Source/FieldSolver/SpectralSolver/SpectralAlgorithms diff --git a/Source/FieldSolver/SpectralSolver/SpectralAlgorithms/PMLPsatdAlgorithm.H b/Source/FieldSolver/SpectralSolver/SpectralAlgorithms/PMLPsatdAlgorithm.H new file mode 100644 index 000000000..a2511b6b7 --- /dev/null +++ b/Source/FieldSolver/SpectralSolver/SpectralAlgorithms/PMLPsatdAlgorithm.H @@ -0,0 +1,34 @@ +#ifndef WARPX_PML_PSATD_ALGORITHM_H_ +#define WARPX_PML_PSATD_ALGORITHM_H_ + +#include <SpectralBaseAlgorithm.H> + +/* \brief Class that updates the field in spectral space + * and stores the coefficients of the corresponding update equation. 
+ */ +class PMLPsatdAlgorithm : public SpectralBaseAlgorithm +{ + public: + PMLPsatdAlgorithm(const SpectralKSpace& spectral_kspace, + const amrex::DistributionMapping& dm, + const int norder_x, const int norder_y, + const int norder_z, const bool nodal, + const amrex::Real dt); + + void InitializeSpectralCoefficients( + const SpectralKSpace& spectral_kspace, + const amrex::DistributionMapping& dm, + const amrex::Real dt); + + // Redefine functions from base class + virtual void pushSpectralFields(SpectralFieldData& f) const override final; + virtual int getRequiredNumberOfFields() const override final { + return SpectralPMLIndex::n_fields; + } + + private: + SpectralCoefficients C_coef, S_ck_coef; + +}; + +#endif // WARPX_PML_PSATD_ALGORITHM_H_ diff --git a/Source/FieldSolver/SpectralSolver/SpectralAlgorithms/PMLPsatdAlgorithm.cpp b/Source/FieldSolver/SpectralSolver/SpectralAlgorithms/PMLPsatdAlgorithm.cpp new file mode 100644 index 000000000..d76259d4c --- /dev/null +++ b/Source/FieldSolver/SpectralSolver/SpectralAlgorithms/PMLPsatdAlgorithm.cpp @@ -0,0 +1,146 @@ +#include <PMLPsatdAlgorithm.H> +#include <WarpXConst.H> +#include <cmath> + +using namespace amrex; + +/* \brief Initialize coefficients for the update equation */ +PMLPsatdAlgorithm::PMLPsatdAlgorithm( + const SpectralKSpace& spectral_kspace, + const DistributionMapping& dm, + const int norder_x, const int norder_y, + const int norder_z, const bool nodal, const Real dt) + // Initialize members of base class + : SpectralBaseAlgorithm( spectral_kspace, dm, + norder_x, norder_y, norder_z, nodal ) +{ + const BoxArray& ba = spectral_kspace.spectralspace_ba; + + // Allocate the arrays of coefficients + C_coef = SpectralCoefficients(ba, dm, 1, 0); + S_ck_coef = SpectralCoefficients(ba, dm, 1, 0); + + InitializeSpectralCoefficients(spectral_kspace, dm, dt); +} + +/* Advance the E and B field in spectral space (stored in `f`) + * over one time step */ +void 
+PMLPsatdAlgorithm::pushSpectralFields(SpectralFieldData& f) const{ + + // Loop over boxes + for (MFIter mfi(f.fields); mfi.isValid(); ++mfi){ + + const Box& bx = f.fields[mfi].box(); + + // Extract arrays for the fields to be updated + Array4<Complex> fields = f.fields[mfi].array(); + // Extract arrays for the coefficients + Array4<const Real> C_arr = C_coef[mfi].array(); + Array4<const Real> S_ck_arr = S_ck_coef[mfi].array(); + // Extract pointers for the k vectors + const Real* modified_kx_arr = modified_kx_vec[mfi].dataPtr(); +#if (AMREX_SPACEDIM==3) + const Real* modified_ky_arr = modified_ky_vec[mfi].dataPtr(); +#endif + const Real* modified_kz_arr = modified_kz_vec[mfi].dataPtr(); + + // Loop over indices within one box + ParallelFor(bx, + [=] AMREX_GPU_DEVICE(int i, int j, int k) noexcept + { + // Record old values of the fields to be updated + using Idx = SpectralPMLIndex; + const Complex Ex_old = fields(i,j,k,Idx::Exy) \ + + fields(i,j,k,Idx::Exz); + const Complex Ey_old = fields(i,j,k,Idx::Eyx) \ + + fields(i,j,k,Idx::Eyz); + const Complex Ez_old = fields(i,j,k,Idx::Ezx) \ + + fields(i,j,k,Idx::Ezy); + const Complex Bx_old = fields(i,j,k,Idx::Bxy) \ + + fields(i,j,k,Idx::Bxz); + const Complex By_old = fields(i,j,k,Idx::Byx) \ + + fields(i,j,k,Idx::Byz); + const Complex Bz_old = fields(i,j,k,Idx::Bzx) \ + + fields(i,j,k,Idx::Bzy); + // k vector values, and coefficients + const Real kx = modified_kx_arr[i]; +#if (AMREX_SPACEDIM==3) + const Real ky = modified_ky_arr[j]; + const Real kz = modified_kz_arr[k]; +#else + constexpr Real ky = 0; + const Real kz = modified_kz_arr[j]; +#endif + constexpr Real c2 = PhysConst::c*PhysConst::c; + const Complex I = Complex{0,1}; + const Real C = C_arr(i,j,k); + const Real S_ck = S_ck_arr(i,j,k); + + // Update E + fields(i,j,k,Idx::Exy) = C*fields(i,j,k,Idx::Exy) + S_ck*c2*I*ky*Bz_old; + fields(i,j,k,Idx::Exz) = C*fields(i,j,k,Idx::Exz) - S_ck*c2*I*kz*By_old; + fields(i,j,k,Idx::Eyz) = C*fields(i,j,k,Idx::Eyz) + 
S_ck*c2*I*kz*Bx_old; + fields(i,j,k,Idx::Eyx) = C*fields(i,j,k,Idx::Eyx) - S_ck*c2*I*kx*Bz_old; + fields(i,j,k,Idx::Ezx) = C*fields(i,j,k,Idx::Ezx) + S_ck*c2*I*kx*By_old; + fields(i,j,k,Idx::Ezy) = C*fields(i,j,k,Idx::Ezy) - S_ck*c2*I*ky*Bx_old; + // Update B + fields(i,j,k,Idx::Bxy) = C*fields(i,j,k,Idx::Bxy) - S_ck*I*ky*Ez_old; + fields(i,j,k,Idx::Bxz) = C*fields(i,j,k,Idx::Bxz) + S_ck*I*kz*Ey_old; + fields(i,j,k,Idx::Byz) = C*fields(i,j,k,Idx::Byz) - S_ck*I*kz*Ex_old; + fields(i,j,k,Idx::Byx) = C*fields(i,j,k,Idx::Byx) + S_ck*I*kx*Ez_old; + fields(i,j,k,Idx::Bzx) = C*fields(i,j,k,Idx::Bzx) - S_ck*I*kx*Ey_old; + fields(i,j,k,Idx::Bzy) = C*fields(i,j,k,Idx::Bzy) + S_ck*I*ky*Ex_old; + }); + } +}; + +void PMLPsatdAlgorithm::InitializeSpectralCoefficients ( + const SpectralKSpace& spectral_kspace, + const amrex::DistributionMapping& dm, + const amrex::Real dt) +{ + const BoxArray& ba = spectral_kspace.spectralspace_ba; + // Fill them with the right values: + // Loop over boxes and allocate the corresponding coefficients + // for each box owned by the local MPI proc + for (MFIter mfi(ba, dm); mfi.isValid(); ++mfi){ + + const Box& bx = ba[mfi]; + + // Extract pointers for the k vectors + const Real* modified_kx = modified_kx_vec[mfi].dataPtr(); +#if (AMREX_SPACEDIM==3) + const Real* modified_ky = modified_ky_vec[mfi].dataPtr(); +#endif + const Real* modified_kz = modified_kz_vec[mfi].dataPtr(); + // Extract arrays for the coefficients + Array4<Real> C = C_coef[mfi].array(); + Array4<Real> S_ck = S_ck_coef[mfi].array(); + + // Loop over indices within one box + ParallelFor(bx, + [=] AMREX_GPU_DEVICE(int i, int j, int k) noexcept + { + // Calculate norm of vector + const Real k_norm = std::sqrt( + std::pow(modified_kx[i], 2) + +#if (AMREX_SPACEDIM==3) + std::pow(modified_ky[j], 2) + + std::pow(modified_kz[k], 2)); +#else + std::pow(modified_kz[j], 2)); +#endif + + // Calculate coefficients + constexpr Real c = PhysConst::c; + if (k_norm != 0){ + C(i,j,k) = 
std::cos(c*k_norm*dt); + S_ck(i,j,k) = std::sin(c*k_norm*dt)/(c*k_norm); + } else { // Handle k_norm = 0, by using the analytical limit + C(i,j,k) = 1.; + S_ck(i,j,k) = dt; + } + }); + } +}; diff --git a/Source/FieldSolver/SpectralSolver/SpectralAlgorithms/PsatdAlgorithm.H b/Source/FieldSolver/SpectralSolver/SpectralAlgorithms/PsatdAlgorithm.H index 12718e38b..825d04dc2 100644 --- a/Source/FieldSolver/SpectralSolver/SpectralAlgorithms/PsatdAlgorithm.H +++ b/Source/FieldSolver/SpectralSolver/SpectralAlgorithms/PsatdAlgorithm.H @@ -13,14 +13,18 @@ class PsatdAlgorithm : public SpectralBaseAlgorithm PsatdAlgorithm(const SpectralKSpace& spectral_kspace, const amrex::DistributionMapping& dm, const int norder_x, const int norder_y, - const int norder_z, const bool nodal, const amrex::Real dt); - + const int norder_z, const bool nodal, + const amrex::Real dt); + // Redefine functions from base class + virtual void pushSpectralFields(SpectralFieldData& f) const override final; + virtual int getRequiredNumberOfFields() const override final { + return SpectralFieldIndex::n_fields; + } + void InitializeSpectralCoefficients(const SpectralKSpace& spectral_kspace, - const amrex::DistributionMapping& dm, + const amrex::DistributionMapping& dm, const amrex::Real dt); - void pushSpectralFields(SpectralFieldData& f) const override final; - private: SpectralCoefficients C_coef, S_ck_coef, X1_coef, X2_coef, X3_coef; }; diff --git a/Source/FieldSolver/SpectralSolver/SpectralAlgorithms/SpectralBaseAlgorithm.H b/Source/FieldSolver/SpectralSolver/SpectralAlgorithms/SpectralBaseAlgorithm.H index 602eb2473..5d5e376c1 100644 --- a/Source/FieldSolver/SpectralSolver/SpectralAlgorithms/SpectralBaseAlgorithm.H +++ b/Source/FieldSolver/SpectralSolver/SpectralAlgorithms/SpectralBaseAlgorithm.H @@ -14,9 +14,9 @@ class SpectralBaseAlgorithm { public: - // Member function that updates the fields in spectral space ; - // meant to be overridden in subclasses + // Virtual member function ; meant to be 
overridden in subclasses virtual void pushSpectralFields(SpectralFieldData& f) const = 0; + virtual int getRequiredNumberOfFields() const = 0; // The destructor should also be a virtual function, so that // a pointer to subclass of `SpectraBaseAlgorithm` actually // calls the subclass's destructor. diff --git a/Source/FieldSolver/SpectralSolver/SpectralFieldData.H b/Source/FieldSolver/SpectralSolver/SpectralFieldData.H index 7954414b8..6a2446981 100644 --- a/Source/FieldSolver/SpectralSolver/SpectralFieldData.H +++ b/Source/FieldSolver/SpectralSolver/SpectralFieldData.H @@ -8,18 +8,24 @@ // Declare type for spectral fields using SpectralField = amrex::FabArray< amrex::BaseFab <Complex> >; -/* Index for the fields that will be stored in spectral space */ +/* Index for the regular fields, when stored in spectral space */ struct SpectralFieldIndex { enum { Ex=0, Ey, Ez, Bx, By, Bz, Jx, Jy, Jz, rho_old, rho_new, n_fields }; // n_fields is automatically the total number of fields }; +/* Index for the PML fields, when stored in spectral space */ +struct SpectralPMLIndex { + enum { Exy=0, Exz, Eyx, Eyz, Ezx, Ezy, + Bxy, Bxz, Byx, Byz, Bzx, Bzy, n_fields }; + // n_fields is automatically the total number of fields +}; + /* \brief Class that stores the fields in spectral space, and performs the * Fourier transforms between real space and spectral space */ class SpectralFieldData { - friend class PsatdAlgorithm; // Define the FFTplans type, which holds one fft plan per box // (plans are only initialized for the boxes that are owned by @@ -32,8 +38,9 @@ class SpectralFieldData public: SpectralFieldData( const amrex::BoxArray& realspace_ba, - const SpectralKSpace& k_space, - const amrex::DistributionMapping& dm ); + const SpectralKSpace& k_space, + const amrex::DistributionMapping& dm, + const int n_field_required ); SpectralFieldData() = default; // Default constructor SpectralFieldData& operator=(SpectralFieldData&& field_data) = default; ~SpectralFieldData(); @@ -41,10 
+48,10 @@ class SpectralFieldData const int field_index, const int i_comp); void BackwardTransform( amrex::MultiFab& mf, const int field_index, const int i_comp); - - private: // `fields` stores fields in spectral space, as multicomponent FabArray SpectralField fields; + + private: // tmpRealField and tmpSpectralField store fields // right before/after the Fourier transform SpectralField tmpSpectralField; // contains Complexs diff --git a/Source/FieldSolver/SpectralSolver/SpectralFieldData.cpp b/Source/FieldSolver/SpectralSolver/SpectralFieldData.cpp index 948baf0a6..8f0853484 100644 --- a/Source/FieldSolver/SpectralSolver/SpectralFieldData.cpp +++ b/Source/FieldSolver/SpectralSolver/SpectralFieldData.cpp @@ -5,14 +5,14 @@ using namespace amrex; /* \brief Initialize fields in spectral space, and FFT plans */ SpectralFieldData::SpectralFieldData( const BoxArray& realspace_ba, const SpectralKSpace& k_space, - const DistributionMapping& dm ) + const DistributionMapping& dm, + const int n_field_required ) { const BoxArray& spectralspace_ba = k_space.spectralspace_ba; // Allocate the arrays that contain the fields in spectral space // (one component per field) - fields = SpectralField(spectralspace_ba, dm, - SpectralFieldIndex::n_fields, 0); + fields = SpectralField(spectralspace_ba, dm, n_field_required, 0); // Allocate temporary arrays - in real space and spectral space // These arrays will store the data just before/after the FFT diff --git a/Source/FieldSolver/SpectralSolver/SpectralSolver.H b/Source/FieldSolver/SpectralSolver/SpectralSolver.H index d4019a9a3..c570b017b 100644 --- a/Source/FieldSolver/SpectralSolver/SpectralSolver.H +++ b/Source/FieldSolver/SpectralSolver/SpectralSolver.H @@ -23,7 +23,8 @@ class SpectralSolver const amrex::DistributionMapping& dm, const int norder_x, const int norder_y, const int norder_z, const bool nodal, - const amrex::RealVect dx, const amrex::Real dt ); + const amrex::RealVect dx, const amrex::Real dt, + const bool pml=false ); 
/* \brief Transform the component `i_comp` of MultiFab `mf` * to spectral space, and store the corresponding result internally diff --git a/Source/FieldSolver/SpectralSolver/SpectralSolver.cpp b/Source/FieldSolver/SpectralSolver/SpectralSolver.cpp index c21c3cfb1..4b9def013 100644 --- a/Source/FieldSolver/SpectralSolver/SpectralSolver.cpp +++ b/Source/FieldSolver/SpectralSolver/SpectralSolver.cpp @@ -1,19 +1,29 @@ #include <SpectralKSpace.H> #include <SpectralSolver.H> #include <PsatdAlgorithm.H> +#include <PMLPsatdAlgorithm.H> /* \brief Initialize the spectral Maxwell solver * * This function selects the spectral algorithm to be used, allocates the * corresponding coefficients for the discretized field update equation, * and prepares the structures that store the fields in spectral space. + * + * \param norder_x Order of accuracy of the spatial derivatives along x + * \param norder_y Order of accuracy of the spatial derivatives along y + * \param norder_z Order of accuracy of the spatial derivatives along z + * \param nodal Whether the solver is applied to a nodal or staggered grid + * \param dx Cell size along each dimension + * \param dt Time step + * \param pml Whether the boxes in which the solver is applied are PML boxes */ SpectralSolver::SpectralSolver( const amrex::BoxArray& realspace_ba, const amrex::DistributionMapping& dm, const int norder_x, const int norder_y, const int norder_z, const bool nodal, - const amrex::RealVect dx, const amrex::Real dt ) { + const amrex::RealVect dx, const amrex::Real dt, + const bool pml ) { // Initialize all structures using the same distribution mapping dm @@ -24,12 +34,16 @@ SpectralSolver::SpectralSolver( // - Select the algorithm depending on the input parameters // Initialize the corresponding coefficients over k space - // TODO: Add more algorithms + selection depending on input parameters - // For the moment, this only uses the standard PsatdAlgorithm - algorithm = std::unique_ptr<PsatdAlgorithm>( new 
PsatdAlgorithm( + if (pml) { + algorithm = std::unique_ptr<PMLPsatdAlgorithm>( new PMLPsatdAlgorithm( + k_space, dm, norder_x, norder_y, norder_z, nodal, dt ) ); + } else { + algorithm = std::unique_ptr<PsatdAlgorithm>( new PsatdAlgorithm( k_space, dm, norder_x, norder_y, norder_z, nodal, dt ) ); + } // - Initialize arrays for fields in spectral space + FFT plans - field_data = SpectralFieldData( realspace_ba, k_space, dm ); + field_data = SpectralFieldData( realspace_ba, k_space, dm, + algorithm->getRequiredNumberOfFields() ); }; diff --git a/Source/FieldSolver/WarpXPushFieldsEM.cpp b/Source/FieldSolver/WarpXPushFieldsEM.cpp index 4fce4717b..1df05bc0f 100644 --- a/Source/FieldSolver/WarpXPushFieldsEM.cpp +++ b/Source/FieldSolver/WarpXPushFieldsEM.cpp @@ -18,6 +18,40 @@ using namespace amrex; #ifdef WARPX_USE_PSATD +namespace { + void + PushPSATDSinglePatch ( + SpectralSolver& solver, + std::array<std::unique_ptr<amrex::MultiFab>,3>& Efield, + std::array<std::unique_ptr<amrex::MultiFab>,3>& Bfield, + std::array<std::unique_ptr<amrex::MultiFab>,3>& current, + std::unique_ptr<amrex::MultiFab>& rho ) { + + using Idx = SpectralFieldIndex; + + // Perform forward Fourier transform + solver.ForwardTransform(*Efield[0], Idx::Ex); + solver.ForwardTransform(*Efield[1], Idx::Ey); + solver.ForwardTransform(*Efield[2], Idx::Ez); + solver.ForwardTransform(*Bfield[0], Idx::Bx); + solver.ForwardTransform(*Bfield[1], Idx::By); + solver.ForwardTransform(*Bfield[2], Idx::Bz); + solver.ForwardTransform(*current[0], Idx::Jx); + solver.ForwardTransform(*current[1], Idx::Jy); + solver.ForwardTransform(*current[2], Idx::Jz); + solver.ForwardTransform(*rho, Idx::rho_old, 0); + solver.ForwardTransform(*rho, Idx::rho_new, 1); + // Advance fields in spectral space + solver.pushSpectralFields(); + // Perform backward Fourier Transform + solver.BackwardTransform(*Efield[0], Idx::Ex); + solver.BackwardTransform(*Efield[1], Idx::Ey); + solver.BackwardTransform(*Efield[2], Idx::Ez); + 
solver.BackwardTransform(*Bfield[0], Idx::Bx); + solver.BackwardTransform(*Bfield[1], Idx::By); + solver.BackwardTransform(*Bfield[2], Idx::Bz); + } +} void WarpX::PushPSATD (amrex::Real a_dt) @@ -31,38 +65,25 @@ WarpX::PushPSATD (amrex::Real a_dt) } else { PushPSATD_localFFT(lev, a_dt); } + + // Evolve the fields in the PML boxes + if (do_pml && pml[lev]->ok()) { + pml[lev]->PushPSATD(); + } } } -void WarpX::PushPSATD_localFFT (int lev, amrex::Real /* dt */) +void +WarpX::PushPSATD_localFFT (int lev, amrex::Real /* dt */) { - auto& solver = *spectral_solver_fp[lev]; - - // Perform forward Fourier transform - solver.ForwardTransform(*Efield_fp[lev][0], SpectralFieldIndex::Ex); - solver.ForwardTransform(*Efield_fp[lev][1], SpectralFieldIndex::Ey); - solver.ForwardTransform(*Efield_fp[lev][2], SpectralFieldIndex::Ez); - solver.ForwardTransform(*Bfield_fp[lev][0], SpectralFieldIndex::Bx); - solver.ForwardTransform(*Bfield_fp[lev][1], SpectralFieldIndex::By); - solver.ForwardTransform(*Bfield_fp[lev][2], SpectralFieldIndex::Bz); - solver.ForwardTransform(*current_fp[lev][0], SpectralFieldIndex::Jx); - solver.ForwardTransform(*current_fp[lev][1], SpectralFieldIndex::Jy); - solver.ForwardTransform(*current_fp[lev][2], SpectralFieldIndex::Jz); - solver.ForwardTransform(*rho_fp[lev], SpectralFieldIndex::rho_old, 0); - solver.ForwardTransform(*rho_fp[lev], SpectralFieldIndex::rho_new, 1); - - // Advance fields in spectral space - solver.pushSpectralFields(); - - // Perform backward Fourier Transform - solver.BackwardTransform(*Efield_fp[lev][0], SpectralFieldIndex::Ex); - solver.BackwardTransform(*Efield_fp[lev][1], SpectralFieldIndex::Ey); - solver.BackwardTransform(*Efield_fp[lev][2], SpectralFieldIndex::Ez); - solver.BackwardTransform(*Bfield_fp[lev][0], SpectralFieldIndex::Bx); - solver.BackwardTransform(*Bfield_fp[lev][1], SpectralFieldIndex::By); - solver.BackwardTransform(*Bfield_fp[lev][2], SpectralFieldIndex::Bz); + // Update the fields on the fine and coarse patch 
+ PushPSATDSinglePatch( *spectral_solver_fp[lev], + Efield_fp[lev], Bfield_fp[lev], current_fp[lev], rho_fp[lev] ); + if (spectral_solver_cp[lev]) { + PushPSATDSinglePatch( *spectral_solver_cp[lev], + Efield_cp[lev], Bfield_cp[lev], current_cp[lev], rho_cp[lev] ); + } } - #endif void @@ -560,3 +581,143 @@ WarpX::EvolveF (int lev, PatchType patch_type, Real a_dt, DtType a_dt_type) } } +#ifdef WARPX_DIM_RZ +// This scales the current by the inverse volume and wraps around the deposition at negative radius. +// It is faster to apply this on the grid than to do it particle by particle. +// It is put here since there isn't another nice place for it. +void +WarpX::ApplyInverseVolumeScalingToCurrentDensity (MultiFab* Jx, MultiFab* Jy, MultiFab* Jz, int lev) +{ + const long ngJ = Jx->nGrow(); + const std::array<Real,3>& dx = WarpX::CellSize(lev); + const Real dr = dx[0]; + + Box tilebox; + + for ( MFIter mfi(*Jx, TilingIfNotGPU()); mfi.isValid(); ++mfi ) + { + + Array4<Real> const& Jr_arr = Jx->array(mfi); + Array4<Real> const& Jt_arr = Jy->array(mfi); + Array4<Real> const& Jz_arr = Jz->array(mfi); + + tilebox = mfi.tilebox(); + Box tbr = convert(tilebox, WarpX::jx_nodal_flag); + Box tbt = convert(tilebox, WarpX::jy_nodal_flag); + Box tbz = convert(tilebox, WarpX::jz_nodal_flag); + + // Lower corner of tile box physical domain + // Note that this is done before the tilebox.grow so that + // these do not include the guard cells. + const std::array<Real, 3>& xyzmin = WarpX::LowerCorner(tilebox, lev); + const Dim3 lo = lbound(tilebox); + const Real rmin = xyzmin[0]; + const int irmin = lo.x; + + // Rescale current in r-z mode since the inverse volume factor was not + // included in the current deposition. + amrex::ParallelFor(tbr, + [=] AMREX_GPU_DEVICE (int i, int j, int k) + { + // Wrap the current density deposited in the guard cells around + // to the cells above the axis. + // Note that Jr(i==0) is at 1/2 dr. + if (rmin == 0. 
&& 0 <= i && i < ngJ) { + Jr_arr(i,j,0) -= Jr_arr(-1-i,j,0); + } + // Apply the inverse volume scaling + // Since Jr is not cell centered in r, no need for distinction + // between on axis and off-axis factors + const amrex::Real r = std::abs(rmin + (i - irmin + 0.5)*dr); + Jr_arr(i,j,0) /= (2.*MathConst::pi*r); + }); + amrex::ParallelFor(tbt, + [=] AMREX_GPU_DEVICE (int i, int j, int k) + { + // Wrap the current density deposited in the guard cells around + // to the cells above the axis. + // Jt is located on the boundary + if (rmin == 0. && 0 < i && i <= ngJ) { + Jt_arr(i,j,0) += Jt_arr(-i,j,0); + } + + // Apply the inverse volume scaling + // Jt is forced to zero on axis. + const amrex::Real r = std::abs(rmin + (i - irmin)*dr); + if (r == 0.) { + Jt_arr(i,j,0) = 0.; + } else { + Jt_arr(i,j,0) /= (2.*MathConst::pi*r); + } + }); + amrex::ParallelFor(tbz, + [=] AMREX_GPU_DEVICE (int i, int j, int k) + { + // Wrap the current density deposited in the guard cells around + // to the cells above the axis. + // Jz is located on the boundary + if (rmin == 0. && 0 < i && i <= ngJ) { + Jz_arr(i,j,0) += Jz_arr(-i,j,0); + } + + // Apply the inverse volume scaling + const amrex::Real r = std::abs(rmin + (i - irmin)*dr); + if (r == 0.) 
{ + // Verboncoeur JCP 164, 421-427 (2001) : corrected volume on axis + Jz_arr(i,j,0) /= (MathConst::pi*dr/3.); + } else { + Jz_arr(i,j,0) /= (2.*MathConst::pi*r); + } + }); + } +} + +void +WarpX::ApplyInverseVolumeScalingToChargeDensity (MultiFab* Rho, int lev) +{ + const long ngRho = Rho->nGrow(); + const std::array<Real,3>& dx = WarpX::CellSize(lev); + const Real dr = dx[0]; + + Box tilebox; + + for ( MFIter mfi(*Rho, TilingIfNotGPU()); mfi.isValid(); ++mfi ) + { + + Array4<Real> const& Rho_arr = Rho->array(mfi); + + tilebox = mfi.tilebox(); + Box tb = convert(tilebox, IntVect::TheUnitVector()); + + // Lower corner of tile box physical domain + // Note that this is done before the tilebox.grow so that + // these do not include the guard cells. + const std::array<Real, 3>& xyzmin = WarpX::LowerCorner(tilebox, lev); + const Dim3 lo = lbound(tilebox); + const Real rmin = xyzmin[0]; + const int irmin = lo.x; + + // Rescale charge in r-z mode since the inverse volume factor was not + // included in the charge deposition. + amrex::ParallelFor(tb, Rho->nComp(), + [=] AMREX_GPU_DEVICE (int i, int j, int k, int icomp) + { + // Wrap the charge density deposited in the guard cells around + // to the cells above the axis. + // Rho is located on the boundary + if (rmin == 0. && 0 < i && i <= ngRho) { + Rho_arr(i,j,0,icomp) += Rho_arr(-i,j,0,icomp); + } + + // Apply the inverse volume scaling + const amrex::Real r = std::abs(rmin + (i - irmin)*dr); + if (r == 0.) 
{ + // Verboncoeur JCP 164, 421-427 (2001) : corrected volume on axis + Rho_arr(i,j,0,icomp) /= (MathConst::pi*dr/3.); + } else { + Rho_arr(i,j,0,icomp) /= (2.*MathConst::pi*r); + } + }); + } +} +#endif diff --git a/Source/FortranInterface/WarpX_f.H b/Source/FortranInterface/WarpX_f.H index 0440148eb..aac23f781 100644 --- a/Source/FortranInterface/WarpX_f.H +++ b/Source/FortranInterface/WarpX_f.H @@ -62,7 +62,7 @@ #define WRPX_PUSH_LEAPFROG warpx_push_leapfrog_2d #define WRPX_PUSH_LEAPFROG_POSITIONS warpx_push_leapfrog_positions_2d -#ifdef WARPX_RZ +#ifdef WARPX_DIM_RZ #define WRPX_COMPUTE_DIVE warpx_compute_dive_rz #else #define WRPX_COMPUTE_DIVE warpx_compute_dive_2d @@ -75,22 +75,6 @@ extern "C" { #endif - // Charge deposition - void warpx_charge_deposition(amrex::Real* rho, - const long* np, const amrex::Real* xp, const amrex::Real* yp, const amrex::Real* zp, const amrex::Real* w, - const amrex::Real* q, const amrex::Real* xmin, const amrex::Real* ymin, const amrex::Real* zmin, - const amrex::Real* dx, const amrex::Real* dy, const amrex::Real* dz, - const long* nx, const long* ny, const long* nz, - const long* nxguard, const long* nyguard, const long* nzguard, - const long* nox, const long* noy,const long* noz, - const long* lvect, const long* charge_depo_algo); - - // Charge deposition finalize for RZ - void warpx_charge_deposition_rz_volume_scaling( - amrex::Real* rho, const long* rho_ng, const int* rho_ntot, - const amrex::Real* rmin, - const amrex::Real* dr); - // Current deposition void warpx_current_deposition( amrex::Real* jx, const long* jx_ng, const int* jx_ntot, @@ -106,34 +90,6 @@ extern "C" const long* nox, const long* noy,const long* noz, const int* l_nodal, const long* lvect, const long* current_depo_algo); - // Current deposition finalize for RZ - void warpx_current_deposition_rz_volume_scaling( - amrex::Real* jx, const long* jx_ng, const int* jx_ntot, - amrex::Real* jy, const long* jy_ng, const int* jy_ntot, - amrex::Real* jz, const long* jz_ng, 
const int* jz_ntot, - const amrex::Real* rmin, - const amrex::Real* dr); - - // Field gathering - - void warpx_geteb_energy_conserving(const long* np, - const amrex::Real* xp, const amrex::Real* yp, const amrex::Real* zp, - amrex::Real* exp, amrex::Real* eyp, amrex::Real* ezp, - amrex::Real* bxp, amrex::Real* byp, amrex::Real* bzp, - const int* ixyzmin, - const amrex::Real* xmin, const amrex::Real* ymin, const amrex::Real* zmin, - const amrex::Real* dx, const amrex::Real* dy, const amrex::Real* dz, - const long* nox, const long* noy, const long* noz, - const amrex::Real* exg, const int* exg_lo, const int* exg_hi, - const amrex::Real* eyg, const int* eyg_lo, const int* eyg_hi, - const amrex::Real* ezg, const int* ezg_lo, const int* ezg_hi, - const amrex::Real* bxg, const int* bxg_lo, const int* bxg_hi, - const amrex::Real* byg, const int* byg_lo, const int* byg_hi, - const amrex::Real* bzg, const int* bzg_lo, const int* bzg_hi, - const int* ll4symtry, const int* l_lower_order_in_v, - const int* l_nodal, const long* lvect, - const long* field_gathe_algo); - // Particle pusher (velocity and position) void warpx_particle_pusher(const long* np, @@ -342,7 +298,7 @@ extern "C" const BL_FORT_FAB_ARG_ANYD(ey), const BL_FORT_FAB_ARG_ANYD(ez), const amrex::Real* dx -#ifdef WARPX_RZ +#ifdef WARPX_DIM_RZ ,const amrex::Real* rmin #endif ); diff --git a/Source/FortranInterface/WarpX_picsar.F90 b/Source/FortranInterface/WarpX_picsar.F90 index dc47245dd..34084d753 100644 --- a/Source/FortranInterface/WarpX_picsar.F90 +++ b/Source/FortranInterface/WarpX_picsar.F90 @@ -1,20 +1,15 @@ #if (AMREX_SPACEDIM == 3) -#define WRPX_PXR_GETEB_ENERGY_CONSERVING geteb3d_energy_conserving_generic #define WRPX_PXR_CURRENT_DEPOSITION depose_jxjyjz_generic #elif (AMREX_SPACEDIM == 2) -#ifdef WARPX_RZ +#ifdef WARPX_DIM_RZ -#define WRPX_PXR_GETEB_ENERGY_CONSERVING geteb2drz_energy_conserving_generic #define WRPX_PXR_CURRENT_DEPOSITION depose_jrjtjz_generic_rz -#define WRPX_PXR_RZ_VOLUME_SCALING_RHO 
apply_rz_volume_scaling_rho -#define WRPX_PXR_RZ_VOLUME_SCALING_J apply_rz_volume_scaling_j #else -#define WRPX_PXR_GETEB_ENERGY_CONSERVING geteb2dxz_energy_conserving_generic #define WRPX_PXR_CURRENT_DEPOSITION depose_jxjyjz_generic_2d #endif @@ -56,227 +51,6 @@ contains ! _________________________________________________________________ !> !> @brief - !> Main subroutine for the field gathering process - !> - !> @param[in] np number of particles - !> @param[in] xp,yp,zp particle position arrays - !> @param[in] ex,ey,ez particle electric fields in each direction - !> @param[in] bx,by,bz particle magnetic fields in each direction - !> @param[in] ixyzmin tile grid minimum index - !> @param[in] xmin,ymin,zmin tile grid minimum position - !> @param[in] dx,dy,dz space discretization steps - !> @param[in] xyzmin grid minimum position - !> @param[in] dxyz space discretization steps - !> @param[in] nox,noy,noz interpolation order - !> @param[in] exg,eyg,ezg electric field grid arrays - !> @param[in] bxg,byg,bzg electric field grid arrays - !> @param[in] lvect vector length - !> - subroutine warpx_geteb_energy_conserving(np,xp,yp,zp, & - ex,ey,ez,bx,by,bz,ixyzmin,xmin,ymin,zmin,dx,dy,dz,nox,noy,noz, & - exg,exg_lo,exg_hi,eyg,eyg_lo,eyg_hi,ezg,ezg_lo,ezg_hi, & - bxg,bxg_lo,bxg_hi,byg,byg_lo,byg_hi,bzg,bzg_lo,bzg_hi, & - ll4symtry,l_lower_order_in_v, l_nodal,& - lvect,field_gathe_algo) & - bind(C, name="warpx_geteb_energy_conserving") - - integer, intent(in) :: exg_lo(AMREX_SPACEDIM), eyg_lo(AMREX_SPACEDIM), ezg_lo(AMREX_SPACEDIM), & - bxg_lo(AMREX_SPACEDIM), byg_lo(AMREX_SPACEDIM), bzg_lo(AMREX_SPACEDIM) - integer, intent(in) :: exg_hi(AMREX_SPACEDIM), eyg_hi(AMREX_SPACEDIM), ezg_hi(AMREX_SPACEDIM), & - bxg_hi(AMREX_SPACEDIM), byg_hi(AMREX_SPACEDIM), bzg_hi(AMREX_SPACEDIM) - integer, intent(in) :: ixyzmin(AMREX_SPACEDIM) - real(amrex_real), intent(in) :: xmin,ymin,zmin,dx,dy,dz - integer(c_long), intent(in) :: field_gathe_algo - integer(c_long), intent(in) :: np,nox,noy,noz 
- integer(c_int), intent(in) :: ll4symtry,l_lower_order_in_v, l_nodal - integer(c_long),intent(in) :: lvect - real(amrex_real), intent(in), dimension(np) :: xp,yp,zp - real(amrex_real), intent(out), dimension(np) :: ex,ey,ez,bx,by,bz - real(amrex_real),intent(in):: exg(*), eyg(*), ezg(*), bxg(*), byg(*), bzg(*) - logical(pxr_logical) :: pxr_ll4symtry, pxr_l_lower_order_in_v, pxr_l_nodal - - ! Compute the number of valid cells and guard cells - integer(c_long) :: exg_nvalid(AMREX_SPACEDIM), eyg_nvalid(AMREX_SPACEDIM), ezg_nvalid(AMREX_SPACEDIM), & - bxg_nvalid(AMREX_SPACEDIM), byg_nvalid(AMREX_SPACEDIM), bzg_nvalid(AMREX_SPACEDIM), & - exg_nguards(AMREX_SPACEDIM), eyg_nguards(AMREX_SPACEDIM), ezg_nguards(AMREX_SPACEDIM), & - bxg_nguards(AMREX_SPACEDIM), byg_nguards(AMREX_SPACEDIM), bzg_nguards(AMREX_SPACEDIM) - - pxr_ll4symtry = ll4symtry .eq. 1 - pxr_l_lower_order_in_v = l_lower_order_in_v .eq. 1 - pxr_l_nodal = l_nodal .eq. 1 - - exg_nguards = ixyzmin - exg_lo - eyg_nguards = ixyzmin - eyg_lo - ezg_nguards = ixyzmin - ezg_lo - bxg_nguards = ixyzmin - bxg_lo - byg_nguards = ixyzmin - byg_lo - bzg_nguards = ixyzmin - bzg_lo - exg_nvalid = exg_lo + exg_hi - 2_c_long*ixyzmin + 1_c_long - eyg_nvalid = eyg_lo + eyg_hi - 2_c_long*ixyzmin + 1_c_long - ezg_nvalid = ezg_lo + ezg_hi - 2_c_long*ixyzmin + 1_c_long - bxg_nvalid = bxg_lo + bxg_hi - 2_c_long*ixyzmin + 1_c_long - byg_nvalid = byg_lo + byg_hi - 2_c_long*ixyzmin + 1_c_long - bzg_nvalid = bzg_lo + bzg_hi - 2_c_long*ixyzmin + 1_c_long - - CALL WRPX_PXR_GETEB_ENERGY_CONSERVING(np,xp,yp,zp, & - ex,ey,ez,bx,by,bz,xmin,ymin,zmin,dx,dy,dz,nox,noy,noz, & - exg,exg_nguards,exg_nvalid,& - eyg,eyg_nguards,eyg_nvalid,& - ezg,ezg_nguards,ezg_nvalid,& - bxg,bxg_nguards,bxg_nvalid,& - byg,byg_nguards,byg_nvalid,& - bzg,bzg_nguards,bzg_nvalid,& - pxr_ll4symtry, pxr_l_lower_order_in_v, pxr_l_nodal, & - lvect, field_gathe_algo ) - - end subroutine warpx_geteb_energy_conserving - -! 
_________________________________________________________________ -!> -!> @brief -!> Main subroutine for the charge deposition -!> -!> @details -!> This subroutines enable to controle the interpolation order -!> via the parameters nox,noy,noz and the type of algorithm via -!> the parameter charge_depo_algo -! -!> @param[inout] rho charge array -!> @param[in] np number of particles -!> @param[in] xp,yp,zp particle position arrays -!> @param[in] w particle weight arrays -!> @param[in] q particle species charge -!> @param[in] xmin,ymin,zmin tile grid minimum position -!> @param[in] dx,dy,dz space discretization steps -!> @param[in] nx,ny,nz number of cells -!> @param[in] nxguard,nyguard,nzguard number of guard cells -!> @param[in] nox,noy,noz interpolation order -!> @param[in] lvect vector length -!> @param[in] charge_depo_algo algorithm choice for the charge deposition -!> -subroutine warpx_charge_deposition(rho,np,xp,yp,zp,w,q,xmin,ymin,zmin,dx,dy,dz,nx,ny,nz,& - nxguard,nyguard,nzguard,nox,noy,noz,lvect,charge_depo_algo) & - bind(C, name="warpx_charge_deposition") - - integer(c_long), intent(IN) :: np - integer(c_long), intent(IN) :: nx,ny,nz - integer(c_long), intent(IN) :: nxguard,nyguard,nzguard - integer(c_long), intent(IN) :: nox,noy,noz - real(amrex_real), intent(IN OUT) :: rho(*) - real(amrex_real), intent(IN) :: q - real(amrex_real), intent(IN) :: dx,dy,dz - real(amrex_real), intent(IN) :: xmin,ymin,zmin - real(amrex_real), intent(IN), dimension(np) :: xp,yp,zp,w - integer(c_long), intent(IN) :: lvect - integer(c_long), intent(IN) :: charge_depo_algo - - - ! Dimension 3 -#if (AMREX_SPACEDIM==3) - - SELECT CASE(charge_depo_algo) - - ! 
Scalar classical charge deposition subroutines - CASE(1) - IF ((nox.eq.1).and.(noy.eq.1).and.(noz.eq.1)) THEN - - CALL depose_rho_scalar_1_1_1(rho,np,xp,yp,zp,w,q,xmin,ymin,zmin,dx,dy,dz,nx,ny,nz,& - nxguard,nyguard,nzguard,lvect) - - ELSE IF ((nox.eq.2).and.(noy.eq.2).and.(noz.eq.2)) THEN - - CALL depose_rho_scalar_2_2_2(rho,np,xp,yp,zp,w,q,xmin,ymin,zmin,dx,dy,dz,nx,ny,nz,& - nxguard,nyguard,nzguard,lvect) - - ELSE IF ((nox.eq.3).and.(noy.eq.3).and.(noz.eq.3)) THEN - - CALL depose_rho_scalar_3_3_3(rho,np,xp,yp,zp,w,q,xmin,ymin,zmin,dx,dy,dz,nx,ny,nz,& - nxguard,nyguard,nzguard,lvect) - - ELSE - CALL pxr_depose_rho_n(rho,np,xp,yp,zp,w,q,xmin,ymin,zmin,dx,dy,dz,nx,ny,nz,& - nxguard,nyguard,nzguard,nox,noy,noz, & - .TRUE._c_long,.FALSE._c_long) - ENDIF - - ! Optimized subroutines - CASE DEFAULT - - IF ((nox.eq.1).and.(noy.eq.1).and.(noz.eq.1)) THEN - CALL depose_rho_vecHVv2_1_1_1(rho,np,xp,yp,zp,w,q,xmin,ymin,zmin,dx,dy,dz,nx,ny,nz,& - nxguard,nyguard,nzguard,lvect) - - ELSE IF ((nox.eq.2).and.(noy.eq.2).and.(noz.eq.2)) THEN - CALL depose_rho_vecHVv2_2_2_2(rho,np,xp,yp,zp,w,q,xmin,ymin,zmin,dx,dy,dz,nx,ny,nz,& - nxguard,nyguard,nzguard,lvect) - - ELSE - CALL pxr_depose_rho_n(rho,np,xp,yp,zp,w,q,xmin,ymin,zmin,dx,dy,dz,nx,ny,nz,& - nxguard,nyguard,nzguard,nox,noy,noz, & - .TRUE._c_long,.FALSE._c_long) - ENDIF - END SELECT - - ! Dimension 2 -#elif (AMREX_SPACEDIM==2) - -#ifdef WARPX_RZ - logical(pxr_logical) :: l_2drz = .TRUE._c_long -#else - logical(pxr_logical) :: l_2drz = .FALSE._c_long -#endif - - CALL pxr_depose_rho_n_2dxz(rho,np,xp,yp,zp,w,q,xmin,zmin,dx,dz,nx,nz,& - nxguard,nzguard,nox,noz, & - .TRUE._c_long, .FALSE._c_long, l_2drz, 0_c_long) - -#endif - - end subroutine warpx_charge_deposition - - ! _________________________________________________________________ - !> - !> @brief - !> Applies the inverse volume scaling for RZ charge deposition - !> - !> @details - !> The scaling is done for both single mode (FDTD) and - !> multi mode (spectral) (todo) - ! 
- !> @param[inout] rho charge array - !> @param[in] rmin tile grid minimum radius - !> @param[in] dr radial space discretization steps - !> @param[in] nx,ny,nz number of cells - !> @param[in] nxguard,nyguard,nzguard number of guard cells - !> - subroutine warpx_charge_deposition_rz_volume_scaling(rho,rho_ng,rho_ntot,rmin,dr) & - bind(C, name="warpx_charge_deposition_rz_volume_scaling") - - integer, intent(in) :: rho_ntot(AMREX_SPACEDIM) - integer(c_long), intent(in) :: rho_ng - real(amrex_real), intent(IN OUT):: rho(*) - real(amrex_real), intent(IN) :: rmin, dr - -#ifdef WARPX_RZ - integer(c_long) :: type_rz_depose = 1 -#endif - - ! Compute the number of valid cells and guard cells - integer(c_long) :: rho_nvalid(AMREX_SPACEDIM), rho_nguards(AMREX_SPACEDIM) - rho_nvalid = rho_ntot - 2*rho_ng - rho_nguards = rho_ng - -#ifdef WARPX_RZ - CALL WRPX_PXR_RZ_VOLUME_SCALING_RHO( & - rho,rho_nguards,rho_nvalid, & - rmin,dr,type_rz_depose) -#endif - - end subroutine warpx_charge_deposition_rz_volume_scaling - - ! _________________________________________________________________ - !> - !> @brief !> Main subroutine for the current deposition !> !> @details @@ -355,165 +129,4 @@ subroutine warpx_charge_deposition(rho,np,xp,yp,zp,w,q,xmin,ymin,zmin,dx,dy,dz,n end subroutine warpx_current_deposition - ! _________________________________________________________________ - !> - !> @brief - !> Applies the inverse volume scaling for RZ current deposition - !> - !> @details - !> The scaling is done for single mode only - ! 
- !> @param[inout] jx,jy,jz current arrays - !> @param[in] jx_ntot,jy_ntot,jz_ntot vectors with total number of - !> cells (including guard cells) along each axis for each current - !> @param[in] jx_ng,jy_ng,jz_ng vectors with number of guard cells along each - !> axis for each current - !> @param[in] rmin tile grid minimum radius - !> @param[in] dr radial space discretization steps - !> - subroutine warpx_current_deposition_rz_volume_scaling( & - jx,jx_ng,jx_ntot,jy,jy_ng,jy_ntot,jz,jz_ng,jz_ntot, & - rmin,dr) & - bind(C, name="warpx_current_deposition_rz_volume_scaling") - - integer, intent(in) :: jx_ntot(AMREX_SPACEDIM), jy_ntot(AMREX_SPACEDIM), jz_ntot(AMREX_SPACEDIM) - integer(c_long), intent(in) :: jx_ng, jy_ng, jz_ng - real(amrex_real), intent(IN OUT):: jx(*), jy(*), jz(*) - real(amrex_real), intent(IN) :: rmin, dr - -#ifdef WARPX_RZ - integer(c_long) :: type_rz_depose = 1 -#endif - ! Compute the number of valid cells and guard cells - integer(c_long) :: jx_nvalid(AMREX_SPACEDIM), jy_nvalid(AMREX_SPACEDIM), jz_nvalid(AMREX_SPACEDIM), & - jx_nguards(AMREX_SPACEDIM), jy_nguards(AMREX_SPACEDIM), jz_nguards(AMREX_SPACEDIM) - jx_nvalid = jx_ntot - 2*jx_ng - jy_nvalid = jy_ntot - 2*jy_ng - jz_nvalid = jz_ntot - 2*jz_ng - jx_nguards = jx_ng - jy_nguards = jy_ng - jz_nguards = jz_ng - -#ifdef WARPX_RZ - CALL WRPX_PXR_RZ_VOLUME_SCALING_J( & - jx,jx_nguards,jx_nvalid, & - jy,jy_nguards,jy_nvalid, & - jz,jz_nguards,jz_nvalid, & - rmin,dr,type_rz_depose) -#endif - - end subroutine warpx_current_deposition_rz_volume_scaling - - ! 
_________________________________________________________________ - !> - !> @brief - !> Main subroutine for the particle pusher (velocity and position) - !> - !> @param[in] np number of super-particles - !> @param[in] xp,yp,zp particle position arrays - !> @param[in] uxp,uyp,uzp normalized momentum in each direction - !> @param[in] gaminv particle Lorentz factors - !> @param[in] ex,ey,ez particle electric fields in each direction - !> @param[in] bx,by,bz particle magnetic fields in each direction - !> @param[in] q charge - !> @param[in] m masse - !> @param[in] dt time step - !> @param[in] particle_pusher_algo Particle pusher algorithm - subroutine warpx_particle_pusher(np,xp,yp,zp,uxp,uyp,uzp, & - gaminv,& - ex,ey,ez,bx,by,bz,q,m,dt, & - particle_pusher_algo) & - bind(C, name="warpx_particle_pusher") - - INTEGER(c_long), INTENT(IN) :: np - REAL(amrex_real),INTENT(INOUT) :: gaminv(np) - REAL(amrex_real),INTENT(INOUT) :: xp(np),yp(np),zp(np) - REAL(amrex_real),INTENT(INOUT) :: uxp(np),uyp(np),uzp(np) - REAL(amrex_real),INTENT(IN) :: ex(np),ey(np),ez(np) - REAL(amrex_real),INTENT(IN) :: bx(np),by(np),bz(np) - REAL(amrex_real),INTENT(IN) :: q,m,dt - INTEGER(c_long), INTENT(IN) :: particle_pusher_algo - - SELECT CASE (particle_pusher_algo) - - !! Vay pusher -- Full push - CASE (1_c_long) - CALL pxr_set_gamma(np,uxp,uyp,uzp,gaminv) - - CALL pxr_ebcancelpush3d(np,uxp,uyp,uzp,gaminv, & - ex,ey,ez, & - bx,by,bz,q,m,dt,0_c_long) - CASE DEFAULT - - ! Momentum pusher in a single loop - CALL pxr_boris_push_u_3d(np,uxp,uyp,uzp,& - gaminv, & - ex,ey,ez, & - bx,by,bz, & - q,m,dt) - - END SELECT - - !!!! --- push particle species positions a time step -#if (AMREX_SPACEDIM == 3) || (defined WARPX_RZ) - CALL pxr_pushxyz(np,xp,yp,zp,uxp,uyp,uzp,gaminv,dt) -#elif (AMREX_SPACEDIM == 2) - CALL pxr_pushxz(np,xp,zp,uxp,uzp,gaminv,dt) -#endif - - end subroutine warpx_particle_pusher - - - ! 
_________________________________________________________________ - !> - !> @brief - !> Main subroutine for the particle pusher (velocity) - !> - !> @param[in] np number of super-particles - !> @param[in] xp,yp,zp particle position arrays - !> @param[in] uxp,uyp,uzp normalized momentum in each direction - !> @param[in] gaminv particle Lorentz factors - !> @param[in] ex,ey,ez particle electric fields in each direction - !> @param[in] bx,by,bz particle magnetic fields in each direction - !> @param[in] q charge - !> @param[in] m masse - !> @param[in] dt time step - !> @param[in] particle_pusher_algo Particle pusher algorithm - subroutine warpx_particle_pusher_momenta(np,xp,yp,zp,uxp,uyp,uzp, & - gaminv,& - ex,ey,ez,bx,by,bz,q,m,dt, & - particle_pusher_algo) & - bind(C, name="warpx_particle_pusher_momenta") - - INTEGER(c_long), INTENT(IN) :: np - REAL(amrex_real),INTENT(INOUT) :: gaminv(np) - REAL(amrex_real),INTENT(IN) :: xp(np),yp(np),zp(np) - REAL(amrex_real),INTENT(INOUT) :: uxp(np),uyp(np),uzp(np) - REAL(amrex_real),INTENT(IN) :: ex(np),ey(np),ez(np) - REAL(amrex_real),INTENT(IN) :: bx(np),by(np),bz(np) - REAL(amrex_real),INTENT(IN) :: q,m,dt - INTEGER(c_long), INTENT(IN) :: particle_pusher_algo - - SELECT CASE (particle_pusher_algo) - - !! Vay pusher -- Full push - CASE (1_c_long) - CALL pxr_set_gamma(np,uxp,uyp,uzp,gaminv) - - CALL pxr_ebcancelpush3d(np,uxp,uyp,uzp,gaminv, & - ex,ey,ez, & - bx,by,bz,q,m,dt,0_c_long) - CASE DEFAULT - - ! 
Momentum pusher in a single loop - CALL pxr_boris_push_u_3d(np,uxp,uyp,uzp,& - gaminv, & - ex,ey,ez, & - bx,by,bz, & - q,m,dt) - - END SELECT - - end subroutine warpx_particle_pusher_momenta - end module warpx_to_pxr_module diff --git a/Source/Initialization/CustomDensityProb.H b/Source/Initialization/CustomDensityProb.H new file mode 100644 index 000000000..b00830e6c --- /dev/null +++ b/Source/Initialization/CustomDensityProb.H @@ -0,0 +1,49 @@ +#ifndef CUSTOM_DENSITY_PROB_H_ +#define CUSTOM_DENSITY_PROB_H_ + +#include <AMReX_ParmParse.H> +#include <AMReX_Arena.H> +#include <AMReX_Gpu.H> +#include <AMReX_Dim3.H> + +// An example of Custom Density Profile + +// struct whose getDensity returns density at a given position computed from +// a custom function, with runtime input parameters. +struct InjectorDensityCustom +{ + InjectorDensityCustom (std::string const& species_name) + : p(nullptr) + { + // Read parameters for custom density profile from file, and + // store them in managed memory. + amrex::ParmParse pp(species_name); + std::vector<amrex::Real> v; + pp.getarr("custom_profile_params", v); + p = static_cast<amrex::Real*> + (amrex::The_Managed_Arena()->alloc(sizeof(amrex::Real)*v.size())); + for (int i = 0; i < static_cast<int>(v.size()); ++i) { + p[i] = v[i]; + } + } + + // Return density at given position, using user-defined parameters + // stored in p. + AMREX_GPU_HOST_DEVICE + amrex::Real + getDensity (amrex::Real, amrex::Real, amrex::Real) const noexcept + { + return p[0]; + } + + // Note that we are not allowed to have non-trivial destructor. + // So we rely on clear() to free memory. 
+ void clear () { + amrex::The_Managed_Arena()->free(p); + } + +private: + amrex::Real* p; +}; + +#endif diff --git a/Source/Initialization/CustomDensityProb.cpp b/Source/Initialization/CustomDensityProb.cpp deleted file mode 100644 index 3efcb13c5..000000000 --- a/Source/Initialization/CustomDensityProb.cpp +++ /dev/null @@ -1,12 +0,0 @@ -#include <PlasmaInjector.H> - -#include <iostream> - -using namespace amrex; - -/// -/// This "custom" density profile just does constant -/// -Real CustomDensityProfile::getDensity(Real x, Real y, Real z) const { - return params[0]; -} diff --git a/Source/Initialization/CustomMomentumProb.H b/Source/Initialization/CustomMomentumProb.H new file mode 100644 index 000000000..f8bc29a05 --- /dev/null +++ b/Source/Initialization/CustomMomentumProb.H @@ -0,0 +1,30 @@ +#ifndef CUSTOM_MOMENTUM_PROB_H +#define CUSTOM_MOMENTUM_PROB_H + +#include <AMReX_ParmParse.H> +#include <AMReX_Gpu.H> +#include <AMReX_Arena.H> +#include <AMReX_Dim3.H> + +// An example of Custom Momentum Profile + +// struct whose getMomentum returns momentum at a given position computed from +// a custom function. +struct InjectorMomentumCustom +{ + InjectorMomentumCustom (std::string const& /*a_species_name*/) {} + + // Return momentum at given position (illustration: momentum=0). + AMREX_GPU_HOST_DEVICE + amrex::XDim3 + getMomentum (amrex::Real, amrex::Real, amrex::Real) const noexcept + { + return {0., 0., 0.}; + } + + // Note that we are not allowed to have non-trivial destructor. + // So we rely on clear() to free memory if needed. 
+ void clear () { } +}; + +#endif diff --git a/Source/Initialization/CustomMomentumProb.cpp b/Source/Initialization/CustomMomentumProb.cpp deleted file mode 100644 index fa21252d0..000000000 --- a/Source/Initialization/CustomMomentumProb.cpp +++ /dev/null @@ -1,14 +0,0 @@ -#include <PlasmaInjector.H> - -#include <iostream> - -using namespace amrex; - -/// -/// This "custom" momentum distribution just does 0 momentum -/// -void CustomMomentumDistribution::getMomentum(vec3& u, Real x, Real y, Real z) { - u[0] = 0; - u[1] = 0; - u[2] = 0; -} diff --git a/Source/Initialization/InjectorDensity.H b/Source/Initialization/InjectorDensity.H new file mode 100644 index 000000000..b7f5c26eb --- /dev/null +++ b/Source/Initialization/InjectorDensity.H @@ -0,0 +1,202 @@ +#ifndef INJECTOR_DENSITY_H_ +#define INJECTOR_DENSITY_H_ + +#include <AMReX_Gpu.H> +#include <AMReX_Dim3.H> +#include <GpuParser.H> +#include <CustomDensityProb.H> +#include <WarpXConst.H> + +// struct whose getDensity returns constant density. +struct InjectorDensityConstant +{ + InjectorDensityConstant (amrex::Real a_rho) noexcept : m_rho(a_rho) {} + + AMREX_GPU_HOST_DEVICE + amrex::Real + getDensity (amrex::Real, amrex::Real, amrex::Real) const noexcept + { + return m_rho; + } + +private: + amrex::Real m_rho; +}; + +// struct whose getDensity returns local density computed from parser. +struct InjectorDensityParser +{ + InjectorDensityParser (WarpXParser const& a_parser) noexcept + : m_parser(a_parser) {} + + AMREX_GPU_HOST_DEVICE + amrex::Real + getDensity (amrex::Real x, amrex::Real y, amrex::Real z) const noexcept + { + return m_parser(x,y,z); + } + + // InjectorDensityParser constructs this GpuParser from WarpXParser. + GpuParser m_parser; +}; + +// struct whose getDensity returns local density computed from predefined profile. 
+struct InjectorDensityPredefined +{ + InjectorDensityPredefined (std::string const& a_species_name) noexcept; + + void clear (); + + AMREX_GPU_HOST_DEVICE + amrex::Real + getDensity (amrex::Real x, amrex::Real y, amrex::Real z) const noexcept + { + // Choices for profile are: + // - parabolic_channel + switch (profile) + { + case Profile::parabolic_channel: + { + amrex::Real z_start = p[0]; + amrex::Real ramp_up = p[1]; + amrex::Real plateau = p[2]; + amrex::Real ramp_down = p[3]; + amrex::Real rc = p[4]; + amrex::Real n0 = p[5]; + amrex::Real n; + amrex::Real kp = PhysConst::q_e/PhysConst::c + *std::sqrt( n0/(PhysConst::m_e*PhysConst::ep0) ); + + if ((z-z_start)>=0 and + (z-z_start)<ramp_up ) { + n = (z-z_start)/ramp_up; + } else if ((z-z_start)>=ramp_up and + (z-z_start)< ramp_up+plateau ) { + n = 1.; + } else if ((z-z_start)>=ramp_up+plateau and + (z-z_start)< ramp_up+plateau+ramp_down) { + n = 1.-((z-z_start)-ramp_up-plateau)/ramp_down; + } else { + n = 0.; + } + n *= n0*(1.+4.*(x*x+y*y)/(kp*kp*rc*rc*rc*rc)); + return n; + } + default: + amrex::Abort("InjectorDensityPredefined: how did we get here?"); + return 0.0; + } + } + +private: + enum struct Profile { null, parabolic_channel }; + Profile profile; + amrex::Real* p; +}; + +// Base struct for density injector. +// InjectorDensity contains a union (called Object) that holds any one +// instance of: +// - InjectorDensityConstant : to generate constant density; +// - InjectorDensityParser : to generate density from parser; +// - InjectorDensityCustom : to generate density from custom profile; +// - InjectorDensityPredefined: to generate density from predefined profile; +// The choice is made at runtime, depending on the constructor called. +// This mimics virtual functions, except the struct is stored in managed memory +// and member functions are made __host__ __device__ to run on CPU and GPU. 
+// This struct inherits from amrex::Gpu::Managed to provide new and delete +// operators in managed memory when running on GPU. Nothing special on CPU. +struct InjectorDensity + : public amrex::Gpu::Managed +{ + // This constructor stores a InjectorDensityConstant in union object. + InjectorDensity (InjectorDensityConstant* t, amrex::Real a_rho) + : type(Type::constant), + object(t,a_rho) + { } + + // This constructor stores a InjectorDensityParser in union object. + InjectorDensity (InjectorDensityParser* t, WarpXParser const& a_parser) + : type(Type::parser), + object(t,a_parser) + { } + + // This constructor stores a InjectorDensityCustom in union object. + InjectorDensity (InjectorDensityCustom* t, std::string const& a_species_name) + : type(Type::custom), + object(t,a_species_name) + { } + + // This constructor stores a InjectorDensityPredefined in union object. + InjectorDensity (InjectorDensityPredefined* t, std::string const& a_species_name) + : type(Type::predefined), + object(t,a_species_name) + { } + + // Explicitly prevent the compiler from generating copy constructors + // and copy assignment operators. + InjectorDensity (InjectorDensity const&) = delete; + InjectorDensity (InjectorDensity&&) = delete; + void operator= (InjectorDensity const&) = delete; + void operator= (InjectorDensity &&) = delete; + + ~InjectorDensity (); + + std::size_t sharedMemoryNeeded () const noexcept; + + // call getDensity from the object stored in the union + // (the union is called Object, and the instance is called object). 
+ AMREX_GPU_HOST_DEVICE + amrex::Real + getDensity (amrex::Real x, amrex::Real y, amrex::Real z) const noexcept + { + switch (type) + { + case Type::parser: + { + return object.parser.getDensity(x,y,z); + } + case Type::constant: + { + return object.constant.getDensity(x,y,z); + } + case Type::custom: + { + return object.custom.getDensity(x,y,z); + } + case Type::predefined: + { + return object.predefined.getDensity(x,y,z); + } + default: + { + amrex::Abort("InjectorDensity: unknown type"); + return 0.0; + } + } + } + +private: + enum struct Type { constant, custom, predefined, parser }; + Type type; + + // An instance of union Object constructs and stores any one of + // the objects declared (constant or parser or custom or predefined). + union Object { + Object (InjectorDensityConstant*, amrex::Real a_rho) noexcept + : constant(a_rho) {} + Object (InjectorDensityParser*, WarpXParser const& a_parser) noexcept + : parser(a_parser) {} + Object (InjectorDensityCustom*, std::string const& a_species_name) noexcept + : custom(a_species_name) {} + Object (InjectorDensityPredefined*, std::string const& a_species_name) noexcept + : predefined(a_species_name) {} + InjectorDensityConstant constant; + InjectorDensityParser parser; + InjectorDensityCustom custom; + InjectorDensityPredefined predefined; + }; + Object object; +}; + +#endif diff --git a/Source/Initialization/InjectorDensity.cpp b/Source/Initialization/InjectorDensity.cpp new file mode 100644 index 000000000..54df4b14d --- /dev/null +++ b/Source/Initialization/InjectorDensity.cpp @@ -0,0 +1,77 @@ +#include <PlasmaInjector.H> + +using namespace amrex; + +InjectorDensity::~InjectorDensity () +{ + switch (type) + { + case Type::parser: + { + object.parser.m_parser.clear(); + break; + } + case Type::custom: + { + object.custom.clear(); + break; + } + case Type::predefined: + { + object.predefined.clear(); + break; + } + } +} + +// Compute the amount of memory needed in GPU Shared Memory. 
+std::size_t +InjectorDensity::sharedMemoryNeeded () const noexcept +{ + switch (type) + { + case Type::parser: + { + // For parser injector, the 3D position of each particle + // is stored in shared memory. + return amrex::Gpu::numThreadsPerBlockParallelFor() * sizeof(double) * 3; + } + default: + return 0; + } +} + +InjectorDensityPredefined::InjectorDensityPredefined ( + std::string const& a_species_name) noexcept + : profile(Profile::null) +{ + ParmParse pp(a_species_name); + + std::vector<amrex::Real> v; + // Read parameters for the predefined plasma profile, + // and store them in managed memory + pp.getarr("predefined_profile_params", v); + p = static_cast<amrex::Real*> + (amrex::The_Managed_Arena()->alloc(sizeof(amrex::Real)*v.size())); + for (int i = 0; i < static_cast<int>(v.size()); ++i) { + p[i] = v[i]; + } + + // Parse predefined profile name, and update member variable profile. + std::string which_profile_s; + pp.query("predefined_profile_name", which_profile_s); + std::transform(which_profile_s.begin(), which_profile_s.end(), + which_profile_s.begin(), ::tolower); + if (which_profile_s == "parabolic_channel"){ + profile = Profile::parabolic_channel; + AMREX_ALWAYS_ASSERT_WITH_MESSAGE(v.size() > 5, + "InjectorDensityPredefined::parabolic_channel: not enough parameters"); + } +} + +// Note that we are not allowed to have non-trivial destructor. +// So we rely on clear() to free memory. +void InjectorDensityPredefined::clear () +{ + amrex::The_Managed_Arena()->free(p); +} diff --git a/Source/Initialization/InjectorMomentum.H b/Source/Initialization/InjectorMomentum.H new file mode 100644 index 000000000..399ee7759 --- /dev/null +++ b/Source/Initialization/InjectorMomentum.H @@ -0,0 +1,223 @@ +#ifndef INJECTOR_MOMENTUM_H_ +#define INJECTOR_MOMENTUM_H_ + +#include <AMReX_Gpu.H> +#include <AMReX_Dim3.H> +#include <GpuParser.H> +#include <CustomMomentumProb.H> + +// struct whose getMomentum returns constant momentum. 
+struct InjectorMomentumConstant +{ + InjectorMomentumConstant (amrex::Real a_ux, amrex::Real a_uy, amrex::Real a_uz) noexcept + : m_ux(a_ux), m_uy(a_uy), m_uz(a_uz) {} + + AMREX_GPU_HOST_DEVICE + amrex::XDim3 + getMomentum (amrex::Real, amrex::Real, amrex::Real) const noexcept + { + return amrex::XDim3{m_ux,m_uy,m_uz}; + } +private: + amrex::Real m_ux, m_uy, m_uz; +}; + +// struct whose getMomentum returns momentum for 1 particle, from random +// gaussian distribution. +struct InjectorMomentumGaussian +{ + InjectorMomentumGaussian (amrex::Real a_ux_m, amrex::Real a_uy_m, + amrex::Real a_uz_m, amrex::Real a_ux_th, + amrex::Real a_uy_th, amrex::Real a_uz_th) noexcept + : m_ux_m(a_ux_m), m_uy_m(a_uy_m), m_uz_m(a_uz_m), + m_ux_th(a_ux_th), m_uy_th(a_uy_th), m_uz_th(a_uz_th) + {} + + AMREX_GPU_HOST_DEVICE + amrex::XDim3 + getMomentum (amrex::Real x, amrex::Real y, amrex::Real z) const noexcept + { + return amrex::XDim3{amrex::RandomNormal(m_ux_m, m_ux_th), + amrex::RandomNormal(m_uy_m, m_uy_th), + amrex::RandomNormal(m_uz_m, m_uz_th)}; + } +private: + amrex::Real m_ux_m, m_uy_m, m_uz_m; + amrex::Real m_ux_th, m_uy_th, m_uz_th; +}; + +// struct whose getMomentum returns momentum for 1 particle, for +// radial expansion +struct InjectorMomentumRadialExpansion +{ + InjectorMomentumRadialExpansion (amrex::Real a_u_over_r) noexcept + : u_over_r(a_u_over_r) + {} + + AMREX_GPU_HOST_DEVICE + amrex::XDim3 + getMomentum (amrex::Real x, amrex::Real y, amrex::Real z) const noexcept + { + return {x*u_over_r, y*u_over_r, z*u_over_r}; + } + +private: + amrex::Real u_over_r; +}; + +// struct whose getMomentum returns local momentum computed from parser. 
+struct InjectorMomentumParser +{ + InjectorMomentumParser (WarpXParser const& a_ux_parser, + WarpXParser const& a_uy_parser, + WarpXParser const& a_uz_parser) noexcept + : m_ux_parser(a_ux_parser), m_uy_parser(a_uy_parser), + m_uz_parser(a_uz_parser) {} + + AMREX_GPU_HOST_DEVICE + amrex::XDim3 + getMomentum (amrex::Real x, amrex::Real y, amrex::Real z) const noexcept + { + return amrex::XDim3{m_ux_parser(x,y,z),m_uy_parser(x,y,z),m_uz_parser(x,y,z)}; + } + + GpuParser m_ux_parser, m_uy_parser, m_uz_parser; +}; + +// Base struct for momentum injector. +// InjectorMomentum contains a union (called Object) that holds any one +// instance of: +// - InjectorMomentumConstant : to generate constant momentum; +// - InjectorMomentumGaussian : to generate gaussian distribution; +// - InjectorMomentumRadialExpansion: to generate radial expansion; +// - InjectorMomentumParser : to generate momentum from parser; +// The choice is made at runtime, depending on the constructor called. +// This mimics virtual functions, except the struct is stored in managed memory +// and member functions are made __host__ __device__ to run on CPU and GPU. +// This struct inherits from amrex::Gpu::Managed to provide new and delete +// operators in managed memory when running on GPU. Nothing special on CPU. +struct InjectorMomentum + : public amrex::Gpu::Managed +{ + // This constructor stores a InjectorMomentumConstant in union object. + InjectorMomentum (InjectorMomentumConstant* t, + amrex::Real a_ux, amrex::Real a_uy, amrex::Real a_uz) + : type(Type::constant), + object(t, a_ux, a_uy, a_uz) + { } + + // This constructor stores a InjectorMomentumParser in union object. + InjectorMomentum (InjectorMomentumParser* t, + WarpXParser const& a_ux_parser, + WarpXParser const& a_uy_parser, + WarpXParser const& a_uz_parser) + : type(Type::parser), + object(t, a_ux_parser, a_uy_parser, a_uz_parser) + { } + + // This constructor stores a InjectorMomentumGaussian in union object. 
+ InjectorMomentum (InjectorMomentumGaussian* t, + amrex::Real a_ux_m, amrex::Real a_uy_m, amrex::Real a_uz_m, + amrex::Real a_ux_th, amrex::Real a_uy_th, amrex::Real a_uz_th) + : type(Type::gaussian), + object(t,a_ux_m,a_uy_m,a_uz_m,a_ux_th,a_uy_th,a_uz_th) + { } + + // This constructor stores a InjectorMomentumCustom in union object. + InjectorMomentum (InjectorMomentumCustom* t, + std::string const& a_species_name) + : type(Type::custom), + object(t, a_species_name) + { } + + // This constructor stores a InjectorMomentumRadialExpansion in union object. + InjectorMomentum (InjectorMomentumRadialExpansion* t, + amrex::Real u_over_r) + : type(Type::radial_expansion), + object(t, u_over_r) + { } + + // Explicitly prevent the compiler from generating copy constructors + // and copy assignment operators. + InjectorMomentum (InjectorMomentum const&) = delete; + InjectorMomentum (InjectorMomentum&&) = delete; + void operator= (InjectorMomentum const&) = delete; + void operator= (InjectorMomentum &&) = delete; + + ~InjectorMomentum (); + + std::size_t sharedMemoryNeeded () const noexcept; + + // call getMomentum from the object stored in the union + // (the union is called Object, and the instance is called object). 
+ AMREX_GPU_HOST_DEVICE + amrex::XDim3 + getMomentum (amrex::Real x, amrex::Real y, amrex::Real z) const noexcept + { + switch (type) + { + case Type::parser: + { + return object.parser.getMomentum(x,y,z); + } + case Type::gaussian: + { + return object.gaussian.getMomentum(x,y,z); + } + case Type::constant: + { + return object.constant.getMomentum(x,y,z); + } + case Type::radial_expansion: + { + return object.radial_expansion.getMomentum(x,y,z); + } + case Type::custom: + { + return object.custom.getMomentum(x,y,z); + } + default: + { + amrex::Abort("InjectorMomentum: unknown type"); + return {0.0,0.0,0.0}; + } + } + } + +private: + enum struct Type { constant, custom, gaussian, radial_expansion, parser }; + Type type; + + // An instance of union Object constructs and stores any one of + // the objects declared (constant or custom or gaussian or + // radial_expansion or parser). + union Object { + Object (InjectorMomentumConstant*, + amrex::Real a_ux, amrex::Real a_uy, amrex::Real a_uz) noexcept + : constant(a_ux,a_uy,a_uz) {} + Object (InjectorMomentumCustom*, + std::string const& a_species_name) noexcept + : custom(a_species_name) {} + Object (InjectorMomentumGaussian*, + amrex::Real a_ux_m, amrex::Real a_uy_m, + amrex::Real a_uz_m, amrex::Real a_ux_th, + amrex::Real a_uy_th, amrex::Real a_uz_th) noexcept + : gaussian(a_ux_m,a_uy_m,a_uz_m,a_ux_th,a_uy_th,a_uz_th) {} + Object (InjectorMomentumRadialExpansion*, + amrex::Real u_over_r) noexcept + : radial_expansion(u_over_r) {} + Object (InjectorMomentumParser*, + WarpXParser const& a_ux_parser, + WarpXParser const& a_uy_parser, + WarpXParser const& a_uz_parser) noexcept + : parser(a_ux_parser, a_uy_parser, a_uz_parser) {} + InjectorMomentumConstant constant; + InjectorMomentumCustom custom; + InjectorMomentumGaussian gaussian; + InjectorMomentumRadialExpansion radial_expansion; + InjectorMomentumParser parser; + }; + Object object; +}; + +#endif diff --git a/Source/Initialization/InjectorMomentum.cpp 
b/Source/Initialization/InjectorMomentum.cpp new file mode 100644 index 000000000..a197b5bef --- /dev/null +++ b/Source/Initialization/InjectorMomentum.cpp @@ -0,0 +1,40 @@ +#include <PlasmaInjector.H> + +using namespace amrex; + +InjectorMomentum::~InjectorMomentum () +{ + switch (type) + { + case Type::parser: + { + object.parser.m_ux_parser.clear(); + object.parser.m_uy_parser.clear(); + object.parser.m_uz_parser.clear(); + break; + } + case Type::custom: + { + object.custom.clear(); + break; + } + } +} + +// Compute the amount of memory needed in GPU Shared Memory. +std::size_t +InjectorMomentum::sharedMemoryNeeded () const noexcept +{ + switch (type) + { + case Type::parser: + { + // For parser injector, the 3D position of each particle + // is stored in shared memory. + return amrex::Gpu::numThreadsPerBlockParallelFor() * sizeof(double) * 3; + } + default: + return 0; + } +} + diff --git a/Source/Initialization/InjectorPosition.H b/Source/Initialization/InjectorPosition.H new file mode 100644 index 000000000..19bb092dd --- /dev/null +++ b/Source/Initialization/InjectorPosition.H @@ -0,0 +1,146 @@ +#ifndef INJECTOR_POSITION_H_ +#define INJECTOR_POSITION_H_ + +#include <AMReX_Gpu.H> +#include <AMReX_Dim3.H> +#include <AMReX_Utility.H> + +// struct whose getPositionUnitBox returns x, y and z for a particle with +// random distribution inside a unit cell. +struct InjectorPositionRandom +{ + AMREX_GPU_HOST_DEVICE + amrex::XDim3 + getPositionUnitBox (int i_part, int ref_fac=1) const noexcept + { + return amrex::XDim3{amrex::Random(), amrex::Random(), amrex::Random()}; + } +}; + +// struct whose getPositionUnitBox returns x, y and z for a particle with +// regular distribution inside a unit cell. +struct InjectorPositionRegular +{ + InjectorPositionRegular (amrex::Dim3 const& a_ppc) noexcept : ppc(a_ppc) {} + + // i_part: particle number within the cell, required to evenly space + // particles within the cell. 
+ // ref_fac: the number of particles evenly-spaced within a cell + // is a_ppc*(ref_fac**AMREX_SPACEDIM). + AMREX_GPU_HOST_DEVICE + amrex::XDim3 + getPositionUnitBox (int i_part, int ref_fac=1) const noexcept + { + int nx = ref_fac*ppc.x; + int ny = ref_fac*ppc.y; +#if (AMREX_SPACEDIM == 3) + int nz = ref_fac*ppc.z; +#else + int nz = 1; +#endif + int ix_part = i_part/(ny*nz); // written this way for backward compatibility + int iz_part = (i_part-ix_part*(ny*nz)) / ny; + int iy_part = (i_part-ix_part*(ny*nz)) - ny*iz_part; + return amrex::XDim3{(0.5+ix_part)/nx, (0.5+iy_part)/ny, (0.5+iz_part) / nz}; + } +private: + amrex::Dim3 ppc; +}; + +// Base struct for position injector. +// InjectorPosition contains a union (called Object) that holds any one +// instance of: +// - InjectorPositionRandom : to generate random distribution; +// - InjectorPositionRegular: to generate regular distribution. +// The choice is made at runtime, depending on the constructor called. +// This mimics virtual functions, except the struct is stored in managed memory +// and member functions are made __host__ __device__ to run on CPU and GPU. +// This struct inherits from amrex::Gpu::Managed to provide new and delete +// operators in managed memory when running on GPU. Nothing special on CPU. +struct InjectorPosition + : public amrex::Gpu::Managed +{ + // This constructor stores a InjectorPositionRandom in union object. + InjectorPosition (InjectorPositionRandom* t, + amrex::Real a_xmin, amrex::Real a_xmax, + amrex::Real a_ymin, amrex::Real a_ymax, + amrex::Real a_zmin, amrex::Real a_zmax) + : type(Type::random), + object(t), + xmin(a_xmin), xmax(a_xmax), + ymin(a_ymin), ymax(a_ymax), + zmin(a_zmin), zmax(a_zmax) + { } + + // This constructor stores a InjectorPositionRegular in union object. 
+ InjectorPosition (InjectorPositionRegular* t, + amrex::Real a_xmin, amrex::Real a_xmax, + amrex::Real a_ymin, amrex::Real a_ymax, + amrex::Real a_zmin, amrex::Real a_zmax, + amrex::Dim3 const& a_ppc) + : type(Type::regular), + object(t, a_ppc), + xmin(a_xmin), xmax(a_xmax), + ymin(a_ymin), ymax(a_ymax), + zmin(a_zmin), zmax(a_zmax) + { } + + // Explicitly prevent the compiler from generating copy constructors + // and copy assignment operators. + InjectorPosition (InjectorPosition const&) = delete; + InjectorPosition (InjectorPosition&&) = delete; + void operator= (InjectorPosition const&) = delete; + void operator= (InjectorPosition &&) = delete; + + std::size_t sharedMemoryNeeded () const noexcept { return 0; } + + // call getPositionUnitBox from the object stored in the union + // (the union is called Object, and the instance is called object). + AMREX_GPU_HOST_DEVICE + amrex::XDim3 + getPositionUnitBox (int i_part, int ref_fac=1) const noexcept + { + switch (type) + { + case Type::regular: + { + return object.regular.getPositionUnitBox(i_part, ref_fac); + } + default: + { + return object.random.getPositionUnitBox(i_part, ref_fac); + } + }; + } + + // bool: whether position specified is within bounds. + AMREX_GPU_HOST_DEVICE + bool + insideBounds (amrex::Real x, amrex::Real y, amrex::Real z) const noexcept + { + return (x < xmax and x >= xmin and + y < ymax and y >= ymin and + z < zmax and z >= zmin); + } + +private: + enum struct Type { random, regular }; + Type type; + + // An instance of union Object constructs and stores any one of + // the objects declared (random or regular). 
+ union Object { + Object (InjectorPositionRandom*) noexcept : random() {} + Object (InjectorPositionRegular*, amrex::Dim3 const& a_ppc) noexcept + : regular(a_ppc) {} + InjectorPositionRandom random; + InjectorPositionRegular regular; + }; + Object object; + + amrex::Real xmin, xmax; + amrex::Real ymin, ymax; + amrex::Real zmin, zmax; +}; + +#endif diff --git a/Source/Initialization/Make.package b/Source/Initialization/Make.package index edcf402c9..2c6458b6d 100644 --- a/Source/Initialization/Make.package +++ b/Source/Initialization/Make.package @@ -1,9 +1,18 @@ -CEXE_sources += CustomDensityProb.cpp -CEXE_sources += PlasmaProfiles.cpp CEXE_sources += WarpXInitData.cpp -CEXE_sources += CustomMomentumProb.cpp + CEXE_sources += PlasmaInjector.cpp CEXE_headers += PlasmaInjector.H +CEXE_headers += InjectorPosition.H + +CEXE_headers += InjectorDensity.H +CEXE_sources += InjectorDensity.cpp + +CEXE_headers += InjectorMomentum.H +CEXE_sources += InjectorMomentum.cpp + +CEXE_headers += CustomDensityProb.H +CEXE_headers += CustomMomentumProb.H + INCLUDE_LOCATIONS += $(WARPX_HOME)/Source/Initialization VPATH_LOCATIONS += $(WARPX_HOME)/Source/Initialization diff --git a/Source/Initialization/PlasmaInjector.H b/Source/Initialization/PlasmaInjector.H index f998e217e..f7e86bff5 100644 --- a/Source/Initialization/PlasmaInjector.H +++ b/Source/Initialization/PlasmaInjector.H @@ -1,250 +1,16 @@ #ifndef PLASMA_INJECTOR_H_ #define PLASMA_INJECTOR_H_ -#include <array> +#include <InjectorPosition.H> +#include <InjectorDensity.H> +#include <InjectorMomentum.H> -#include "AMReX_REAL.H" +#include <array> #include <AMReX_Vector.H> #include <WarpXConst.H> #include <WarpXParser.H> -#include "AMReX_ParmParse.H" -#include "AMReX_Utility.H" - -enum class predefined_profile_flag { Null, parabolic_channel }; - -/// -/// PlasmaDensityProfile describes how the charge density -/// is set in particle initialization. 
Subclasses must define a -/// getDensity function that describes the charge density as a -/// function of x, y, and z. -/// -class PlasmaDensityProfile -{ -public: - virtual ~PlasmaDensityProfile() {}; - virtual amrex::Real getDensity(amrex::Real x, - amrex::Real y, - amrex::Real z) const = 0; -protected: - std::string _species_name; -}; - -/// -/// This describes a constant density distribution. -/// -class ConstantDensityProfile : public PlasmaDensityProfile -{ -public: - ConstantDensityProfile(amrex::Real _density); - virtual amrex::Real getDensity(amrex::Real x, - amrex::Real y, - amrex::Real z) const override; - -private: - amrex::Real _density; -}; - -/// -/// This describes a custom density distribution. Users can supply -/// in their problem directory. -/// -/// -class CustomDensityProfile : public PlasmaDensityProfile -{ -public: - CustomDensityProfile(const std::string& species_name); - virtual amrex::Real getDensity(amrex::Real x, - amrex::Real y, - amrex::Real z) const override; -private: - amrex::Vector<amrex::Real> params; -}; - -/// -/// This describes predefined density distributions. -/// -class PredefinedDensityProfile : public PlasmaDensityProfile -{ -public: - PredefinedDensityProfile(const std::string& species_name); - virtual amrex::Real getDensity(amrex::Real x, - amrex::Real y, - amrex::Real z) const override; - amrex::Real ParabolicChannel(amrex::Real x, - amrex::Real y, - amrex::Real z) const; -private: - predefined_profile_flag which_profile = predefined_profile_flag::Null; - amrex::Vector<amrex::Real> params; -}; - -/// -/// This describes a density function parsed in the input file. 
-/// -class ParseDensityProfile : public PlasmaDensityProfile -{ -public: - ParseDensityProfile(const std::string _parse_density_function); - virtual amrex::Real getDensity(amrex::Real x, - amrex::Real y, - amrex::Real z) const override; -private: - std::string _parse_density_function; - WarpXParser parser_density; -}; - -/// -/// PlasmaMomentumDistribution describes how the particle momenta -/// are set. Subclasses must define a getMomentum method that fills -/// a u with the 3 components of the particle momentum -/// -class PlasmaMomentumDistribution -{ -public: - using vec3 = std::array<amrex::Real, 3>; - virtual ~PlasmaMomentumDistribution() {}; - virtual void getMomentum(vec3& u, amrex::Real x, amrex::Real y, amrex::Real z) = 0; -}; - -/// -/// This is a constant momentum distribution - all particles will -/// have the same ux, uy, and uz -/// -class ConstantMomentumDistribution : public PlasmaMomentumDistribution -{ -public: - ConstantMomentumDistribution(amrex::Real ux, - amrex::Real uy, - amrex::Real uz); - virtual void getMomentum(vec3& u, amrex::Real x, amrex::Real y, amrex::Real z) override; - -private: - amrex::Real _ux; - amrex::Real _uy; - amrex::Real _uz; -}; - -/// -/// This describes a custom momentum distribution. Users can supply -/// in their problem directory. -/// -/// -class CustomMomentumDistribution : public PlasmaMomentumDistribution -{ -public: - CustomMomentumDistribution(const std::string& species_name); - virtual void getMomentum(vec3& u, amrex::Real x, amrex::Real y, amrex::Real z) override; - -private: - amrex::Vector<amrex::Real> params; -}; - - -/// -/// This is a Gaussian Random momentum distribution. -/// Particles will get random momenta, drawn from a normal. -/// ux_m, ux_y, and ux_z describe the mean components in the x, y, and z -/// directions, while u_th is the standard deviation of the random -/// component. 
-/// -class GaussianRandomMomentumDistribution : public PlasmaMomentumDistribution -{ -public: - GaussianRandomMomentumDistribution(amrex::Real ux_m, - amrex::Real uy_m, - amrex::Real uz_m, - amrex::Real ux_th, - amrex::Real uy_th, - amrex::Real uz_th); - virtual void getMomentum(vec3& u, amrex::Real x, amrex::Real y, amrex::Real z) override; -private: - amrex::Real _ux_m; - amrex::Real _uy_m; - amrex::Real _uz_m; - amrex::Real _ux_th; - amrex::Real _uy_th; - amrex::Real _uz_th; -}; - -/// -/// This is a radially expanding momentum distribution -/// Particles will have a radial momentum proportional to their -/// radius, with proportionality constant u_over_r -class RadialExpansionMomentumDistribution : public PlasmaMomentumDistribution -{ -public: - RadialExpansionMomentumDistribution( amrex::Real u_over_r ); - virtual void getMomentum(vec3& u, amrex::Real x, amrex::Real y, amrex::Real z) override; -private: - amrex::Real _u_over_r; -}; - -/// -/// This describes a momentum distribution function parsed in the input file. -/// -class ParseMomentumFunction : public PlasmaMomentumDistribution -{ -public: - ParseMomentumFunction(const std::string _parse_momentum_function_ux, - const std::string _parse_momentum_function_uy, - const std::string _parse_momentum_function_uz); - virtual void getMomentum(vec3& u, - amrex::Real x, - amrex::Real y, - amrex::Real z) override; -private: - std::string _parse_momentum_function_ux; - std::string _parse_momentum_function_uy; - std::string _parse_momentum_function_uz; - WarpXParser parser_ux; - WarpXParser parser_uy; - WarpXParser parser_uz; -}; - - -/// -/// PlasmaParticlePosition describes how particles are initialized -/// into each cell box. Subclasses must define a -/// getPositionUnitBox function that returns the position of -/// particle number i_part in a unitary box. 
-/// -class PlasmaParticlePosition{ -public: - using vec3 = std::array<amrex::Real, 3>; - virtual ~PlasmaParticlePosition() {}; - virtual void getPositionUnitBox(vec3& r, int i_part, int ref_fac=1) = 0; -}; - -/// -/// Particles are initialized with a random uniform -/// distribution inside each cell -/// -class RandomPosition : public PlasmaParticlePosition{ -public: - RandomPosition(int num_particles_per_cell); - virtual void getPositionUnitBox(vec3& r, int i_part, int ref_fac=1) override; -private: - amrex::Real _x; - amrex::Real _y; - amrex::Real _z; - int _num_particles_per_cell; -}; - -/// -/// Particles are regularly distributed inside each cell. The user provides -/// a 3d (resp. 2d) vector num_particles_per_cell_each_dim that contains -/// the number of particles per cell along each dimension. -/// -class RegularPosition : public PlasmaParticlePosition{ -public: - RegularPosition(const amrex::Vector<int>& num_particles_per_cell_each_dim); - virtual void getPositionUnitBox(vec3& r, int i_part, int ref_fac=1) override; -private: - amrex::Real _x; - amrex::Real _y; - amrex::Real _z; - amrex::Vector<int> _num_particles_per_cell_each_dim; -}; +#include <AMReX_ParmParse.H> +#include <AMReX_Utility.H> /// /// The PlasmaInjector class parses and stores information about the plasma @@ -256,28 +22,23 @@ class PlasmaInjector public: - using vec3 = std::array<amrex::Real, 3>; - - PlasmaInjector(); - - PlasmaInjector(int ispecies, const std::string& name); + PlasmaInjector (); - amrex::Real getDensity(amrex::Real x, amrex::Real y, amrex::Real z); + PlasmaInjector (int ispecies, const std::string& name); - bool insideBounds(amrex::Real x, amrex::Real y, amrex::Real z); + bool insideBounds (amrex::Real x, amrex::Real y, amrex::Real z) const noexcept; int num_particles_per_cell; amrex::Vector<int> num_particles_per_cell_each_dim; - void getMomentum(vec3& u, amrex::Real x, amrex::Real y, amrex::Real z); + // gamma * beta + amrex::XDim3 getMomentum (amrex::Real x, 
amrex::Real y, amrex::Real z) const noexcept; - void getPositionUnitBox(vec3& r, int i_part, int ref_fac=1); + amrex::Real getCharge () {return charge;} + amrex::Real getMass () {return mass;} - amrex::Real getCharge() {return charge;} - amrex::Real getMass() {return mass;} - - bool doInjection() { return part_pos != NULL;} + bool doInjection () const noexcept { return inj_pos != NULL;} bool add_single_particle = false; amrex::Vector<amrex::Real> single_particle_pos; @@ -305,6 +66,21 @@ public: amrex::Real xmin, xmax; amrex::Real ymin, ymax; amrex::Real zmin, zmax; + amrex::Real density_min = 0; + amrex::Real density_max = std::numeric_limits<amrex::Real>::max(); + + InjectorPosition* getInjectorPosition (); + InjectorDensity* getInjectorDensity (); + InjectorMomentum* getInjectorMomentum (); + + // When running on GPU, injector for position, momentum and density store + // particle 3D positions in shared memory IF using the parser. + std::size_t + sharedMemoryNeeded () const noexcept { + return amrex::max(inj_pos->sharedMemoryNeeded(), + inj_rho->sharedMemoryNeeded(), + inj_mom->sharedMemoryNeeded()); + } protected: @@ -315,13 +91,12 @@ protected: int species_id; std::string species_name; - std::unique_ptr<PlasmaDensityProfile> rho_prof; - std::unique_ptr<PlasmaMomentumDistribution> mom_dist; - std::unique_ptr<PlasmaParticlePosition> part_pos; - - void parseDensity(amrex::ParmParse pp); - void parseMomentum(amrex::ParmParse pp); + std::unique_ptr<InjectorPosition> inj_pos; + std::unique_ptr<InjectorDensity > inj_rho; + std::unique_ptr<InjectorMomentum> inj_mom; + void parseDensity (amrex::ParmParse& pp); + void parseMomentum (amrex::ParmParse& pp); }; #endif diff --git a/Source/Initialization/PlasmaInjector.cpp b/Source/Initialization/PlasmaInjector.cpp index f9642d1b6..541999789 100644 --- a/Source/Initialization/PlasmaInjector.cpp +++ b/Source/Initialization/PlasmaInjector.cpp @@ -55,192 +55,34 @@ namespace { } } 
-ConstantDensityProfile::ConstantDensityProfile(Real density) - : _density(density) -{} +PlasmaInjector::PlasmaInjector () {} -Real ConstantDensityProfile::getDensity(Real x, Real y, Real z) const -{ - return _density; -} - -CustomDensityProfile::CustomDensityProfile(const std::string& species_name) -{ - ParmParse pp(species_name); - pp.getarr("custom_profile_params", params); -} - -PredefinedDensityProfile::PredefinedDensityProfile(const std::string& species_name) +PlasmaInjector::PlasmaInjector (int ispecies, const std::string& name) + : species_id(ispecies), species_name(name) { ParmParse pp(species_name); - std::string which_profile_s; - pp.getarr("predefined_profile_params", params); - pp.query("predefined_profile_name", which_profile_s); - if (which_profile_s == "parabolic_channel"){ - which_profile = predefined_profile_flag::parabolic_channel; - } -} - -ParseDensityProfile::ParseDensityProfile(std::string parse_density_function) - : _parse_density_function(parse_density_function) -{ - parser_density.define(parse_density_function); - parser_density.registerVariables({"x","y","z"}); - - ParmParse pp("my_constants"); - std::set<std::string> symbols = parser_density.symbols(); - symbols.erase("x"); - symbols.erase("y"); - symbols.erase("z"); // after removing variables, we are left with constants - for (auto it = symbols.begin(); it != symbols.end(); ) { - Real v; - if (pp.query(it->c_str(), v)) { - parser_density.setConstant(*it, v); - it = symbols.erase(it); - } else { - ++it; - } - } - for (auto const& s : symbols) { // make sure there no unknown symbols - amrex::Abort("ParseDensityProfile: Unknown symbol "+s); - } -} - -Real ParseDensityProfile::getDensity(Real x, Real y, Real z) const -{ - return parser_density.eval(x,y,z); -} - -ConstantMomentumDistribution::ConstantMomentumDistribution(Real ux, - Real uy, - Real uz) - : _ux(ux), _uy(uy), _uz(uz) -{} - -void ConstantMomentumDistribution::getMomentum(vec3& u, Real x, Real y, Real z) { - u[0] = _ux; - u[1] = 
_uy; - u[2] = _uz; -} -CustomMomentumDistribution::CustomMomentumDistribution(const std::string& species_name) -{ - ParmParse pp(species_name); - pp.getarr("custom_momentum_params", params); -} - -GaussianRandomMomentumDistribution::GaussianRandomMomentumDistribution(Real ux_m, - Real uy_m, - Real uz_m, - Real ux_th, - Real uy_th, - Real uz_th) - : _ux_m(ux_m), _uy_m(uy_m), _uz_m(uz_m), _ux_th(ux_th), _uy_th(uy_th), _uz_th(uz_th) -{ -} - -void GaussianRandomMomentumDistribution::getMomentum(vec3& u, Real x, Real y, Real z) { - Real ux_th = amrex::RandomNormal(0.0, _ux_th); - Real uy_th = amrex::RandomNormal(0.0, _uy_th); - Real uz_th = amrex::RandomNormal(0.0, _uz_th); - - u[0] = _ux_m + ux_th; - u[1] = _uy_m + uy_th; - u[2] = _uz_m + uz_th; -} -RadialExpansionMomentumDistribution::RadialExpansionMomentumDistribution(Real u_over_r) : _u_over_r( u_over_r ) -{ -} - -void RadialExpansionMomentumDistribution::getMomentum(vec3& u, Real x, Real y, Real z) { - u[0] = _u_over_r * x; - u[1] = _u_over_r * y; - u[2] = _u_over_r * z; -} - -ParseMomentumFunction::ParseMomentumFunction(std::string parse_momentum_function_ux, - std::string parse_momentum_function_uy, - std::string parse_momentum_function_uz) - : _parse_momentum_function_ux(parse_momentum_function_ux), - _parse_momentum_function_uy(parse_momentum_function_uy), - _parse_momentum_function_uz(parse_momentum_function_uz) -{ - parser_ux.define(parse_momentum_function_ux); - parser_uy.define(parse_momentum_function_uy); - parser_uz.define(parse_momentum_function_uz); - - amrex::Array<std::reference_wrapper<WarpXParser>,3> parsers{parser_ux, parser_uy, parser_uz}; - ParmParse pp("my_constants"); - for (auto& p : parsers) { - auto& parser = p.get(); - parser.registerVariables({"x","y","z"}); - std::set<std::string> symbols = parser.symbols(); - symbols.erase("x"); - symbols.erase("y"); - symbols.erase("z"); // after removing variables, we are left with constants - for (auto it = symbols.begin(); it != symbols.end(); ) { - 
Real v; - if (pp.query(it->c_str(), v)) { - parser.setConstant(*it, v); - it = symbols.erase(it); - } else { - ++it; - } - } - for (auto const& s : symbols) { // make sure there no unknown symbols - amrex::Abort("ParseMomentumFunction: Unknown symbol "+s); - } - } -} - -void ParseMomentumFunction::getMomentum(vec3& u, Real x, Real y, Real z) -{ - u[0] = parser_ux.eval(x,y,z); - u[1] = parser_uy.eval(x,y,z); - u[2] = parser_uz.eval(x,y,z); -} - -RandomPosition::RandomPosition(int num_particles_per_cell): - _num_particles_per_cell(num_particles_per_cell) -{} - -void RandomPosition::getPositionUnitBox(vec3& r, int i_part, int ref_fac){ - r[0] = amrex::Random(); - r[1] = amrex::Random(); - r[2] = amrex::Random(); -} - -RegularPosition::RegularPosition(const amrex::Vector<int>& num_particles_per_cell_each_dim) - : _num_particles_per_cell_each_dim(num_particles_per_cell_each_dim) -{} + pp.query("radially_weighted", radially_weighted); + AMREX_ALWAYS_ASSERT_WITH_MESSAGE(radially_weighted, "ERROR: Only radially_weighted=true is supported"); -void RegularPosition::getPositionUnitBox(vec3& r, int i_part, int ref_fac) -{ - int nx = ref_fac*_num_particles_per_cell_each_dim[0]; - int ny = ref_fac*_num_particles_per_cell_each_dim[1]; -#if AMREX_SPACEDIM == 3 - int nz = ref_fac*_num_particles_per_cell_each_dim[2]; -#else - int nz = 1; -#endif - - int ix_part = i_part/(ny * nz); - int iy_part = (i_part % (ny * nz)) % ny; - int iz_part = (i_part % (ny * nz)) / ny; + // parse plasma boundaries + xmin = std::numeric_limits<amrex::Real>::lowest(); + ymin = std::numeric_limits<amrex::Real>::lowest(); + zmin = std::numeric_limits<amrex::Real>::lowest(); - r[0] = (0.5+ix_part)/nx; - r[1] = (0.5+iy_part)/ny; - r[2] = (0.5+iz_part)/nz; -} + xmax = std::numeric_limits<amrex::Real>::max(); + ymax = std::numeric_limits<amrex::Real>::max(); + zmax = std::numeric_limits<amrex::Real>::max(); -PlasmaInjector::PlasmaInjector(){ - part_pos = NULL; -} + pp.query("xmin", xmin); + pp.query("ymin", 
ymin); + pp.query("zmin", zmin); + pp.query("xmax", xmax); + pp.query("ymax", ymax); + pp.query("zmax", zmax); -PlasmaInjector::PlasmaInjector(int ispecies, const std::string& name) - : species_id(ispecies), species_name(name) -{ - ParmParse pp(species_name); + pp.query("density_min", density_min); + pp.query("density_max", density_max); // parse charge and mass std::string charge_s; @@ -290,9 +132,14 @@ PlasmaInjector::PlasmaInjector(int ispecies, const std::string& name) gaussian_beam = true; parseMomentum(pp); } + // Depending on injection type at runtime, initialize inj_pos + // so that inj_pos->getPositionUnitBox calls + // InjectorPosition[Random or Regular].getPositionUnitBox. else if (part_pos_s == "nrandompercell") { pp.query("num_particles_per_cell", num_particles_per_cell); - part_pos.reset(new RandomPosition(num_particles_per_cell)); + // Construct InjectorPosition with InjectorPositionRandom. + inj_pos.reset(new InjectorPosition((InjectorPositionRandom*)nullptr, + xmin, xmax, ymin, ymax, zmin, zmax)); parseDensity(pp); parseMomentum(pp); } else if (part_pos_s == "nuniformpercell") { @@ -301,7 +148,12 @@ PlasmaInjector::PlasmaInjector(int ispecies, const std::string& name) #if ( AMREX_SPACEDIM == 2 ) num_particles_per_cell_each_dim[2] = 1; #endif - part_pos.reset(new RegularPosition(num_particles_per_cell_each_dim)); + // Construct InjectorPosition from InjectorPositionRegular. 
+ inj_pos.reset(new InjectorPosition((InjectorPositionRegular*)nullptr, + xmin, xmax, ymin, ymax, zmin, zmax, + Dim3{num_particles_per_cell_each_dim[0], + num_particles_per_cell_each_dim[1], + num_particles_per_cell_each_dim[2]})); num_particles_per_cell = num_particles_per_cell_each_dim[0] * num_particles_per_cell_each_dim[1] * num_particles_per_cell_each_dim[2]; @@ -310,52 +162,75 @@ PlasmaInjector::PlasmaInjector(int ispecies, const std::string& name) } else { StringParseAbortMessage("Injection style", part_pos_s); } +} - pp.query("radially_weighted", radially_weighted); - AMREX_ALWAYS_ASSERT_WITH_MESSAGE(radially_weighted, "ERROR: Only radially_weighted=true is supported"); - - // parse plasma boundaries - xmin = std::numeric_limits<amrex::Real>::lowest(); - ymin = std::numeric_limits<amrex::Real>::lowest(); - zmin = std::numeric_limits<amrex::Real>::lowest(); - - xmax = std::numeric_limits<amrex::Real>::max(); - ymax = std::numeric_limits<amrex::Real>::max(); - zmax = std::numeric_limits<amrex::Real>::max(); +namespace { +WarpXParser makeParser (std::string const& parse_function) +{ + WarpXParser parser(parse_function); + parser.registerVariables({"x","y","z"}); - pp.query("xmin", xmin); - pp.query("ymin", ymin); - pp.query("zmin", zmin); - pp.query("xmax", xmax); - pp.query("ymax", ymax); - pp.query("zmax", zmax); + ParmParse pp("my_constants"); + std::set<std::string> symbols = parser.symbols(); + symbols.erase("x"); + symbols.erase("y"); + symbols.erase("z"); // after removing variables, we are left with constants + for (auto it = symbols.begin(); it != symbols.end(); ) { + Real v; + if (pp.query(it->c_str(), v)) { + parser.setConstant(*it, v); + it = symbols.erase(it); + } else { + ++it; + } + } + for (auto const& s : symbols) { // make sure there no unknown symbols + amrex::Abort("PlasmaInjector::makeParser: Unknown symbol "+s); + } + return parser; +} } -void PlasmaInjector::parseDensity(ParmParse pp){ +// Depending on injection type at runtime, 
initialize inj_rho +// so that inj_rho->getDensity calls +// InjectorPosition[Constant or Custom or etc.].getDensity. +void PlasmaInjector::parseDensity (ParmParse& pp) +{ // parse density information std::string rho_prof_s; pp.get("profile", rho_prof_s); - std::transform(rho_prof_s.begin(), - rho_prof_s.end(), - rho_prof_s.begin(), - ::tolower); + std::transform(rho_prof_s.begin(), rho_prof_s.end(), + rho_prof_s.begin(), ::tolower); if (rho_prof_s == "constant") { pp.get("density", density); - rho_prof.reset(new ConstantDensityProfile(density)); + // Construct InjectorDensity with InjectorDensityConstant. + inj_rho.reset(new InjectorDensity((InjectorDensityConstant*)nullptr, density)); } else if (rho_prof_s == "custom") { - rho_prof.reset(new CustomDensityProfile(species_name)); + // Construct InjectorDensity with InjectorDensityCustom. + inj_rho.reset(new InjectorDensity((InjectorDensityCustom*)nullptr, species_name)); } else if (rho_prof_s == "predefined") { - rho_prof.reset(new PredefinedDensityProfile(species_name)); + // Construct InjectorDensity with InjectorDensityPredefined. + inj_rho.reset(new InjectorDensity((InjectorDensityPredefined*)nullptr,species_name)); } else if (rho_prof_s == "parse_density_function") { - pp.get("density_function(x,y,z)", str_density_function); - rho_prof.reset(new ParseDensityProfile(str_density_function)); + std::vector<std::string> f; + pp.getarr("density_function(x,y,z)", f); + for (auto const& s : f) { + str_density_function += s; + } + // Construct InjectorDensity with InjectorDensityParser. + inj_rho.reset(new InjectorDensity((InjectorDensityParser*)nullptr, + makeParser(str_density_function))); } else { StringParseAbortMessage("Density profile type", rho_prof_s); } } -void PlasmaInjector::parseMomentum(ParmParse pp){ +// Depending on injection type at runtime, initialize inj_mom +// so that inj_mom->getMomentum calls +// InjectorMomentum[Constant or Custom or etc.].getMomentum. 
+void PlasmaInjector::parseMomentum (ParmParse& pp) +{ // parse momentum information std::string mom_dist_s; pp.get("momentum_distribution_type", mom_dist_s); @@ -370,9 +245,11 @@ void PlasmaInjector::parseMomentum(ParmParse pp){ pp.query("ux", ux); pp.query("uy", uy); pp.query("uz", uz); - mom_dist.reset(new ConstantMomentumDistribution(ux, uy, uz)); + // Construct InjectorMomentum with InjectorMomentumConstant. + inj_mom.reset(new InjectorMomentum((InjectorMomentumConstant*)nullptr, ux,uy, uz)); } else if (mom_dist_s == "custom") { - mom_dist.reset(new CustomMomentumDistribution(species_name)); + // Construct InjectorMomentum with InjectorMomentumCustom. + inj_mom.reset(new InjectorMomentum((InjectorMomentumCustom*)nullptr, species_name)); } else if (mom_dist_s == "gaussian") { Real ux_m = 0.; Real uy_m = 0.; @@ -386,42 +263,68 @@ void PlasmaInjector::parseMomentum(ParmParse pp){ pp.query("ux_th", ux_th); pp.query("uy_th", uy_th); pp.query("uz_th", uz_th); - mom_dist.reset(new GaussianRandomMomentumDistribution(ux_m, uy_m, uz_m, - ux_th, uy_th, uz_th)); + // Construct InjectorMomentum with InjectorMomentumGaussian. + inj_mom.reset(new InjectorMomentum((InjectorMomentumGaussian*)nullptr, + ux_m, uy_m, uz_m, ux_th, uy_th, uz_th)); } else if (mom_dist_s == "radial_expansion") { Real u_over_r = 0.; pp.query("u_over_r", u_over_r); - mom_dist.reset(new RadialExpansionMomentumDistribution(u_over_r)); + // Construct InjectorMomentum with InjectorMomentumRadialExpansion. 
+ inj_mom.reset(new InjectorMomentum + ((InjectorMomentumRadialExpansion*)nullptr, u_over_r)); } else if (mom_dist_s == "parse_momentum_function") { - pp.get("momentum_function_ux(x,y,z)", str_momentum_function_ux); - pp.get("momentum_function_uy(x,y,z)", str_momentum_function_uy); - pp.get("momentum_function_uz(x,y,z)", str_momentum_function_uz); - mom_dist.reset(new ParseMomentumFunction(str_momentum_function_ux, - str_momentum_function_uy, - str_momentum_function_uz)); + std::vector<std::string> f; + pp.getarr("momentum_function_ux(x,y,z)", f); + for (auto const& s : f) { + str_momentum_function_ux += s; + } + f.clear(); + pp.getarr("momentum_function_uy(x,y,z)", f); + for (auto const& s : f) { + str_momentum_function_uy += s; + } + f.clear(); + pp.getarr("momentum_function_uz(x,y,z)", f); + for (auto const& s : f) { + str_momentum_function_uz += s; + } + // Construct InjectorMomentum with InjectorMomentumParser. + inj_mom.reset(new InjectorMomentum((InjectorMomentumParser*)nullptr, + makeParser(str_momentum_function_ux), + makeParser(str_momentum_function_uy), + makeParser(str_momentum_function_uz))); } else { StringParseAbortMessage("Momentum distribution type", mom_dist_s); } } -void PlasmaInjector::getPositionUnitBox(vec3& r, int i_part, int ref_fac) { - return part_pos->getPositionUnitBox(r, i_part, ref_fac); +XDim3 PlasmaInjector::getMomentum (Real x, Real y, Real z) const noexcept +{ + return inj_mom->getMomentum(x, y, z); // gamma*beta +} + +bool PlasmaInjector::insideBounds (Real x, Real y, Real z) const noexcept +{ + return (x < xmax and x >= xmin and + y < ymax and y >= ymin and + z < zmax and z >= zmin); } -void PlasmaInjector::getMomentum(vec3& u, Real x, Real y, Real z) { - mom_dist->getMomentum(u, x, y, z); - u[0] *= PhysConst::c; - u[1] *= PhysConst::c; - u[2] *= PhysConst::c; +InjectorPosition* +PlasmaInjector::getInjectorPosition () +{ + return inj_pos.get(); } -bool PlasmaInjector::insideBounds(Real x, Real y, Real z) { - if (x >= xmax || x < 
xmin || - y >= ymax || y < ymin || - z >= zmax || z < zmin ) return false; - return true; +InjectorDensity* +PlasmaInjector::getInjectorDensity () +{ + return inj_rho.get(); } -Real PlasmaInjector::getDensity(Real x, Real y, Real z) { - return rho_prof->getDensity(x, y, z); +InjectorMomentum* +PlasmaInjector::getInjectorMomentum () +{ + return inj_mom.get(); } + diff --git a/Source/Initialization/PlasmaProfiles.cpp b/Source/Initialization/PlasmaProfiles.cpp deleted file mode 100644 index d9d207f7e..000000000 --- a/Source/Initialization/PlasmaProfiles.cpp +++ /dev/null @@ -1,41 +0,0 @@ -#include <PlasmaInjector.H> -#include <cmath> -#include <iostream> -#include <WarpXConst.H> - -using namespace amrex; - -Real PredefinedDensityProfile::getDensity(Real x, Real y, Real z) const { - Real n; - if ( which_profile == predefined_profile_flag::parabolic_channel ) { - n = ParabolicChannel(x,y,z); - } - return n; -} - -/// -/// plateau between linear upramp and downramp, and parab transverse profile -/// -Real PredefinedDensityProfile::ParabolicChannel(Real x, Real y, Real z) const { - // params = [z_start ramp_up plateau ramp_down rc n0] - Real z_start = params[0]; - Real ramp_up = params[1]; - Real plateau = params[2]; - Real ramp_down = params[3]; - Real rc = params[4]; - Real n0 = params[5]; - Real n; - Real kp = PhysConst::q_e/PhysConst::c*sqrt( n0/(PhysConst::m_e*PhysConst::ep0) ); - - if ((z-z_start)>=0 and (z-z_start)<ramp_up ) { - n = (z-z_start)/ramp_up; - } else if ((z-z_start)>=ramp_up and (z-z_start)<ramp_up+plateau ) { - n = 1; - } else if ((z-z_start)>=ramp_up+plateau and (z-z_start)<ramp_up+plateau+ramp_down) { - n = 1-((z-z_start)-ramp_up-plateau)/ramp_down; - } else { - n = 0; - } - n *= n0*(1+4*(x*x+y*y)/(kp*kp*std::pow(rc,4))); - return n; -} diff --git a/Source/Initialization/WarpXInitData.cpp b/Source/Initialization/WarpXInitData.cpp index 2442e0205..590c11b84 100644 --- a/Source/Initialization/WarpXInitData.cpp +++ 
b/Source/Initialization/WarpXInitData.cpp @@ -1,6 +1,4 @@ -#include <numeric> - #include <AMReX_ParallelDescriptor.H> #include <AMReX_ParmParse.H> @@ -88,7 +86,7 @@ WarpX::InitDiagnostics () { const Real* current_lo = geom[0].ProbLo(); const Real* current_hi = geom[0].ProbHi(); Real dt_boost = dt[0]; - + // Find the positions of the lab-frame box that corresponds to the boosted-frame box at t=0 Real zmin_lab = current_lo[moving_window_dir]/( (1.+beta_boost)*gamma_boost ); Real zmax_lab = current_hi[moving_window_dir]/( (1.+beta_boost)*gamma_boost ); @@ -97,7 +95,7 @@ WarpX::InitDiagnostics () { zmax_lab, moving_window_v, dt_snapshots_lab, num_snapshots_lab, gamma_boost, - t_new[0], dt_boost, + t_new[0], dt_boost, moving_window_dir, geom[0])); } } @@ -118,10 +116,10 @@ WarpX::InitFromScratch () InitPML(); -#ifdef WARPX_DO_ELECTROSTATIC +#ifdef WARPX_DO_ELECTROSTATIC if (do_electrostatic) { getLevelMasks(masks); - + // the plus one is to convert from num_cells to num_nodes getLevelMasks(gather_masks, n_buffer + 1); } @@ -133,14 +131,35 @@ WarpX::InitPML () { if (do_pml) { + amrex::IntVect do_pml_Lo_corrected = do_pml_Lo; + +#ifdef WARPX_DIM_RZ + do_pml_Lo_corrected[0] = 0; // no PML at r=0, in cylindrical geometry +#endif pml[0].reset(new PML(boxArray(0), DistributionMap(0), &Geom(0), nullptr, - pml_ncell, pml_delta, 0, do_dive_cleaning, do_moving_window)); + pml_ncell, pml_delta, 0, +#ifdef WARPX_USE_PSATD + dt[0], nox_fft, noy_fft, noz_fft, do_nodal, +#endif + do_dive_cleaning, do_moving_window, + do_pml_Lo_corrected, do_pml_Hi)); for (int lev = 1; lev <= finest_level; ++lev) { + amrex::IntVect do_pml_Lo_MR = amrex::IntVect::TheUnitVector(); +#ifdef WARPX_DIM_RZ + //In cylindrical geometry, if the edge of the patch is at r=0, do not add PML + if ((max_level > 0) && (fine_tag_lo[0]==0.)) { + do_pml_Lo_MR[0] = 0; + } +#endif pml[lev].reset(new PML(boxArray(lev), DistributionMap(lev), &Geom(lev), &Geom(lev-1), - pml_ncell, pml_delta, refRatio(lev-1)[0], 
do_dive_cleaning, - do_moving_window)); + pml_ncell, pml_delta, refRatio(lev-1)[0], +#ifdef WARPX_USE_PSATD + dt[lev], nox_fft, noy_fft, noz_fft, do_nodal, +#endif + do_dive_cleaning, do_moving_window, + do_pml_Lo_MR, amrex::IntVect::TheUnitVector())); } } } @@ -226,7 +245,7 @@ WarpX::InitOpenbc () Vector<int> alllohi(6*nprocs,100000); MPI_Allgather(lohi, 6, MPI_INT, alllohi.data(), 6, MPI_INT, ParallelDescriptor::Communicator()); - + BoxList bl{IndexType::TheNodeType()}; for (int i = 0; i < nprocs; ++i) { @@ -252,7 +271,7 @@ WarpX::InitOpenbc () rho_openbc.copy(*rho, 0, 0, 1, rho->nGrow(), 0, gm.periodicity(), FabArrayBase::ADD); const Real* dx = gm.CellSize(); - + warpx_openbc_potential(rho_openbc[myproc].dataPtr(), phi_openbc[myproc].dataPtr(), dx); BoxArray nba = boxArray(lev); @@ -322,7 +341,7 @@ WarpX::InitLevelData (int lev, Real time) void WarpX::InitLevelDataFFT (int lev, Real time) { - + Efield_fp_fft[lev][0]->setVal(0.0); Efield_fp_fft[lev][1]->setVal(0.0); Efield_fp_fft[lev][2]->setVal(0.0); diff --git a/Source/Laser/LaserParticleContainer.cpp b/Source/Laser/LaserParticleContainer.cpp index 3d3447a3c..786ebc622 100644 --- a/Source/Laser/LaserParticleContainer.cpp +++ b/Source/Laser/LaserParticleContainer.cpp @@ -453,7 +453,12 @@ LaserParticleContainer::Evolve (int lev, pti.GetPosition(m_xp[thread_num], m_yp[thread_num], m_zp[thread_num]); BL_PROFILE_VAR_STOP(blp_copy); - if (rho) DepositCharge(pti, wp, rho, crho, 0, np_current, np, thread_num, lev); + if (rho) { + DepositCharge(pti, wp, rho, 0, 0, np_current, thread_num, lev, lev); + if (crho) { + DepositCharge(pti, wp, crho, 0, np_current, np-np_current, thread_num, lev, lev-1); + } + } // // Particle Push @@ -504,15 +509,15 @@ LaserParticleContainer::Evolve (int lev, // Current Deposition // // Deposit inside domains - DepositCurrentFortran(pti, wp, uxp, uyp, uzp, &jx, &jy, &jz, - 0, np_current, thread_num, - lev, lev, dt); + DepositCurrent(pti, wp, uxp, uyp, uzp, &jx, &jy, &jz, + 0, np_current, 
thread_num, + lev, lev, dt); bool has_buffer = cjx; if (has_buffer){ // Deposit in buffers - DepositCurrentFortran(pti, wp, uxp, uyp, uzp, cjx, cjy, cjz, - np_current, np-np_current, thread_num, - lev, lev-1, dt); + DepositCurrent(pti, wp, uxp, uyp, uzp, cjx, cjy, cjz, + np_current, np-np_current, thread_num, + lev, lev-1, dt); } // @@ -522,7 +527,12 @@ LaserParticleContainer::Evolve (int lev, pti.SetPosition(m_xp[thread_num], m_yp[thread_num], m_zp[thread_num]); BL_PROFILE_VAR_STOP(blp_copy); - if (rho) DepositCharge(pti, wp, rho, crho, 1, np_current, np, thread_num, lev); + if (rho) { + DepositCharge(pti, wp, rho, 1, 0, np_current, thread_num, lev, lev); + if (crho) { + DepositCharge(pti, wp, crho, 1, np_current, np-np_current, thread_num, lev, lev-1); + } + } if (cost) { const Box& tbx = pti.tilebox(); diff --git a/Source/Make.WarpX b/Source/Make.WarpX index 3060ae8f0..e3a33a00f 100644 --- a/Source/Make.WarpX +++ b/Source/Make.WarpX @@ -97,16 +97,24 @@ ifeq ($(USE_OPENBC_POISSON),TRUE) endif ifeq ($(USE_OPENPMD), TRUE) - OPENPMD_LIB_PATH ?= NOT_SET - ifneq ($(OPENPMD_LIB_PATH),NOT_SET) - LIBRARY_LOCATIONS += $(OPENPMD_LIB_PATH) + # try pkg-config query + ifeq (0, $(shell pkg-config "openPMD >= 0.9.0"; echo $$?)) + CXXFLAGS += $(shell pkg-config --cflags openPMD) + LDFLAGS += $(shell pkg-config --libs openPMD) + LDFLAGS += -Xlinker -rpath -Xlinker $(shell pkg-config --variable=libdir openPMD) + # fallback to manual settings + else + OPENPMD_LIB_PATH ?= NOT_SET + ifneq ($(OPENPMD_LIB_PATH),NOT_SET) + LIBRARY_LOCATIONS += $(OPENPMD_LIB_PATH) + endif + OPENPMD_INCLUDE_PATH ?= NOT_SET + ifneq ($(OPENPMD_INCLUDE_PATH),NOT_SET) + INCLUDE_LOCATIONS += $(OPENPMD_INCLUDE_PATH) + endif + libraries += -lopenPMD endif - OPENPMD_INCLUDE_PATH ?= NOT_SET - ifneq ($(OPENPMD_INCLUDE_PATH),NOT_SET) - INCLUDE_LOCATIONS += $(OPENPMD_INCLUDE_PATH) - endif - DEFINES += -DWARPX_USE_OPENPMD -DopenPMD_HAVE_MPI=1 - LIBRARIES += -lopenPMD -lhdf5 + DEFINES += -DWARPX_USE_OPENPMD endif @@ 
-115,7 +123,7 @@ ifeq ($(USE_PSATD),TRUE) DEFINES += -DWARPX_USE_PSATD ifeq ($(USE_CUDA),FALSE) # Running on CPU # Use FFTW - LIBRARIES += -lfftw3_mpi -lfftw3 -lfftw3_threads + libraries += -lfftw3_mpi -lfftw3 -lfftw3_threads FFTW_HOME ?= NOT_SET ifneq ($(FFTW_HOME),NOT_SET) VPATH_LOCATIONS += $(FFTW_HOME)/include @@ -127,13 +135,12 @@ ifeq ($(USE_PSATD),TRUE) DEFINES += -DFFTW # PICSAR uses it else # Use cuFFT - LIBRARIES += -lcufft + libraries += -lcufft endif endif ifeq ($(USE_RZ),TRUE) USERSuffix := $(USERSuffix).RZ - DEFINES += -DWARPX_RZ endif ifeq ($(DO_ELECTROSTATIC),TRUE) @@ -151,7 +158,7 @@ ifeq ($(USE_HDF5),TRUE) LIBRARY_LOCATIONS += $(HDF5_HOME)/lib endif DEFINES += -DWARPX_USE_HDF5 - LIBRARIES += -lhdf5 -lz + libraries += -lhdf5 -lz endif # job_info support diff --git a/Source/Parser/GpuParser.H b/Source/Parser/GpuParser.H new file mode 100644 index 000000000..1533ee6b9 --- /dev/null +++ b/Source/Parser/GpuParser.H @@ -0,0 +1,72 @@ +#ifndef WARPX_GPU_PARSER_H_ +#define WARPX_GPU_PARSER_H_ + +#include <WarpXParser.H> +#include <AMReX_Gpu.H> + +// When compiled for CPU, wrap WarpXParser and enable threading. +// When compiled for GPU, store one copy of the parser in +// CUDA managed memory for __device__ code, and one copy of the parser +// in CUDA managed memory for __host__ code. This way, the parser can be +// efficiently called from both host and device. +class GpuParser +{ +public: + GpuParser (WarpXParser const& wp); + void clear (); + + AMREX_GPU_HOST_DEVICE + double + operator() (double x, double y, double z) const noexcept + { +#ifdef AMREX_USE_GPU + +#ifdef AMREX_DEVICE_COMPILE +// WarpX compiled for GPU, function compiled for __device__ + // the 3D position of each particle is stored in shared memory. 
+ amrex::Gpu::SharedMemory<double> gsm; + double* p = gsm.dataPtr(); + int tid = threadIdx.x + threadIdx.y*blockDim.x + threadIdx.z*(blockDim.x*blockDim.y); + p[tid*3] = x; + p[tid*3+1] = y; + p[tid*3+2] = z; + return wp_ast_eval(m_gpu_parser.ast); +#else +// WarpX compiled for GPU, function compiled for __host__ + m_var.x = x; + m_var.y = y; + m_var.z = z; + return wp_ast_eval(m_cpu_parser.ast); +#endif + +#else +// WarpX compiled for CPU +#ifdef _OPENMP + int tid = omp_get_thread_num(); +#else + int tid = 0; +#endif + m_var[tid].x = x; + m_var[tid].y = y; + m_var[tid].z = z; + return wp_ast_eval(m_parser[tid]->ast); +#endif + } + +private: + +#ifdef AMREX_USE_GPU + // Copy of the parser running on __device__ + struct wp_parser m_gpu_parser; + // Copy of the parser running on __host__ + struct wp_parser m_cpu_parser; + mutable amrex::XDim3 m_var; +#else + // Only one parser + struct wp_parser** m_parser; + mutable amrex::XDim3* m_var; + int nthreads; +#endif +}; + +#endif diff --git a/Source/Parser/GpuParser.cpp b/Source/Parser/GpuParser.cpp new file mode 100644 index 000000000..db1c2287d --- /dev/null +++ b/Source/Parser/GpuParser.cpp @@ -0,0 +1,73 @@ +#include <GpuParser.H> + +GpuParser::GpuParser (WarpXParser const& wp) +{ +#ifdef AMREX_USE_GPU + + struct wp_parser* a_wp = wp.m_parser; + // Initialize GPU parser: allocate memory in CUDA managed memory, + // copy all data needed on GPU to m_gpu_parser + m_gpu_parser.sz_mempool = wp_ast_size(a_wp->ast); + m_gpu_parser.p_root = (struct wp_node*) + amrex::The_Managed_Arena()->alloc(m_gpu_parser.sz_mempool); + m_gpu_parser.p_free = m_gpu_parser.p_root; + // 0: don't free the source + m_gpu_parser.ast = wp_parser_ast_dup(&m_gpu_parser, a_wp->ast, 0); + wp_parser_regvar_gpu(&m_gpu_parser, "x", 0); + wp_parser_regvar_gpu(&m_gpu_parser, "y", 1); + wp_parser_regvar_gpu(&m_gpu_parser, "z", 2); + + // Initialize CPU parser: allocate memory in CUDA managed memory, + // copy all data needed on CPU to m_cpu_parser + 
m_cpu_parser.sz_mempool = wp_ast_size(a_wp->ast); + m_cpu_parser.p_root = (struct wp_node*) + amrex::The_Managed_Arena()->alloc(m_cpu_parser.sz_mempool); + m_cpu_parser.p_free = m_cpu_parser.p_root; + // 0: don't free the source + m_cpu_parser.ast = wp_parser_ast_dup(&m_cpu_parser, a_wp->ast, 0); + wp_parser_regvar(&m_cpu_parser, "x", &(m_var.x)); + wp_parser_regvar(&m_cpu_parser, "y", &(m_var.y)); + wp_parser_regvar(&m_cpu_parser, "z", &(m_var.z)); + +#else // not defined AMREX_USE_GPU + +#ifdef _OPENMP + nthreads = omp_get_max_threads(); +#else // _OPENMP + nthreads = 1; +#endif // _OPENMP + + m_parser = ::new struct wp_parser*[nthreads]; + m_var = ::new amrex::XDim3[nthreads]; + + for (int tid = 0; tid < nthreads; ++tid) + { +#ifdef _OPENMP + m_parser[tid] = wp_parser_dup(wp.m_parser[tid]); +#else // _OPENMP + m_parser[tid] = wp_parser_dup(wp.m_parser); +#endif // _OPENMP + wp_parser_regvar(m_parser[tid], "x", &(m_var[tid].x)); + wp_parser_regvar(m_parser[tid], "y", &(m_var[tid].y)); + wp_parser_regvar(m_parser[tid], "z", &(m_var[tid].z)); + } + +#endif // AMREX_USE_GPU +} + +void +GpuParser::clear () +{ +#ifdef AMREX_USE_GPU + amrex::The_Managed_Arena()->free(m_gpu_parser.ast); + amrex::The_Managed_Arena()->free(m_cpu_parser.ast); +#else + for (int tid = 0; tid < nthreads; ++tid) + { + wp_parser_delete(m_parser[tid]); + } + ::delete[] m_parser; + ::delete[] m_var; +#endif +} + diff --git a/Source/Parser/Make.package b/Source/Parser/Make.package index 26ef4fb43..5ce02cbda 100644 --- a/Source/Parser/Make.package +++ b/Source/Parser/Make.package @@ -3,6 +3,8 @@ cEXE_sources += wp_parser_y.c wp_parser.tab.c wp_parser.lex.c wp_parser_c.c cEXE_headers += wp_parser_y.h wp_parser.tab.h wp_parser.lex.h wp_parser_c.h CEXE_sources += WarpXParser.cpp CEXE_headers += WarpXParser.H +CEXE_headers += GpuParser.H +CEXE_sources += GpuParser.cpp INCLUDE_LOCATIONS += $(WARPX_HOME)/Source/Parser VPATH_LOCATIONS += $(WARPX_HOME)/Source/Parser diff --git a/Source/Parser/WarpXParser.H 
b/Source/Parser/WarpXParser.H index 046491e29..ffa61e457 100644 --- a/Source/Parser/WarpXParser.H +++ b/Source/Parser/WarpXParser.H @@ -13,6 +13,8 @@ #include <omp.h> #endif +class GpuParser; + class WarpXParser { public: @@ -46,6 +48,8 @@ public: std::set<std::string> symbols () const; + friend class GpuParser; + private: void clear (); diff --git a/Source/Parser/wp_parser_c.h b/Source/Parser/wp_parser_c.h index d810bd685..3aafdec65 100644 --- a/Source/Parser/wp_parser_c.h +++ b/Source/Parser/wp_parser_c.h @@ -2,6 +2,8 @@ #define WP_PARSER_C_H_ #include "wp_parser_y.h" +#include <AMReX_GpuQualifiers.H> +#include <AMReX_Extension.H> #ifdef __cplusplus extern "C" { @@ -18,71 +20,167 @@ extern "C" { #include <set> #include <string> -inline -double +AMREX_GPU_HOST_DEVICE +inline double wp_ast_eval (struct wp_node* node) { double result; +#ifdef AMREX_DEVICE_COMPILE + extern __shared__ double extern_xyz[]; + int tid = threadIdx.x + threadIdx.y*blockDim.x + threadIdx.z*(blockDim.x*blockDim.y); + double* x = extern_xyz + tid*3; +#endif + switch (node->type) { case WP_NUMBER: + { result = ((struct wp_number*)node)->value; break; + } case WP_SYMBOL: - result = *(((struct wp_symbol*)node)->pointer); + { +#ifdef AMREX_DEVICE_COMPILE + int i =((struct wp_symbol*)node)->ip.i; + result = x[i]; +#else + result = *(((struct wp_symbol*)node)->ip.p); +#endif break; + } case WP_ADD: + { result = wp_ast_eval(node->l) + wp_ast_eval(node->r); break; + } case WP_SUB: + { result = wp_ast_eval(node->l) - wp_ast_eval(node->r); break; + } case WP_MUL: + { result = wp_ast_eval(node->l) * wp_ast_eval(node->r); break; + } case WP_DIV: + { result = wp_ast_eval(node->l) / wp_ast_eval(node->r); break; + } case WP_NEG: + { result = -wp_ast_eval(node->l); break; + } case WP_F1: + { result = wp_call_f1(((struct wp_f1*)node)->ftype, wp_ast_eval(((struct wp_f1*)node)->l)); break; + } case WP_F2: + { result = wp_call_f2(((struct wp_f2*)node)->ftype, wp_ast_eval(((struct wp_f2*)node)->l), 
wp_ast_eval(((struct wp_f2*)node)->r)); break; + } case WP_ADD_VP: - result = node->lvp.v + *(node->rp); + { +#ifdef AMREX_DEVICE_COMPILE + int i = node->rip.i; + result = node->lvp.v + x[i]; +#else + result = node->lvp.v + *(node->rip.p); +#endif break; + } case WP_ADD_PP: - result = *(node->lvp.p) + *(node->rp); + { +#ifdef AMREX_DEVICE_COMPILE + int i = node->lvp.ip.i; + int j = node->rip.i; + result = x[i] + x[j]; +#else + result = *(node->lvp.ip.p) + *(node->rip.p); +#endif break; + } case WP_SUB_VP: - result = node->lvp.v - *(node->rp); + { +#ifdef AMREX_DEVICE_COMPILE + int i = node->rip.i; + result = node->lvp.v - x[i]; +#else + result = node->lvp.v - *(node->rip.p); +#endif break; + } case WP_SUB_PP: - result = *(node->lvp.p) - *(node->rp); + { +#ifdef AMREX_DEVICE_COMPILE + int i = node->lvp.ip.i; + int j = node->rip.i; + result = x[i] - x[j]; +#else + result = *(node->lvp.ip.p) - *(node->rip.p); +#endif break; + } case WP_MUL_VP: - result = node->lvp.v * *(node->rp); + { +#ifdef AMREX_DEVICE_COMPILE + int i = node->rip.i; + result = node->lvp.v * x[i]; +#else + result = node->lvp.v * *(node->rip.p); +#endif break; + } case WP_MUL_PP: - result = *(node->lvp.p) * *(node->rp); + { +#ifdef AMREX_DEVICE_COMPILE + int i = node->lvp.ip.i; + int j = node->rip.i; + result = x[i] * x[j]; +#else + result = *(node->lvp.ip.p) * *(node->rip.p); +#endif break; + } case WP_DIV_VP: - result = node->lvp.v / *(node->rp); + { +#ifdef AMREX_DEVICE_COMPILE + int i = node->rip.i; + result = node->lvp.v / x[i]; +#else + result = node->lvp.v / *(node->rip.p); +#endif break; + } case WP_DIV_PP: - result = *(node->lvp.p) / *(node->rp); + { +#ifdef AMREX_DEVICE_COMPILE + int i = node->lvp.ip.i; + int j = node->rip.i; + result = x[i] / x[j]; +#else + result = *(node->lvp.ip.p) / *(node->rip.p); +#endif break; + } case WP_NEG_P: - result = -*(node->lvp.p); + { +#ifdef AMREX_DEVICE_COMPILE + int i = node->rip.i; + result = -x[i]; +#else + result = -*(node->lvp.ip.p); +#endif break; + 
} default: yyerror("wp_ast_eval: unknown node type %d\n", node->type); } diff --git a/Source/Parser/wp_parser_y.c b/Source/Parser/wp_parser_y.c index 46cb199db..259f9368b 100644 --- a/Source/Parser/wp_parser_y.c +++ b/Source/Parser/wp_parser_y.c @@ -6,6 +6,8 @@ #include "wp_parser_y.h" #include "wp_parser.tab.h" +#include <AMReX_GpuQualifiers.H> + static struct wp_node* wp_root = NULL; /* This is called by a bison rule to store the original AST in a @@ -33,7 +35,7 @@ wp_makesymbol (char* name) struct wp_symbol* symbol = (struct wp_symbol*) malloc(sizeof(struct wp_symbol)); symbol->type = WP_SYMBOL; symbol->name = strdup(name); - symbol->pointer = NULL; + symbol->ip.p = NULL; return symbol; } @@ -74,13 +76,19 @@ wp_newf2 (enum wp_f2_t ftype, struct wp_node* l, struct wp_node* r) return (struct wp_node*) tmp; } +AMREX_GPU_HOST_DEVICE void yyerror (char const *s, ...) { va_list vl; va_start(vl, s); +#ifdef AMREX_DEVICE_COMPILE + printf(s,"\n"); + assert(0); +#else vfprintf(stderr, s, vl); fprintf(stderr, "\n"); +#endif va_end(vl); } @@ -97,7 +105,7 @@ wp_parser_new (void) my_parser->ast = wp_parser_ast_dup(my_parser, wp_root,1); /* 1: free the source wp_root */ - if (my_parser->p_root + my_parser->sz_mempool != my_parser->p_free) { + if ((char*)my_parser->p_root + my_parser->sz_mempool != (char*)my_parser->p_free) { yyerror("wp_parser_new: error in memory size"); exit(1); } @@ -145,6 +153,7 @@ wp_parser_dup (struct wp_parser* source) return dest; } +AMREX_GPU_HOST_DEVICE double wp_call_f1 (enum wp_f1_t type, double a) { @@ -175,6 +184,7 @@ wp_call_f1 (enum wp_f1_t type, double a) } } +AMREX_GPU_HOST_DEVICE double wp_call_f2 (enum wp_f2_t type, double a, double b) { @@ -346,23 +356,23 @@ wp_parser_ast_dup (struct wp_parser* my_parser, struct wp_node* node, int move) #define WP_MOVEUP_R(node, v) \ struct wp_node* n = node->r->r; \ - double* p = node->r->rp; \ + double* p = node->r->rip.p; \ node->r = n; \ node->lvp.v = v; \ - node->rp = p; + node->rip.p = p; #define 
WP_MOVEUP_L(node, v) \ struct wp_node* n = node->l->r; \ - double* p = node->l->rp; \ + double* p = node->l->rip.p; \ node->r = n; \ node->lvp.v = v; \ - node->rp = p; + node->rip.p = p; #define WP_EVAL_R(node) node->r->lvp.v #define WP_EVAL_L(node) node->l->lvp.v #define WP_NEG_MOVEUP(node) \ node->r = node->l->r; \ node->lvp.v = -node->l->lvp.v; \ - node->rp = node->l->rp; + node->rip.p = node->l->rip.p; void wp_ast_optimize (struct wp_node* node) @@ -391,22 +401,22 @@ wp_ast_optimize (struct wp_node* node) node->r->type == WP_SYMBOL) { node->lvp.v = ((struct wp_number*)(node->l))->value; - node->rp = ((struct wp_symbol*)(node->r))->pointer; + node->rip.p = ((struct wp_symbol*)(node->r))->ip.p; node->type = WP_ADD_VP; } else if (node->l->type == WP_SYMBOL && node->r->type == WP_NUMBER) { node->lvp.v = ((struct wp_number*)(node->r))->value; - node->rp = ((struct wp_symbol*)(node->l))->pointer; + node->rip.p = ((struct wp_symbol*)(node->l))->ip.p; node->r = node->l; node->type = WP_ADD_VP; } else if (node->l->type == WP_SYMBOL && node->r->type == WP_SYMBOL) { - node->lvp.p = ((struct wp_symbol*)(node->l))->pointer; - node->rp = ((struct wp_symbol*)(node->r))->pointer; + node->lvp.ip.p = ((struct wp_symbol*)(node->l))->ip.p; + node->rip.p = ((struct wp_symbol*)(node->r))->ip.p; node->type = WP_ADD_PP; } else if (node->l->type == WP_NUMBER && @@ -454,22 +464,22 @@ wp_ast_optimize (struct wp_node* node) node->r->type == WP_SYMBOL) { node->lvp.v = ((struct wp_number*)(node->l))->value; - node->rp = ((struct wp_symbol*)(node->r))->pointer; + node->rip.p = ((struct wp_symbol*)(node->r))->ip.p; node->type = WP_SUB_VP; } else if (node->l->type == WP_SYMBOL && node->r->type == WP_NUMBER) { node->lvp.v = -((struct wp_number*)(node->r))->value; - node->rp = ((struct wp_symbol*)(node->l))->pointer; + node->rip.p = ((struct wp_symbol*)(node->l))->ip.p; node->r = node->l; node->type = WP_ADD_VP; } else if (node->l->type == WP_SYMBOL && node->r->type == WP_SYMBOL) { - node->lvp.p 
= ((struct wp_symbol*)(node->l))->pointer; - node->rp = ((struct wp_symbol*)(node->r))->pointer; + node->lvp.ip.p = ((struct wp_symbol*)(node->l))->ip.p; + node->rip.p = ((struct wp_symbol*)(node->r))->ip.p; node->type = WP_SUB_PP; } else if (node->l->type == WP_NUMBER && @@ -517,22 +527,22 @@ wp_ast_optimize (struct wp_node* node) node->r->type == WP_SYMBOL) { node->lvp.v = ((struct wp_number*)(node->l))->value; - node->rp = ((struct wp_symbol*)(node->r))->pointer; + node->rip.p = ((struct wp_symbol*)(node->r))->ip.p; node->type = WP_MUL_VP; } else if (node->l->type == WP_SYMBOL && node->r->type == WP_NUMBER) { node->lvp.v = ((struct wp_number*)(node->r))->value; - node->rp = ((struct wp_symbol*)(node->l))->pointer; + node->rip.p = ((struct wp_symbol*)(node->l))->ip.p; node->r = node->l; node->type = WP_MUL_VP; } else if (node->l->type == WP_SYMBOL && node->r->type == WP_SYMBOL) { - node->lvp.p = ((struct wp_symbol*)(node->l))->pointer; - node->rp = ((struct wp_symbol*)(node->r))->pointer; + node->lvp.ip.p = ((struct wp_symbol*)(node->l))->ip.p; + node->rip.p = ((struct wp_symbol*)(node->r))->ip.p; node->type = WP_MUL_PP; } else if (node->l->type == WP_NUMBER && @@ -580,22 +590,22 @@ wp_ast_optimize (struct wp_node* node) node->r->type == WP_SYMBOL) { node->lvp.v = ((struct wp_number*)(node->l))->value; - node->rp = ((struct wp_symbol*)(node->r))->pointer; + node->rip.p = ((struct wp_symbol*)(node->r))->ip.p; node->type = WP_DIV_VP; } else if (node->l->type == WP_SYMBOL && node->r->type == WP_NUMBER) { node->lvp.v = 1./((struct wp_number*)(node->r))->value; - node->rp = ((struct wp_symbol*)(node->l))->pointer; + node->rip.p = ((struct wp_symbol*)(node->l))->ip.p; node->r = node->l; node->type = WP_MUL_VP; } else if (node->l->type == WP_SYMBOL && node->r->type == WP_SYMBOL) { - node->lvp.p = ((struct wp_symbol*)(node->l))->pointer; - node->rp = ((struct wp_symbol*)(node->r))->pointer; + node->lvp.ip.p = ((struct wp_symbol*)(node->l))->ip.p; + node->rip.p = ((struct 
wp_symbol*)(node->r))->ip.p; node->type = WP_DIV_PP; } else if (node->l->type == WP_NUMBER && @@ -637,7 +647,7 @@ wp_ast_optimize (struct wp_node* node) } else if (node->l->type == WP_SYMBOL) { - node->lvp.p = ((struct wp_symbol*)(node->l))->pointer; + node->lvp.ip.p = ((struct wp_symbol*)(node->l))->ip.p; node->type = WP_NEG_P; } else if (node->l->type == WP_ADD_VP) @@ -936,7 +946,7 @@ wp_ast_regvar (struct wp_node* node, char const* name, double* p) break; case WP_SYMBOL: if (strcmp(name, ((struct wp_symbol*)node)->name) == 0) { - ((struct wp_symbol*)node)->pointer = p; + ((struct wp_symbol*)node)->ip.p = p; } break; case WP_ADD: @@ -961,11 +971,11 @@ wp_ast_regvar (struct wp_node* node, char const* name, double* p) case WP_MUL_VP: case WP_DIV_VP: wp_ast_regvar(node->r, name, p); - node->rp = ((struct wp_symbol*)(node->r))->pointer; + node->rip.p = ((struct wp_symbol*)(node->r))->ip.p; break; case WP_NEG_P: wp_ast_regvar(node->l, name, p); - node->lvp.p = ((struct wp_symbol*)(node->l))->pointer; + node->lvp.ip.p = ((struct wp_symbol*)(node->l))->ip.p; break; case WP_ADD_PP: case WP_SUB_PP: @@ -973,8 +983,8 @@ wp_ast_regvar (struct wp_node* node, char const* name, double* p) case WP_DIV_PP: wp_ast_regvar(node->l, name, p); wp_ast_regvar(node->r, name, p); - node->lvp.p = ((struct wp_symbol*)(node->l))->pointer; - node->rp = ((struct wp_symbol*)(node->r))->pointer; + node->lvp.ip.p = ((struct wp_symbol*)(node->l))->ip.p; + node->rip.p = ((struct wp_symbol*)(node->r))->ip.p; break; default: yyerror("wp_ast_regvar: unknown node type %d\n", node->type); @@ -982,6 +992,61 @@ wp_ast_regvar (struct wp_node* node, char const* name, double* p) } } +void +wp_ast_regvar_gpu (struct wp_node* node, char const* name, int i) +{ + switch (node->type) + { + case WP_NUMBER: + break; + case WP_SYMBOL: + if (strcmp(name, ((struct wp_symbol*)node)->name) == 0) { + ((struct wp_symbol*)node)->ip.i = i; + } + break; + case WP_ADD: + case WP_SUB: + case WP_MUL: + case WP_DIV: + 
wp_ast_regvar_gpu(node->l, name, i); + wp_ast_regvar_gpu(node->r, name, i); + break; + case WP_NEG: + wp_ast_regvar_gpu(node->l, name, i); + break; + case WP_F1: + wp_ast_regvar_gpu(node->l, name, i); + break; + case WP_F2: + wp_ast_regvar_gpu(node->l, name, i); + wp_ast_regvar_gpu(node->r, name, i); + break; + case WP_ADD_VP: + case WP_SUB_VP: + case WP_MUL_VP: + case WP_DIV_VP: + wp_ast_regvar_gpu(node->r, name, i); + node->rip.i = ((struct wp_symbol*)(node->r))->ip.i; + break; + case WP_NEG_P: + wp_ast_regvar_gpu(node->l, name, i); + node->lvp.ip.i = ((struct wp_symbol*)(node->l))->ip.i; + break; + case WP_ADD_PP: + case WP_SUB_PP: + case WP_MUL_PP: + case WP_DIV_PP: + wp_ast_regvar_gpu(node->l, name, i); + wp_ast_regvar_gpu(node->r, name, i); + node->lvp.ip.i = ((struct wp_symbol*)(node->l))->ip.i; + node->rip.i = ((struct wp_symbol*)(node->r))->ip.i; + break; + default: + yyerror("wp_ast_regvar_gpu: unknown node type %d\n", node->type); + exit(1); + } +} + void wp_ast_setconst (struct wp_node* node, char const* name, double c) { switch (node->type) @@ -1040,6 +1105,12 @@ wp_parser_regvar (struct wp_parser* parser, char const* name, double* p) } void +wp_parser_regvar_gpu (struct wp_parser* parser, char const* name, int i) +{ + wp_ast_regvar_gpu(parser->ast, name, i); +} + +void wp_parser_setconst (struct wp_parser* parser, char const* name, double c) { wp_ast_setconst(parser->ast, name, c); diff --git a/Source/Parser/wp_parser_y.h b/Source/Parser/wp_parser_y.h index 4a3aeda40..8c9f8e4e4 100644 --- a/Source/Parser/wp_parser_y.h +++ b/Source/Parser/wp_parser_y.h @@ -1,6 +1,8 @@ #ifndef WP_PARSER_Y_H_ #define WP_PARSER_Y_H_ +#include <AMReX_GpuQualifiers.H> + #ifdef __cplusplus #include <cstdlib> extern "C" { @@ -73,17 +75,22 @@ enum wp_node_t { * wp_node_t type can be safely checked to determine their real type. 
*/ -union wp_vp { - double v; +union wp_ip { + int i; double* p; }; +union wp_vp { + double v; + union wp_ip ip; +}; + struct wp_node { enum wp_node_t type; struct wp_node* l; struct wp_node* r; union wp_vp lvp; // After optimization, this may store left value/pointer. - double* rp; // this may store right pointer. + union wp_ip rip; // this may store right pointer. }; struct wp_number { @@ -94,7 +101,7 @@ struct wp_number { struct wp_symbol { enum wp_node_t type; char* name; - double* pointer; + union wp_ip ip; }; struct wp_f1 { /* Builtin functions with one argument */ @@ -124,6 +131,7 @@ struct wp_node* wp_newf1 (enum wp_f1_t ftype, struct wp_node* l); struct wp_node* wp_newf2 (enum wp_f2_t ftype, struct wp_node* l, struct wp_node* r); +AMREX_GPU_HOST_DEVICE void yyerror (char const *s, ...); /*******************************************************************/ @@ -146,6 +154,7 @@ struct wp_parser* wp_parser_dup (struct wp_parser* source); struct wp_node* wp_parser_ast_dup (struct wp_parser* parser, struct wp_node* src, int move); void wp_parser_regvar (struct wp_parser* parser, char const* name, double* p); +void wp_parser_regvar_gpu (struct wp_parser* parser, char const* name, int i); void wp_parser_setconst (struct wp_parser* parser, char const* name, double c); /* We need to walk the tree in these functions */ @@ -153,10 +162,11 @@ void wp_ast_optimize (struct wp_node* node); size_t wp_ast_size (struct wp_node* node); void wp_ast_print (struct wp_node* node); void wp_ast_regvar (struct wp_node* node, char const* name, double* p); +void wp_ast_regvar_gpu (struct wp_node* node, char const* name, int i); void wp_ast_setconst (struct wp_node* node, char const* name, double c); -double wp_call_f1 (enum wp_f1_t type, double a); -double wp_call_f2 (enum wp_f2_t type, double a, double b); +AMREX_GPU_HOST_DEVICE double wp_call_f1 (enum wp_f1_t type, double a); +AMREX_GPU_HOST_DEVICE double wp_call_f2 (enum wp_f2_t type, double a, double b); #ifdef __cplusplus } diff 
--git a/Source/Particles/Deposition/ChargeDeposition.H b/Source/Particles/Deposition/ChargeDeposition.H new file mode 100755 index 000000000..a6573b7ab --- /dev/null +++ b/Source/Particles/Deposition/ChargeDeposition.H @@ -0,0 +1,97 @@ +#ifndef CHARGEDEPOSITION_H_ +#define CHARGEDEPOSITION_H_ + +#include "ShapeFactors.H" + +/* \brief Charge Deposition for thread thread_num + * /param xp, yp, zp : Pointer to arrays of particle positions. + * \param wp : Pointer to array of particle weights. + * \param rho_arr : Array4 of charge density, either full array or tile. + * \param np_to_depose : Number of particles for which current is deposited. + * \param dx : 3D cell size + * \param xyzmin : Physical lower bounds of domain. + * \param lo : Index lower bounds of domain. + * /param q : species charge. + */ +template <int depos_order> +void doChargeDepositionShapeN(const amrex::Real * const xp, + const amrex::Real * const yp, + const amrex::Real * const zp, + const amrex::Real * const wp, + const amrex::Array4<amrex::Real>& rho_arr, + const long np_to_depose, + const std::array<amrex::Real,3>& dx, + const std::array<amrex::Real, 3> xyzmin, + const amrex::Dim3 lo, + const amrex::Real q) +{ + const amrex::Real dxi = 1.0/dx[0]; + const amrex::Real dzi = 1.0/dx[2]; +#if (AMREX_SPACEDIM == 2) + const amrex::Real invvol = dxi*dzi; +#elif (defined WARPX_DIM_3D) + const amrex::Real dyi = 1.0/dx[1]; + const amrex::Real invvol = dxi*dyi*dzi; +#endif + + const amrex::Real xmin = xyzmin[0]; + const amrex::Real ymin = xyzmin[1]; + const amrex::Real zmin = xyzmin[2]; + + // Loop over particles and deposit into rho_arr + amrex::ParallelFor( + np_to_depose, + [=] AMREX_GPU_DEVICE (long ip) { + // --- Get particle quantities + const amrex::Real wq = q*wp[ip]*invvol; + + // --- Compute shape factors + // x direction + // Get particle position in grid coordinates +#if (defined WARPX_DIM_RZ) + const amrex::Real r = std::sqrt(xp[ip]*xp[ip] + yp[ip]*yp[ip]); + const amrex::Real x = (r - 
xmin)*dxi; +#else + const amrex::Real x = (xp[ip] - xmin)*dxi; +#endif + // Compute shape factors for node-centered quantities + amrex::Real AMREX_RESTRICT sx[depos_order + 1]; + // i: leftmost grid point (node-centered) that the particle touches + const int i = compute_shape_factor<depos_order>(sx, x); + +#if (defined WARPX_DIM_3D) + // y direction + const amrex::Real y = (yp[ip] - ymin)*dyi; + amrex::Real AMREX_RESTRICT sy[depos_order + 1]; + const int j = compute_shape_factor<depos_order>(sy, y); +#endif + // z direction + const amrex::Real z = (zp[ip] - zmin)*dzi; + amrex::Real AMREX_RESTRICT sz[depos_order + 1]; + const int k = compute_shape_factor<depos_order>(sz, z); + + // Deposit charge into rho_arr +#if (defined WARPX_DIM_2D) || (defined WARPX_DIM_RZ) + for (int iz=0; iz<=depos_order; iz++){ + for (int ix=0; ix<=depos_order; ix++){ + amrex::Gpu::Atomic::Add( + &rho_arr(lo.x+i+ix, lo.y+k+iz, 0), + sx[ix]*sz[iz]*wq); + } + } +#elif (defined WARPX_DIM_3D) + for (int iz=0; iz<=depos_order; iz++){ + for (int iy=0; iy<=depos_order; iy++){ + for (int ix=0; ix<=depos_order; ix++){ + amrex::Gpu::Atomic::Add( + &rho_arr(lo.x+i+ix, lo.y+j+iy, lo.z+k+iz), + sx[ix]*sy[iy]*sz[iz]*wq); + } + } + } +#endif + } + ); +} + +#endif // CHARGEDEPOSITION_H_ diff --git a/Source/Particles/Deposition/CurrentDeposition.H b/Source/Particles/Deposition/CurrentDeposition.H index 97bc53c20..4a392b57e 100644 --- a/Source/Particles/Deposition/CurrentDeposition.H +++ b/Source/Particles/Deposition/CurrentDeposition.H @@ -1,52 +1,7 @@ #ifndef CURRENTDEPOSITION_H_ #define CURRENTDEPOSITION_H_ -using namespace amrex; - -// Compute shape factor and return index of leftmost cell where -// particle writes. -// Specialized templates are defined below for orders 1, 2 and 3. -template <int depos_order> -AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE -int compute_shape_factor(Real* const sx, Real xint); - -// Compute shape factor for order 1. 
-template <> -AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE -int compute_shape_factor <1> (Real* const sx, Real xmid){ - int j = (int) xmid; - Real xint = xmid-j; - sx[0] = 1.0 - xint; - sx[1] = xint; - return j; -} - -// Compute shape factor for order 2. -template <> -AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE -int compute_shape_factor <2> (Real* const sx, Real xmid){ - int j = (int) (xmid+0.5); - Real xint = xmid-j; - sx[0] = 0.5*(0.5-xint)*(0.5-xint); - sx[1] = 0.75-xint*xint; - sx[2] = 0.5*(0.5+xint)*(0.5+xint); - // index of the leftmost cell where particle deposits - return j-1; -} - -// Compute shape factor for order 3. -template <> -AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE -int compute_shape_factor <3> (Real* const sx, Real xmid){ - int j = (int) xmid; - Real xint = xmid-j; - sx[0] = 1.0/6.0*(1.0-xint)*(1.0-xint)*(1.0-xint); - sx[1] = 2.0/3.0-xint*xint*(1-xint/2.0); - sx[2] = 2.0/3.0-(1-xint)*(1-xint)*(1.0-0.5*(1-xint)); - sx[3] = 1.0/6.0*xint*xint*xint; - // index of the leftmost cell where particle deposits - return j-1; -} +#include "ShapeFactors.H" /* \brief Current Deposition for thread thread_num * /param xp, yp, zp : Pointer to arrays of particle positions. @@ -55,9 +10,7 @@ int compute_shape_factor <3> (Real* const sx, Real xmid){ * \param jx_arr : Array4 of current density, either full array or tile. * \param jy_arr : Array4 of current density, either full array or tile. * \param jz_arr : Array4 of current density, either full array or tile. - * \param offset : Index of first particle for which current is deposited * \param np_to_depose : Number of particles for which current is deposited. - Particles [offset,offset+np_tp_depose] deposit current. * \param dt : Time step for particle level * \param dx : 3D cell size * \param xyzmin : Physical lower bounds of domain. @@ -66,164 +19,144 @@ int compute_shape_factor <3> (Real* const sx, Real xmid){ * /param q : species charge. 
*/ template <int depos_order> -void doDepositionShapeN(const Real * const xp, const Real * const yp, const Real * const zp, - const Real * const wp, const Real * const uxp, - const Real * const uyp, const Real * const uzp, - const amrex::Array4<amrex::Real>& jx_arr, - const amrex::Array4<amrex::Real>& jy_arr, +void doDepositionShapeN(const amrex::Real * const xp, + const amrex::Real * const yp, + const amrex::Real * const zp, + const amrex::Real * const wp, + const amrex::Real * const uxp, + const amrex::Real * const uyp, + const amrex::Real * const uzp, + const amrex::Array4<amrex::Real>& jx_arr, + const amrex::Array4<amrex::Real>& jy_arr, const amrex::Array4<amrex::Real>& jz_arr, - const long offset, const long np_to_depose, - const amrex::Real dt, const std::array<amrex::Real,3>& dx, - const std::array<Real, 3> xyzmin, - const Dim3 lo, + const long np_to_depose, const amrex::Real dt, + const std::array<amrex::Real,3>& dx, + const std::array<amrex::Real, 3> xyzmin, + const amrex::Dim3 lo, const amrex::Real stagger_shift, const amrex::Real q) { - const Real dxi = 1.0/dx[0]; - const Real dzi = 1.0/dx[2]; - const Real dts2dx = 0.5*dt*dxi; - const Real dts2dz = 0.5*dt*dzi; + const amrex::Real dxi = 1.0/dx[0]; + const amrex::Real dzi = 1.0/dx[2]; + const amrex::Real dts2dx = 0.5*dt*dxi; + const amrex::Real dts2dz = 0.5*dt*dzi; #if (AMREX_SPACEDIM == 2) - const Real invvol = dxi*dzi; -#else // (AMREX_SPACEDIM == 3) - const Real dyi = 1.0/dx[1]; - const Real dts2dy = 0.5*dt*dyi; - const Real invvol = dxi*dyi*dzi; + const amrex::Real invvol = dxi*dzi; +#elif (defined WARPX_DIM_3D) + const amrex::Real dyi = 1.0/dx[1]; + const amrex::Real dts2dy = 0.5*dt*dyi; + const amrex::Real invvol = dxi*dyi*dzi; #endif - const Real xmin = xyzmin[0]; - const Real ymin = xyzmin[1]; - const Real zmin = xyzmin[2]; - const Real clightsq = 1.0/PhysConst::c/PhysConst::c; + const amrex::Real xmin = xyzmin[0]; + const amrex::Real ymin = xyzmin[1]; + const amrex::Real zmin = xyzmin[2]; + const 
amrex::Real clightsq = 1.0/PhysConst::c/PhysConst::c; // Loop over particles and deposit into jx_arr, jy_arr and jz_arr - ParallelFor( np_to_depose, - [=] AMREX_GPU_DEVICE (long ip) { - // --- Get particle quantities - const Real gaminv = 1.0/std::sqrt(1.0 + uxp[ip]*uxp[ip]*clightsq - + uyp[ip]*uyp[ip]*clightsq - + uzp[ip]*uzp[ip]*clightsq); - const Real wq = q*wp[ip]; - const Real vx = uxp[ip]*gaminv; - const Real vy = uyp[ip]*gaminv; - const Real vz = uzp[ip]*gaminv; - // wqx, wqy wqz are particle current in each direction - const Real wqx = wq*invvol*vx; - const Real wqy = wq*invvol*vy; - const Real wqz = wq*invvol*vz; - - // --- Compute shape factors - // x direction - // Get particle position after 1/2 push back in position - const Real xmid = (xp[ip]-xmin)*dxi-dts2dx*vx; - // Compute shape factors for node-centered quantities - Real AMREX_RESTRICT sx [depos_order + 1]; - // j: leftmost grid point (node-centered) that the particle touches - const int j = compute_shape_factor<depos_order>(sx, xmid); - // Compute shape factors for cell-centered quantities - Real AMREX_RESTRICT sx0[depos_order + 1]; - // j0: leftmost grid point (cell-centered) that the particle touches - const int j0 = compute_shape_factor<depos_order>(sx0, xmid-stagger_shift); + amrex::ParallelFor( + np_to_depose, + [=] AMREX_GPU_DEVICE (long ip) { + // --- Get particle quantities + const amrex::Real gaminv = 1.0/std::sqrt(1.0 + uxp[ip]*uxp[ip]*clightsq + + uyp[ip]*uyp[ip]*clightsq + + uzp[ip]*uzp[ip]*clightsq); + const amrex::Real wq = q*wp[ip]; + const amrex::Real vx = uxp[ip]*gaminv; + const amrex::Real vy = uyp[ip]*gaminv; + const amrex::Real vz = uzp[ip]*gaminv; + // wqx, wqy wqz are particle current in each direction +#if (defined WARPX_DIM_RZ) + // In RZ, wqx is actually wqr, and wqy is wqtheta + // Convert to cylinderical at the mid point + const amrex::Real xpmid = xp[ip] - 0.5*dt*vx; + const amrex::Real ypmid = yp[ip] - 0.5*dt*vy; + const amrex::Real rpmid = std::sqrt(xpmid*xpmid + 
ypmid*ypmid); + amrex::Real costheta; + amrex::Real sintheta; + if (rpmid > 0.) { + costheta = xpmid/rpmid; + sintheta = ypmid/rpmid; + } else { + costheta = 1.; + sintheta = 0.; + } + const amrex::Real wqx = wq*invvol*(+vx*costheta + vy*sintheta); + const amrex::Real wqy = wq*invvol*(-vx*sintheta + vy*costheta); +#else + const amrex::Real wqx = wq*invvol*vx; + const amrex::Real wqy = wq*invvol*vy; +#endif + const amrex::Real wqz = wq*invvol*vz; + + // --- Compute shape factors + // x direction + // Get particle position after 1/2 push back in position +#if (defined WARPX_DIM_RZ) + const amrex::Real xmid = (rpmid-xmin)*dxi; +#else + const amrex::Real xmid = (xp[ip]-xmin)*dxi-dts2dx*vx; +#endif + // Compute shape factors for node-centered quantities + amrex::Real AMREX_RESTRICT sx [depos_order + 1]; + // j: leftmost grid point (node-centered) that the particle touches + const int j = compute_shape_factor<depos_order>(sx, xmid); + // Compute shape factors for cell-centered quantities + amrex::Real AMREX_RESTRICT sx0[depos_order + 1]; + // j0: leftmost grid point (cell-centered) that the particle touches + const int j0 = compute_shape_factor<depos_order>(sx0, xmid-stagger_shift); -#if (AMREX_SPACEDIM == 3) - // y direction - const Real ymid= (yp[ip]-ymin)*dyi-dts2dy*vy; - Real AMREX_RESTRICT sy [depos_order + 1]; - const int k = compute_shape_factor<depos_order>(sy, ymid); - Real AMREX_RESTRICT sy0[depos_order + 1]; - const int k0 = compute_shape_factor<depos_order>(sy0, ymid-stagger_shift); +#if (defined WARPX_DIM_3D) + // y direction + const amrex::Real ymid= (yp[ip]-ymin)*dyi-dts2dy*vy; + amrex::Real AMREX_RESTRICT sy [depos_order + 1]; + const int k = compute_shape_factor<depos_order>(sy, ymid); + amrex::Real AMREX_RESTRICT sy0[depos_order + 1]; + const int k0 = compute_shape_factor<depos_order>(sy0, ymid-stagger_shift); #endif - // z direction - const Real zmid= (zp[ip]-zmin)*dzi-dts2dz*vz; - Real AMREX_RESTRICT sz [depos_order + 1]; - const int l = 
compute_shape_factor<depos_order>(sz, zmid); - Real AMREX_RESTRICT sz0[depos_order + 1]; - const int l0 = compute_shape_factor<depos_order>(sz0, zmid-stagger_shift); - - // Deposit current into jx_arr, jy_arr and jz_arr -#if (AMREX_SPACEDIM == 2) - for (int iz=0; iz<=depos_order; iz++){ - for (int ix=0; ix<=depos_order; ix++){ - amrex::Gpu::Atomic::Add( - &jx_arr(lo.x+j0+ix, lo.y+l +iz, 0), - sx0[ix]*sz [iz]*wqx); - amrex::Gpu::Atomic::Add( - &jy_arr(lo.x+j +ix, lo.y+l +iz, 0), - sx [ix]*sz [iz]*wqy); - amrex::Gpu::Atomic::Add( - &jz_arr(lo.x+j +ix, lo.y+l0+iz, 0), - sx [ix]*sz0[iz]*wqz); - } - } -#else // (AMREX_SPACEDIM == 3) - for (int iz=0; iz<=depos_order; iz++){ - for (int iy=0; iy<=depos_order; iy++){ - for (int ix=0; ix<=depos_order; ix++){ - amrex::Gpu::Atomic::Add( - &jx_arr(lo.x+j0+ix, lo.y+k +iy, lo.z+l +iz), - sx0[ix]*sy [iy]*sz [iz]*wqx); - amrex::Gpu::Atomic::Add( - &jy_arr(lo.x+j +ix, lo.y+k0+iy, lo.z+l +iz), - sx [ix]*sy0[iy]*sz [iz]*wqy); - amrex::Gpu::Atomic::Add( - &jz_arr(lo.x+j +ix, lo.y+k +iy, lo.z+l0+iz), - sx [ix]*sy [iy]*sz0[iz]*wqz); - } - } - } + // z direction + const amrex::Real zmid= (zp[ip]-zmin)*dzi-dts2dz*vz; + amrex::Real AMREX_RESTRICT sz [depos_order + 1]; + const int l = compute_shape_factor<depos_order>(sz, zmid); + amrex::Real AMREX_RESTRICT sz0[depos_order + 1]; + const int l0 = compute_shape_factor<depos_order>(sz0, zmid-stagger_shift); + + // Deposit current into jx_arr, jy_arr and jz_arr +#if (defined WARPX_DIM_2D) || (defined WARPX_DIM_RZ) + for (int iz=0; iz<=depos_order; iz++){ + for (int ix=0; ix<=depos_order; ix++){ + amrex::Gpu::Atomic::Add( + &jx_arr(lo.x+j0+ix, lo.y+l +iz, 0), + sx0[ix]*sz [iz]*wqx); + amrex::Gpu::Atomic::Add( + &jy_arr(lo.x+j +ix, lo.y+l +iz, 0), + sx [ix]*sz [iz]*wqy); + amrex::Gpu::Atomic::Add( + &jz_arr(lo.x+j +ix, lo.y+l0+iz, 0), + sx [ix]*sz0[iz]*wqz); + } + } +#elif (defined WARPX_DIM_3D) + for (int iz=0; iz<=depos_order; iz++){ + for (int iy=0; iy<=depos_order; iy++){ + for (int ix=0; 
ix<=depos_order; ix++){ + amrex::Gpu::Atomic::Add( + &jx_arr(lo.x+j0+ix, lo.y+k +iy, lo.z+l +iz), + sx0[ix]*sy [iy]*sz [iz]*wqx); + amrex::Gpu::Atomic::Add( + &jy_arr(lo.x+j +ix, lo.y+k0+iy, lo.z+l +iz), + sx [ix]*sy0[iy]*sz [iz]*wqy); + amrex::Gpu::Atomic::Add( + &jz_arr(lo.x+j +ix, lo.y+k +iy, lo.z+l0+iz), + sx [ix]*sy [iy]*sz0[iz]*wqz); + } + } + } #endif - } + } ); } -// Compute shape factor and return index of leftmost cell where -// particle writes. -// Specialized templates are defined below for orders 1, 2 and 3. -template <int depos_order> -AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE -int compute_shifted_shape_factor (Real* const sx, const Real x_old, const int i_new); - -// Compute shape factor for order 1. -template <> -AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE -int compute_shifted_shape_factor <1> (Real* const sx, const Real x_old, const int i_new){ - const int i = (int) x_old; - const int i_shift = i - i_new; - const Real xint = x_old - i; - sx[1+i_shift] = 1.0 - xint; - sx[2+i_shift] = xint; - return i; -} - -// Compute shape factor for order 2. -template <> -AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE -int compute_shifted_shape_factor <2> (Real* const sx, const Real x_old, const int i_new){ - const int i = (int) (x_old+0.5); - const int i_shift = i - (i_new + 1); - const Real xint = x_old - i; - sx[1+i_shift] = 0.5*(0.5-xint)*(0.5-xint); - sx[2+i_shift] = 0.75-xint*xint; - sx[3+i_shift] = 0.5*(0.5+xint)*(0.5+xint); - // index of the leftmost cell where particle deposits - return i-1; -} - -// Compute shape factor for order 3. 
-template <> -AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE -int compute_shifted_shape_factor <3> (Real* const sx, const Real x_old, const int i_new){ - const int i = (int) x_old; - const int i_shift = i - (i_new + 1); - const Real xint = x_old - i; - sx[1+i_shift] = 1.0/6.0*(1.0-xint)*(1.0-xint)*(1.0-xint); - sx[2+i_shift] = 2.0/3.0-xint*xint*(1-xint/2.0); - sx[3+i_shift] = 2.0/3.0-(1-xint)*(1-xint)*(1.0-0.5*(1-xint)); - sx[4+i_shift] = 1.0/6.0*xint*xint*xint; - // index of the leftmost cell where particle deposits - return i-1; -} - /* \brief Esirkepov Current Deposition for thread thread_num * /param xp, yp, zp : Pointer to arrays of particle positions. * \param wp : Pointer to array of particle weights. @@ -239,170 +172,197 @@ int compute_shifted_shape_factor <3> (Real* const sx, const Real x_old, const in * /param q : species charge. */ template <int depos_order> -void doEsirkepovDepositionShapeN (const Real * const xp, const Real * const yp, const Real * const zp, - const Real * const wp, const Real * const uxp, - const Real * const uyp, const Real * const uzp, +void doEsirkepovDepositionShapeN (const amrex::Real * const xp, + const amrex::Real * const yp, + const amrex::Real * const zp, + const amrex::Real * const wp, + const amrex::Real * const uxp, + const amrex::Real * const uyp, + const amrex::Real * const uzp, const amrex::Array4<amrex::Real>& Jx_arr, const amrex::Array4<amrex::Real>& Jy_arr, const amrex::Array4<amrex::Real>& Jz_arr, const long np_to_depose, - const amrex::Real dt, const std::array<amrex::Real,3>& dx, - const std::array<Real, 3> xyzmin, - const Dim3 lo, + const amrex::Real dt, + const std::array<amrex::Real,3>& dx, + const std::array<amrex::Real, 3> xyzmin, + const amrex::Dim3 lo, const amrex::Real q) { - const Real dxi = 1.0/dx[0]; - const Real dtsdx0 = dt*dxi; - const Real xmin = xyzmin[0]; -#if (AMREX_SPACEDIM == 3) - const Real dyi = 1.0/dx[1]; - const Real dtsdy0 = dt*dyi; - const Real ymin = xyzmin[1]; + const amrex::Real dxi = 
1.0/dx[0]; + const amrex::Real dtsdx0 = dt*dxi; + const amrex::Real xmin = xyzmin[0]; +#if (defined WARPX_DIM_3D) + const amrex::Real dyi = 1.0/dx[1]; + const amrex::Real dtsdy0 = dt*dyi; + const amrex::Real ymin = xyzmin[1]; #endif - const Real dzi = 1.0/dx[2]; - const Real dtsdz0 = dt*dzi; - const Real zmin = xyzmin[2]; - -#if (AMREX_SPACEDIM == 3) - const Real invdtdx = 1.0/(dt*dx[1]*dx[2]); - const Real invdtdy = 1.0/(dt*dx[0]*dx[2]); - const Real invdtdz = 1.0/(dt*dx[0]*dx[1]); -#elif (AMREX_SPACEDIM == 2) - const Real invdtdx = 1.0/(dt*dx[2]); - const Real invdtdz = 1.0/(dt*dx[0]); - const Real invvol = 1.0/(dx[0]*dx[2]); + const amrex::Real dzi = 1.0/dx[2]; + const amrex::Real dtsdz0 = dt*dzi; + const amrex::Real zmin = xyzmin[2]; + +#if (defined WARPX_DIM_3D) + const amrex::Real invdtdx = 1.0/(dt*dx[1]*dx[2]); + const amrex::Real invdtdy = 1.0/(dt*dx[0]*dx[2]); + const amrex::Real invdtdz = 1.0/(dt*dx[0]*dx[1]); +#elif (defined WARPX_DIM_2D) || (defined WARPX_DIM_RZ) + const amrex::Real invdtdx = 1.0/(dt*dx[2]); + const amrex::Real invdtdz = 1.0/(dt*dx[0]); + const amrex::Real invvol = 1.0/(dx[0]*dx[2]); #endif - const Real clightsq = 1.0/PhysConst::c/PhysConst::c; + const amrex::Real clightsq = 1.0/PhysConst::c/PhysConst::c; // Loop over particles and deposit into Jx_arr, Jy_arr and Jz_arr - ParallelFor( np_to_depose, - [=] AMREX_GPU_DEVICE (long ip) { - - // --- Get particle quantities - const Real gaminv = 1.0/std::sqrt(1.0 + uxp[ip]*uxp[ip]*clightsq - + uyp[ip]*uyp[ip]*clightsq - + uzp[ip]*uzp[ip]*clightsq); - - // wqx, wqy wqz are particle current in each direction - const Real wq = q*wp[ip]; - const Real wqx = wq*invdtdx; -#if (AMREX_SPACEDIM == 3) - const Real wqy = wq*invdtdy; + amrex::ParallelFor( + np_to_depose, + [=] AMREX_GPU_DEVICE (long ip) { + + // --- Get particle quantities + const amrex::Real gaminv = 1.0/std::sqrt(1.0 + uxp[ip]*uxp[ip]*clightsq + + uyp[ip]*uyp[ip]*clightsq + + uzp[ip]*uzp[ip]*clightsq); + + // wqx, wqy wqz are particle 
current in each direction + const amrex::Real wq = q*wp[ip]; + const amrex::Real wqx = wq*invdtdx; +#if (defined WARPX_DIM_3D) + const amrex::Real wqy = wq*invdtdy; #endif - const Real wqz = wq*invdtdz; - - // computes current and old position in grid units - const Real x_new = (xp[ip] - xmin)*dxi; - const Real x_old = x_new - dtsdx0*uxp[ip]*gaminv; -#if (AMREX_SPACEDIM == 3) - const Real y_new = (yp[ip] - ymin)*dyi; - const Real y_old = y_new - dtsdy0*uyp[ip]*gaminv; + const amrex::Real wqz = wq*invdtdz; + + // computes current and old position in grid units +#if (defined WARPX_DIM_RZ) + const amrex::Real r_new = std::sqrt(xp[ip]*xp[ip] + yp[ip]*yp[ip]); + const amrex::Real r_old = std::sqrt((xp[ip] - dt*uxp[ip]*gaminv)*(xp[ip] - dt*uxp[ip]*gaminv) + + (yp[ip] - dt*uyp[ip]*gaminv)*(yp[ip] - dt*uyp[ip]*gaminv)); + const amrex::Real x_new = (r_new - xmin)*dxi; + const amrex::Real x_old = (r_old - xmin)*dxi; +#else + const amrex::Real x_new = (xp[ip] - xmin)*dxi; + const amrex::Real x_old = x_new - dtsdx0*uxp[ip]*gaminv; #endif - const Real z_new = (zp[ip] - zmin)*dzi; - const Real z_old = z_new - dtsdz0*uzp[ip]*gaminv; - - // Shape factor arrays - // Note that there are extra values above and below - // to possibly hold the factor for the old particle - // which can be at a different grid location. - Real AMREX_RESTRICT sx_new[depos_order + 3] = {0.}; - Real AMREX_RESTRICT sx_old[depos_order + 3] = {0.}; -#if (AMREX_SPACEDIM == 3) - Real AMREX_RESTRICT sy_new[depos_order + 3] = {0.}; - Real AMREX_RESTRICT sy_old[depos_order + 3] = {0.}; +#if (defined WARPX_DIM_3D) + const amrex::Real y_new = (yp[ip] - ymin)*dyi; + const amrex::Real y_old = y_new - dtsdy0*uyp[ip]*gaminv; +#endif + const amrex::Real z_new = (zp[ip] - zmin)*dzi; + const amrex::Real z_old = z_new - dtsdz0*uzp[ip]*gaminv; + +#if (defined WARPX_DIM_RZ) + amrex::Real costheta; + amrex::Real sintheta; + if (r_new > 0.) 
{ + costheta = xp[ip]/r_new; + sintheta = yp[ip]/r_new; + } else { + costheta = 1.; + sintheta = 0.; + } + const amrex::Real vy = (-uxp[ip]*sintheta + uyp[ip]*costheta)*gaminv; +#elif (defined WARPX_DIM_2D) + const amrex::Real vy = uyp[ip]*gaminv; #endif - Real AMREX_RESTRICT sz_new[depos_order + 3] = {0.}; - Real AMREX_RESTRICT sz_old[depos_order + 3] = {0.}; - // --- Compute shape factors - // Compute shape factors for position as they are now and at old positions - // [ijk]_new: leftmost grid point that the particle touches - const int i_new = compute_shape_factor<depos_order>(sx_new+1, x_new); - const int i_old = compute_shifted_shape_factor<depos_order>(sx_old, x_old, i_new); -#if (AMREX_SPACEDIM == 3) - const int j_new = compute_shape_factor<depos_order>(sy_new+1, y_new); - const int j_old = compute_shifted_shape_factor<depos_order>(sy_old, y_old, j_new); + // Shape factor arrays + // Note that there are extra values above and below + // to possibly hold the factor for the old particle + // which can be at a different grid location. 
+ amrex::Real AMREX_RESTRICT sx_new[depos_order + 3] = {0.}; + amrex::Real AMREX_RESTRICT sx_old[depos_order + 3] = {0.}; +#if (defined WARPX_DIM_3D) + amrex::Real AMREX_RESTRICT sy_new[depos_order + 3] = {0.}; + amrex::Real AMREX_RESTRICT sy_old[depos_order + 3] = {0.}; +#endif + amrex::Real AMREX_RESTRICT sz_new[depos_order + 3] = {0.}; + amrex::Real AMREX_RESTRICT sz_old[depos_order + 3] = {0.}; + + // --- Compute shape factors + // Compute shape factors for position as they are now and at old positions + // [ijk]_new: leftmost grid point that the particle touches + const int i_new = compute_shape_factor<depos_order>(sx_new+1, x_new); + const int i_old = compute_shifted_shape_factor<depos_order>(sx_old, x_old, i_new); +#if (defined WARPX_DIM_3D) + const int j_new = compute_shape_factor<depos_order>(sy_new+1, y_new); + const int j_old = compute_shifted_shape_factor<depos_order>(sy_old, y_old, j_new); #endif - const int k_new = compute_shape_factor<depos_order>(sz_new+1, z_new); - const int k_old = compute_shifted_shape_factor<depos_order>(sz_old, z_old, k_new); - - // computes min/max positions of current contributions - int dil = 1, diu = 1; - if (i_old < i_new) dil = 0; - if (i_old > i_new) diu = 0; -#if (AMREX_SPACEDIM == 3) - int djl = 1, dju = 1; - if (j_old < j_new) djl = 0; - if (j_old > j_new) dju = 0; + const int k_new = compute_shape_factor<depos_order>(sz_new+1, z_new); + const int k_old = compute_shifted_shape_factor<depos_order>(sz_old, z_old, k_new); + + // computes min/max positions of current contributions + int dil = 1, diu = 1; + if (i_old < i_new) dil = 0; + if (i_old > i_new) diu = 0; +#if (defined WARPX_DIM_3D) + int djl = 1, dju = 1; + if (j_old < j_new) djl = 0; + if (j_old > j_new) dju = 0; #endif - int dkl = 1, dku = 1; - if (k_old < k_new) dkl = 0; - if (k_old > k_new) dku = 0; - -#if (AMREX_SPACEDIM == 3) - - for (int k=dkl; k<=depos_order+2-dku; k++) { - for (int j=djl; j<=depos_order+2-dju; j++) { - Real sdxi = 0.; - for (int i=dil; 
i<=depos_order+1-diu; i++) { - sdxi += wqx*(sx_old[i] - sx_new[i])*((sy_new[j] + 0.5*(sy_old[j] - sy_new[j]))*sz_new[k] + - (0.5*sy_new[j] + 1./3.*(sy_old[j] - sy_new[j]))*(sz_old[k] - sz_new[k])); - amrex::Gpu::Atomic::Add( &Jx_arr(lo.x+i_new-1+i, lo.y+j_new-1+j, lo.z+k_new-1+k), sdxi); - } - } - } - for (int k=dkl; k<=depos_order+2-dku; k++) { - for (int i=dil; i<=depos_order+2-diu; i++) { - Real sdyj = 0.; - for (int j=djl; j<=depos_order+1-dju; j++) { - sdyj += wqy*(sy_old[j] - sy_new[j])*((sz_new[k] + 0.5*(sz_old[k] - sz_new[k]))*sx_new[i] + - (0.5*sz_new[k] + 1./3.*(sz_old[k] - sz_new[k]))*(sx_old[i] - sx_new[i])); - amrex::Gpu::Atomic::Add( &Jy_arr(lo.x+i_new-1+i, lo.y+j_new-1+j, lo.z+k_new-1+k), sdyj); - } - } - } - for (int j=djl; j<=depos_order+2-dju; j++) { - for (int i=dil; i<=depos_order+2-diu; i++) { - Real sdzk = 0.; - for (int k=dkl; k<=depos_order+1-dku; k++) { - sdzk += wqz*(sz_old[k] - sz_new[k])*((sx_new[i] + 0.5*(sx_old[i] - sx_new[i]))*sy_new[j] + - (0.5*sx_new[i] + 1./3.*(sx_old[i] - sx_new[i]))*(sy_old[j] - sy_new[j])); - amrex::Gpu::Atomic::Add( &Jz_arr(lo.x+i_new-1+i, lo.y+j_new-1+j, lo.z+k_new-1+k), sdzk); - } - } - } - -#elif (AMREX_SPACEDIM == 2) - - for (int k=dkl; k<=depos_order+2-dku; k++) { - Real sdxi = 0.; - for (int i=dil; i<=depos_order+1-diu; i++) { - sdxi += wqx*(sx_old[i] - sx_new[i])*(sz_new[k] + 0.5*(sz_old[k] - sz_new[k])); - amrex::Gpu::Atomic::Add( &Jx_arr(lo.x+i_new-1+i, lo.y+k_new-1+k, 0), sdxi); - } + int dkl = 1, dku = 1; + if (k_old < k_new) dkl = 0; + if (k_old > k_new) dku = 0; + +#if (defined WARPX_DIM_3D) + + for (int k=dkl; k<=depos_order+2-dku; k++) { + for (int j=djl; j<=depos_order+2-dju; j++) { + amrex::Real sdxi = 0.; + for (int i=dil; i<=depos_order+1-diu; i++) { + sdxi += wqx*(sx_old[i] - sx_new[i])*((sy_new[j] + 0.5*(sy_old[j] - sy_new[j]))*sz_new[k] + + (0.5*sy_new[j] + 1./3.*(sy_old[j] - sy_new[j]))*(sz_old[k] - sz_new[k])); + amrex::Gpu::Atomic::Add( &Jx_arr(lo.x+i_new-1+i, lo.y+j_new-1+j, 
lo.z+k_new-1+k), sdxi); } - for (int k=dkl; k<=depos_order+2-dku; k++) { - for (int i=dil; i<=depos_order+2-diu; i++) { - const Real sdyj = wq*uyp[ip]*gaminv*invvol*((sz_new[k] + 0.5*(sz_old[k] - sz_new[k]))*sx_new[i] + - (0.5*sz_new[k] + 1./3.*(sz_old[k] - sz_new[k]))*(sx_old[i] - sx_new[i])); - amrex::Gpu::Atomic::Add( &Jy_arr(lo.x+i_new-1+i, lo.y+k_new-1+k, 0), sdyj); - } + } + } + for (int k=dkl; k<=depos_order+2-dku; k++) { + for (int i=dil; i<=depos_order+2-diu; i++) { + amrex::Real sdyj = 0.; + for (int j=djl; j<=depos_order+1-dju; j++) { + sdyj += wqy*(sy_old[j] - sy_new[j])*((sz_new[k] + 0.5*(sz_old[k] - sz_new[k]))*sx_new[i] + + (0.5*sz_new[k] + 1./3.*(sz_old[k] - sz_new[k]))*(sx_old[i] - sx_new[i])); + amrex::Gpu::Atomic::Add( &Jy_arr(lo.x+i_new-1+i, lo.y+j_new-1+j, lo.z+k_new-1+k), sdyj); } - for (int i=dil; i<=depos_order+2-diu; i++) { - Real sdzk = 0.; - for (int k=dkl; k<=depos_order+1-dku; k++) { - sdzk += wqz*(sz_old[k] - sz_new[k])*(sx_new[i] + 0.5*(sx_old[i] - sx_new[i])); - amrex::Gpu::Atomic::Add( &Jz_arr(lo.x+i_new-1+i, lo.y+k_new-1+k, 0), sdzk); - } + } + } + for (int j=djl; j<=depos_order+2-dju; j++) { + for (int i=dil; i<=depos_order+2-diu; i++) { + amrex::Real sdzk = 0.; + for (int k=dkl; k<=depos_order+1-dku; k++) { + sdzk += wqz*(sz_old[k] - sz_new[k])*((sx_new[i] + 0.5*(sx_old[i] - sx_new[i]))*sy_new[j] + + (0.5*sx_new[i] + 1./3.*(sx_old[i] - sx_new[i]))*(sy_old[j] - sy_new[j])); + amrex::Gpu::Atomic::Add( &Jz_arr(lo.x+i_new-1+i, lo.y+j_new-1+j, lo.z+k_new-1+k), sdzk); } + } + } + +#elif (defined WARPX_DIM_2D) || (defined WARPX_DIM_RZ) + + for (int k=dkl; k<=depos_order+2-dku; k++) { + amrex::Real sdxi = 0.; + for (int i=dil; i<=depos_order+1-diu; i++) { + sdxi += wqx*(sx_old[i] - sx_new[i])*(sz_new[k] + 0.5*(sz_old[k] - sz_new[k])); + amrex::Gpu::Atomic::Add( &Jx_arr(lo.x+i_new-1+i, lo.y+k_new-1+k, 0), sdxi); + } + } + for (int k=dkl; k<=depos_order+2-dku; k++) { + for (int i=dil; i<=depos_order+2-diu; i++) { + const amrex::Real sdyj = 
wq*vy*invvol*((sz_new[k] + 0.5*(sz_old[k] - sz_new[k]))*sx_new[i] + + (0.5*sz_new[k] + 1./3.*(sz_old[k] - sz_new[k]))*(sx_old[i] - sx_new[i])); + amrex::Gpu::Atomic::Add( &Jy_arr(lo.x+i_new-1+i, lo.y+k_new-1+k, 0), sdyj); + } + } + for (int i=dil; i<=depos_order+2-diu; i++) { + amrex::Real sdzk = 0.; + for (int k=dkl; k<=depos_order+1-dku; k++) { + sdzk += wqz*(sz_old[k] - sz_new[k])*(sx_new[i] + 0.5*(sx_old[i] - sx_new[i])); + amrex::Gpu::Atomic::Add( &Jz_arr(lo.x+i_new-1+i, lo.y+k_new-1+k, 0), sdzk); + } + } + #endif - } + } ); - - - } #endif // CURRENTDEPOSITION_H_ diff --git a/Source/Particles/Deposition/Make.package b/Source/Particles/Deposition/Make.package index 0d5ebe2a7..e1aace998 100644 --- a/Source/Particles/Deposition/Make.package +++ b/Source/Particles/Deposition/Make.package @@ -1,3 +1,4 @@ CEXE_headers += CurrentDeposition.H +CEXE_headers += ChargeDeposition.H INCLUDE_LOCATIONS += $(WARPX_HOME)/Source/Particles/Deposition VPATH_LOCATIONS += $(WARPX_HOME)/Source/Particles/Deposition diff --git a/Source/Particles/Gather/FieldGather.H b/Source/Particles/Gather/FieldGather.H new file mode 100644 index 000000000..8f5e8d4cf --- /dev/null +++ b/Source/Particles/Gather/FieldGather.H @@ -0,0 +1,216 @@ +#ifndef FIELDGATHER_H_ +#define FIELDGATHER_H_ + +#include "ShapeFactors.H" + +/* \brief Field gather for particles handled by thread thread_num + * /param xp, yp, zp : Pointer to arrays of particle positions. + * \param Exp, Eyp, Ezp: Pointer to array of electric field on particles. + * \param Bxp, Byp, Bzp: Pointer to array of magnetic field on particles. + * \param ex_arr ey_arr: Array4 of current density, either full array or tile. + * \param ez_arr bx_arr: Array4 of current density, either full array or tile. + * \param by_arr bz_arr: Array4 of current density, either full array or tile. + * \param np_to_gather : Number of particles for which field is gathered. + * \param dx : 3D cell size + * \param xyzmin : Physical lower bounds of domain. 
+ * \param lo : Index lower bounds of domain. + * \param stagger_shift: 0 if nodal, 0.5 if staggered. + */ +template <int depos_order, int lower_in_v> +void doGatherShapeN(const amrex::Real * const xp, + const amrex::Real * const yp, + const amrex::Real * const zp, + amrex::Real * const Exp, amrex::Real * const Eyp, + amrex::Real * const Ezp, amrex::Real * const Bxp, + amrex::Real * const Byp, amrex::Real * const Bzp, + const amrex::Array4<const amrex::Real>& ex_arr, + const amrex::Array4<const amrex::Real>& ey_arr, + const amrex::Array4<const amrex::Real>& ez_arr, + const amrex::Array4<const amrex::Real>& bx_arr, + const amrex::Array4<const amrex::Real>& by_arr, + const amrex::Array4<const amrex::Real>& bz_arr, + const long np_to_gather, + const std::array<amrex::Real, 3>& dx, + const std::array<amrex::Real, 3> xyzmin, + const amrex::Dim3 lo, + const amrex::Real stagger_shift) +{ + const amrex::Real dxi = 1.0/dx[0]; + const amrex::Real dzi = 1.0/dx[2]; +#if (AMREX_SPACEDIM == 3) + const amrex::Real dyi = 1.0/dx[1]; +#endif + + const amrex::Real xmin = xyzmin[0]; +#if (AMREX_SPACEDIM == 3) + const amrex::Real ymin = xyzmin[1]; +#endif + const amrex::Real zmin = xyzmin[2]; + + // Loop over particles and gather fields from + // {e,b}{x,y,z}_arr to {E,B}{xyz}p. 
+ amrex::ParallelFor( + np_to_gather, + [=] AMREX_GPU_DEVICE (long ip) { + // --- Compute shape factors + // x direction + // Get particle position +#ifdef WARPX_DIM_RZ + const amrex::Real r = std::sqrt(xp[ip]*xp[ip] + yp[ip]*yp[ip]); + const amrex::Real x = (r - xmin)*dxi; +#else + const amrex::Real x = (xp[ip]-xmin)*dxi; +#endif + // Compute shape factors for node-centered quantities + amrex::Real AMREX_RESTRICT sx [depos_order + 1]; + // j: leftmost grid point (node-centered) that particle touches + const int j = compute_shape_factor<depos_order>(sx, x); + // Compute shape factors for cell-centered quantities + amrex::Real AMREX_RESTRICT sx0[depos_order + 1 - lower_in_v]; + // j0: leftmost grid point (cell-centered) that particle touches + const int j0 = compute_shape_factor<depos_order - lower_in_v>( + sx0, x-stagger_shift); +#if (AMREX_SPACEDIM == 3) + // y direction + const amrex::Real y = (yp[ip]-ymin)*dyi; + amrex::Real AMREX_RESTRICT sy [depos_order + 1]; + const int k = compute_shape_factor<depos_order>(sy, y); + amrex::Real AMREX_RESTRICT sy0[depos_order + 1 - lower_in_v]; + const int k0 = compute_shape_factor<depos_order-lower_in_v>( + sy0, y-stagger_shift); +#endif + // z direction + const amrex::Real z = (zp[ip]-zmin)*dzi; + amrex::Real AMREX_RESTRICT sz [depos_order + 1]; + const int l = compute_shape_factor<depos_order>(sz, z); + amrex::Real AMREX_RESTRICT sz0[depos_order + 1 - lower_in_v]; + const int l0 = compute_shape_factor<depos_order - lower_in_v>( + sz0, z-stagger_shift); + + // Set fields on particle to zero + Exp[ip] = 0; + Eyp[ip] = 0; + Ezp[ip] = 0; + Bxp[ip] = 0; + Byp[ip] = 0; + Bzp[ip] = 0; + // Each field is gathered in a separate block of + // AMREX_SPACEDIM nested loops because the deposition + // order can differ for each component of each field + // when lower_in_v is set to 1 +#if (AMREX_SPACEDIM == 2) + // Gather field on particle Eyp[i] from field on grid ey_arr + for (int iz=0; iz<=depos_order; iz++){ + for (int ix=0; 
ix<=depos_order; ix++){ + Eyp[ip] += sx[ix]*sz[iz]* + ey_arr(lo.x+j+ix, lo.y+l+iz, 0); + } + } + // Gather field on particle Exp[i] from field on grid ex_arr + // Gather field on particle Bzp[i] from field on grid bz_arr + for (int iz=0; iz<=depos_order; iz++){ + for (int ix=0; ix<=depos_order-lower_in_v; ix++){ + Exp[ip] += sx0[ix]*sz[iz]* + ex_arr(lo.x+j0+ix, lo.y+l +iz, 0); + Bzp[ip] += sx0[ix]*sz[iz]* + bz_arr(lo.x+j0+ix, lo.y+l +iz, 0); + } + } + // Gather field on particle Ezp[i] from field on grid ez_arr + // Gather field on particle Bxp[i] from field on grid bx_arr + for (int iz=0; iz<=depos_order-lower_in_v; iz++){ + for (int ix=0; ix<=depos_order; ix++){ + Ezp[ip] += sx[ix]*sz0[iz]* + ez_arr(lo.x+j+ix, lo.y+l0 +iz, 0); + Bxp[ip] += sx[ix]*sz0[iz]* + bx_arr(lo.x+j+ix, lo.y+l0 +iz, 0); + } + } + // Gather field on particle Byp[i] from field on grid by_arr + for (int iz=0; iz<=depos_order-lower_in_v; iz++){ + for (int ix=0; ix<=depos_order-lower_in_v; ix++){ + Byp[ip] += sx0[ix]*sz0[iz]* + by_arr(lo.x+j0+ix, lo.y+l0+iz, 0); + } + } + +#ifdef WARPX_DIM_RZ + // Convert Exp and Eyp (which are actually Er and Etheta) to Ex and Ey + amrex::Real costheta; + amrex::Real sintheta; + if (r > 0.) 
{ + costheta = xp[ip]/r; + sintheta = yp[ip]/r; + } else { + costheta = 1.; + sintheta = 0.; + } + const amrex::Real Exp_save = Exp[ip]; + Exp[ip] = costheta*Exp[ip] - sintheta*Eyp[ip]; + Eyp[ip] = costheta*Eyp[ip] + sintheta*Exp_save; + const amrex::Real Bxp_save = Bxp[ip]; + Bxp[ip] = costheta*Bxp[ip] - sintheta*Byp[ip]; + Byp[ip] = costheta*Byp[ip] + sintheta*Bxp_save; +#endif + +#else // (AMREX_SPACEDIM == 3) + // Gather field on particle Exp[i] from field on grid ex_arr + for (int iz=0; iz<=depos_order; iz++){ + for (int iy=0; iy<=depos_order; iy++){ + for (int ix=0; ix<=depos_order-lower_in_v; ix++){ + Exp[ip] += sx0[ix]*sy[iy]*sz[iz]* + ex_arr(lo.x+j0+ix, lo.y+k+iy, lo.z+l+iz); + } + } + } + // Gather field on particle Eyp[i] from field on grid ey_arr + for (int iz=0; iz<=depos_order; iz++){ + for (int iy=0; iy<=depos_order-lower_in_v; iy++){ + for (int ix=0; ix<=depos_order; ix++){ + Eyp[ip] += sx[ix]*sy0[iy]*sz[iz]* + ey_arr(lo.x+j+ix, lo.y+k0+iy, lo.z+l+iz); + } + } + } + // Gather field on particle Ezp[i] from field on grid ez_arr + for (int iz=0; iz<=depos_order-lower_in_v; iz++){ + for (int iy=0; iy<=depos_order; iy++){ + for (int ix=0; ix<=depos_order; ix++){ + Ezp[ip] += sx[ix]*sy[iy]*sz0[iz]* + ez_arr(lo.x+j+ix, lo.y+k+iy, lo.z+l0+iz); + } + } + } + // Gather field on particle Bzp[i] from field on grid bz_arr + for (int iz=0; iz<=depos_order; iz++){ + for (int iy=0; iy<=depos_order-lower_in_v; iy++){ + for (int ix=0; ix<=depos_order-lower_in_v; ix++){ + Bzp[ip] += sx0[ix]*sy0[iy]*sz[iz]* + bz_arr(lo.x+j0+ix, lo.y+k0+iy, lo.z+l+iz); + } + } + } + // Gather field on particle Byp[i] from field on grid by_arr + for (int iz=0; iz<=depos_order-lower_in_v; iz++){ + for (int iy=0; iy<=depos_order; iy++){ + for (int ix=0; ix<=depos_order-lower_in_v; ix++){ + Byp[ip] += sx0[ix]*sy[iy]*sz0[iz]* + by_arr(lo.x+j0+ix, lo.y+k+iy, lo.z+l0+iz); + } + } + } + // Gather field on particle Bxp[i] from field on grid bx_arr + for (int iz=0; iz<=depos_order-lower_in_v; 
iz++){ + for (int iy=0; iy<=depos_order-lower_in_v; iy++){ + for (int ix=0; ix<=depos_order; ix++){ + Bxp[ip] += sx[ix]*sy0[iy]*sz0[iz]* + bx_arr(lo.x+j+ix, lo.y+k0+iy, lo.z+l0+iz); + } + } + } +#endif + } + ); +} + +#endif // FIELDGATHER_H_ diff --git a/Source/Particles/Gather/Make.package b/Source/Particles/Gather/Make.package new file mode 100644 index 000000000..10abfcaaf --- /dev/null +++ b/Source/Particles/Gather/Make.package @@ -0,0 +1,3 @@ +CEXE_headers += FieldGather.H +INCLUDE_LOCATIONS += $(WARPX_HOME)/Source/Particles/Gather +VPATH_LOCATIONS += $(WARPX_HOME)/Source/Particles/Gather diff --git a/Source/Particles/Make.package b/Source/Particles/Make.package index 2038472a1..db90de1dc 100644 --- a/Source/Particles/Make.package +++ b/Source/Particles/Make.package @@ -9,9 +9,11 @@ CEXE_headers += MultiParticleContainer.H CEXE_headers += WarpXParticleContainer.H CEXE_headers += RigidInjectedParticleContainer.H CEXE_headers += PhysicalParticleContainer.H +CEXE_headers += ShapeFactors.H include $(WARPX_HOME)/Source/Particles/Pusher/Make.package include $(WARPX_HOME)/Source/Particles/Deposition/Make.package +include $(WARPX_HOME)/Source/Particles/Gather/Make.package INCLUDE_LOCATIONS += $(WARPX_HOME)/Source/Particles VPATH_LOCATIONS += $(WARPX_HOME)/Source/Particles diff --git a/Source/Particles/MultiParticleContainer.H b/Source/Particles/MultiParticleContainer.H index 869126fef..7c9ede411 100644 --- a/Source/Particles/MultiParticleContainer.H +++ b/Source/Particles/MultiParticleContainer.H @@ -85,8 +85,9 @@ public: /// in the MultiParticleContainer. This is the electromagnetic version of the field gather. 
/// void FieldGather (int lev, - const amrex::MultiFab& Ex, const amrex::MultiFab& Ey, const amrex::MultiFab& Ez, - const amrex::MultiFab& Bx, const amrex::MultiFab& By, const amrex::MultiFab& Bz); + const amrex::MultiFab& Ex, const amrex::MultiFab& Ey, + const amrex::MultiFab& Ez, const amrex::MultiFab& Bx, + const amrex::MultiFab& By, const amrex::MultiFab& Bz); /// /// This evolves all the particles by one PIC time step, including current deposition, the diff --git a/Source/Particles/MultiParticleContainer.cpp b/Source/Particles/MultiParticleContainer.cpp index 9d39ec2f9..982e04e39 100644 --- a/Source/Particles/MultiParticleContainer.cpp +++ b/Source/Particles/MultiParticleContainer.cpp @@ -172,30 +172,6 @@ MultiParticleContainer::EvolveES (const Vector<std::array<std::unique_ptr<MultiF } void -MultiParticleContainer::Evolve (int lev, - const MultiFab& Ex, const MultiFab& Ey, const MultiFab& Ez, - const MultiFab& Bx, const MultiFab& By, const MultiFab& Bz, - MultiFab& jx, MultiFab& jy, MultiFab& jz, - MultiFab* cjx, MultiFab* cjy, MultiFab* cjz, - MultiFab* rho, - const MultiFab* cEx, const MultiFab* cEy, const MultiFab* cEz, - const MultiFab* cBx, const MultiFab* cBy, const MultiFab* cBz, - Real t, Real dt) -{ - jx.setVal(0.0); - jy.setVal(0.0); - jz.setVal(0.0); - if (cjx) cjx->setVal(0.0); - if (cjy) cjy->setVal(0.0); - if (cjz) cjz->setVal(0.0); - if (rho) rho->setVal(0.0); - for (auto& pc : allcontainers) { - pc->Evolve(lev, Ex, Ey, Ez, Bx, By, Bz, jx, jy, jz, cjx, cjy, cjz, - rho, cEx, cEy, cEz, cBx, cBy, cBz, t, dt); - } -} - -void MultiParticleContainer::PushXES (Real dt) { for (auto& pc : allcontainers) { @@ -240,8 +216,9 @@ MultiParticleContainer::sumParticleCharge (bool local) void MultiParticleContainer::FieldGather (int lev, - const MultiFab& Ex, const MultiFab& Ey, const MultiFab& Ez, - const MultiFab& Bx, const MultiFab& By, const MultiFab& Bz) + const MultiFab& Ex, const MultiFab& Ey, + const MultiFab& Ez, const MultiFab& Bx, + const MultiFab& 
By, const MultiFab& Bz) { for (auto& pc : allcontainers) { pc->FieldGather(lev, Ex, Ey, Ez, Bx, By, Bz); @@ -331,7 +308,7 @@ MultiParticleContainer::RedistributeLocal (const int num_ghost) } Vector<long> -MultiParticleContainer::NumberOfParticlesInGrid(int lev) const +MultiParticleContainer::NumberOfParticlesInGrid (int lev) const { const bool only_valid=true, only_local=true; Vector<long> r = allcontainers[0]->NumberOfParticlesInGrid(lev,only_valid,only_local); diff --git a/Source/Particles/PhysicalParticleContainer.H b/Source/Particles/PhysicalParticleContainer.H index d55764682..b80619733 100644 --- a/Source/Particles/PhysicalParticleContainer.H +++ b/Source/Particles/PhysicalParticleContainer.H @@ -27,17 +27,37 @@ public: const amrex::Vector<std::unique_ptr<amrex::FabArray<amrex::BaseFab<int> > > >& masks) override; virtual void EvolveES (const amrex::Vector<std::array<std::unique_ptr<amrex::MultiFab>, 3> >& E, - amrex::Vector<std::unique_ptr<amrex::MultiFab> >& rho, + amrex::Vector<std::unique_ptr<amrex::MultiFab> >& rho, amrex::Real t, amrex::Real dt) override; #endif // WARPX_DO_ELECTROSTATIC - virtual void FieldGather(int lev, - const amrex::MultiFab& Ex, - const amrex::MultiFab& Ey, - const amrex::MultiFab& Ez, - const amrex::MultiFab& Bx, - const amrex::MultiFab& By, - const amrex::MultiFab& Bz) final; + virtual void FieldGather (int lev, + const amrex::MultiFab& Ex, + const amrex::MultiFab& Ey, + const amrex::MultiFab& Ez, + const amrex::MultiFab& Bx, + const amrex::MultiFab& By, + const amrex::MultiFab& Bz) final; + + void FieldGather (WarpXParIter& pti, + RealVector& Exp, + RealVector& Eyp, + RealVector& Ezp, + RealVector& Bxp, + RealVector& Byp, + RealVector& Bzp, + amrex::FArrayBox const * exfab, + amrex::FArrayBox const * eyfab, + amrex::FArrayBox const * ezfab, + amrex::FArrayBox const * bxfab, + amrex::FArrayBox const * byfab, + amrex::FArrayBox const * bzfab, + const int ngE, const int e_is_nodal, + const long offset, + const long np_to_gather, + 
int thread_num, + int lev, + int depos_lev); virtual void Evolve (int lev, const amrex::MultiFab& Ex, @@ -87,11 +107,8 @@ public: // Inject particles in Box 'part_box' virtual void AddParticles (int lev); + void AddPlasma(int lev, amrex::RealBox part_realbox = amrex::RealBox()); - void AddPlasmaCPU (int lev, amrex::RealBox part_realbox); -#ifdef AMREX_USE_GPU - void AddPlasmaGPU (int lev, amrex::RealBox part_realbox); -#endif void MapParticletoBoostedFrame(amrex::Real& x, amrex::Real& y, amrex::Real& z, std::array<amrex::Real, 3>& u); @@ -120,16 +137,8 @@ protected: bool boost_adjust_transverse_positions = false; bool do_backward_propagation = false; - long NumParticlesToAdd (const amrex::Box& overlap_box, - const amrex::RealBox& overlap_realbox, - const amrex::RealBox& tile_real_box, - const amrex::RealBox& particle_real_box); - - int GetRefineFac(const amrex::Real x, const amrex::Real y, const amrex::Real z); - std::unique_ptr<amrex::IArrayBox> m_refined_injection_mask = nullptr; - // Inject particles during the whole simulation - void ContinuousInjection(const amrex::RealBox& injection_box) override; + void ContinuousInjection (const amrex::RealBox& injection_box) override; }; diff --git a/Source/Particles/PhysicalParticleContainer.cpp b/Source/Particles/PhysicalParticleContainer.cpp index d47a7b220..d10390204 100644 --- a/Source/Particles/PhysicalParticleContainer.cpp +++ b/Source/Particles/PhysicalParticleContainer.cpp @@ -6,65 +6,16 @@ #include <WarpX.H> #include <WarpXConst.H> #include <WarpXWrappers.h> +#include <FieldGather.H> +#include <WarpXAlgorithmSelection.H> -using namespace amrex; - -long PhysicalParticleContainer:: -NumParticlesToAdd(const Box& overlap_box, const RealBox& overlap_realbox, - const RealBox& tile_realbox, const RealBox& particle_real_box) -{ - const int lev = 0; - const Geometry& geom = Geom(lev); - int num_ppc = plasma_injector->num_particles_per_cell; - const Real* dx = geom.CellSize(); +// Import low-level single-particle kernels 
+#include <UpdatePosition.H> +#include <UpdateMomentumBoris.H> +#include <UpdateMomentumVay.H> - long np = 0; - const auto& overlap_corner = overlap_realbox.lo(); - for (IntVect iv = overlap_box.smallEnd(); iv <= overlap_box.bigEnd(); overlap_box.next(iv)) - { - int fac; - if (do_continuous_injection) { -#if ( AMREX_SPACEDIM == 3 ) - Real x = overlap_corner[0] + (iv[0] + 0.5)*dx[0]; - Real y = overlap_corner[1] + (iv[1] + 0.5)*dx[1]; - Real z = overlap_corner[2] + (iv[2] + 0.5)*dx[2]; -#elif ( AMREX_SPACEDIM == 2 ) - Real x = overlap_corner[0] + (iv[0] + 0.5)*dx[0]; - Real y = 0; - Real z = overlap_corner[1] + (iv[1] + 0.5)*dx[1]; -#endif - fac = GetRefineFac(x, y, z); - } else { - fac = 1.0; - } - - int ref_num_ppc = num_ppc * AMREX_D_TERM(fac, *fac, *fac); - for (int i_part=0; i_part<ref_num_ppc;i_part++) { - std::array<Real, 3> r; - plasma_injector->getPositionUnitBox(r, i_part, fac); -#if ( AMREX_SPACEDIM == 3 ) - Real x = overlap_corner[0] + (iv[0] + r[0])*dx[0]; - Real y = overlap_corner[1] + (iv[1] + r[1])*dx[1]; - Real z = overlap_corner[2] + (iv[2] + r[2])*dx[2]; -#elif ( AMREX_SPACEDIM == 2 ) - Real x = overlap_corner[0] + (iv[0] + r[0])*dx[0]; - Real y = 0; - Real z = overlap_corner[1] + (iv[1] + r[1])*dx[1]; -#endif - // If the new particle is not inside the tile box, - // go to the next generated particle. 
-#if ( AMREX_SPACEDIM == 3 ) - if(!tile_realbox.contains( RealVect{x, y, z} )) continue; -#elif ( AMREX_SPACEDIM == 2 ) - if(!tile_realbox.contains( RealVect{x, z} )) continue; -#endif - ++np; - } - } - - return np; -} +using namespace amrex; PhysicalParticleContainer::PhysicalParticleContainer (AmrCore* amr_core, int ispecies, const std::string& name) @@ -127,9 +78,7 @@ PhysicalParticleContainer::PhysicalParticleContainer (AmrCore* amr_core) void PhysicalParticleContainer::InitData() { AddParticles(0); // Note - add on level 0 - if (maxLevel() > 0) { - Redistribute(); // We then redistribute - } + Redistribute(); // We then redistribute } void PhysicalParticleContainer::MapParticletoBoostedFrame(Real& x, Real& y, Real& z, std::array<Real, 3>& u) @@ -193,45 +142,36 @@ PhysicalParticleContainer::AddGaussianBeam(Real x_m, Real y_m, Real z_m, std::normal_distribution<double> distz(z_m, z_rms); if (ParallelDescriptor::IOProcessor()) { - std::array<Real, 3> u; - Real weight; // If do_symmetrize, create 4x fewer particles, and // Replicate each particle 4 times (x,y) (-x,y) (x,-y) (-x,-y) if (do_symmetrize){ npart /= 4; } for (long i = 0; i < npart; ++i) { -#if ( AMREX_SPACEDIM == 3 | WARPX_RZ) - weight = q_tot/npart/charge; +#if ( AMREX_SPACEDIM == 3 | WARPX_DIM_RZ) + Real weight = q_tot/npart/charge; Real x = distx(mt); Real y = disty(mt); Real z = distz(mt); #elif ( AMREX_SPACEDIM == 2 ) - weight = q_tot/npart/charge/y_rms; + Real weight = q_tot/npart/charge/y_rms; Real x = distx(mt); Real y = 0.; Real z = distz(mt); #endif if (plasma_injector->insideBounds(x, y, z)) { - plasma_injector->getMomentum(u, x, y, z); + XDim3 u = plasma_injector->getMomentum(x, y, z); + u.x *= PhysConst::c; + u.y *= PhysConst::c; + u.z *= PhysConst::c; if (do_symmetrize){ - std::array<Real, 3> u_tmp; - Real x_tmp, y_tmp; // Add four particles to the beam: - // (x,ux,y,uy) (-x,-ux,y,uy) (x,ux,-y,-uy) (-x,-ux,-y,-uy) - for (int ix=0; ix<2; ix++){ - for (int iy=0; iy<2; iy++){ - u_tmp = u; - 
x_tmp = x*std::pow(-1,ix); - u_tmp[0] *= std::pow(-1,ix); - y_tmp = y*std::pow(-1,iy); - u_tmp[1] *= std::pow(-1,iy); - CheckAndAddParticle(x_tmp, y_tmp, z, - u_tmp, weight/4); - } - } + CheckAndAddParticle( x, y, z, { u.x, u.y, u.z}, weight/4. ); + CheckAndAddParticle( x,-y, z, { u.x,-u.y, u.z}, weight/4. ); + CheckAndAddParticle(-x, y, z, {-u.x, u.y, u.z}, weight/4. ); + CheckAndAddParticle(-x,-y, z, {-u.x,-u.y, u.z}, weight/4. ); } else { - CheckAndAddParticle(x, y, z, u, weight); + CheckAndAddParticle(x, y, z, {u.x,u.y,u.z}, weight); } } } @@ -322,28 +262,19 @@ PhysicalParticleContainer::AddParticles (int lev) void PhysicalParticleContainer::AddPlasma (int lev, RealBox part_realbox) { -#ifdef AMREX_USE_GPU - AddPlasmaGPU(lev, part_realbox); -#else - AddPlasmaCPU(lev, part_realbox); -#endif -} - -void -PhysicalParticleContainer::AddPlasmaCPU (int lev, RealBox part_realbox) -{ - BL_PROFILE("PhysicalParticleContainer::AddPlasmaCPU"); + BL_PROFILE("PhysicalParticleContainer::AddPlasma"); // If no part_realbox is provided, initialize particles in the whole domain const Geometry& geom = Geom(lev); if (!part_realbox.ok()) part_realbox = geom.ProbDomain(); int num_ppc = plasma_injector->num_particles_per_cell; -#ifdef WARPX_RZ +#ifdef WARPX_DIM_RZ Real rmax = std::min(plasma_injector->xmax, part_realbox.hi(0)); #endif - const Real* dx = geom.CellSize(); + const auto dx = geom.CellSizeArray(); + const auto problo = geom.ProbLoArray(); Real scale_fac; #if AMREX_SPACEDIM==3 @@ -358,490 +289,341 @@ PhysicalParticleContainer::AddPlasmaCPU (int lev, RealBox part_realbox) const int grid_id = mfi.index(); const int tile_id = mfi.LocalTileIndex(); GetParticles(lev)[std::make_pair(grid_id, tile_id)]; + if (WarpX::do_boosted_frame_diagnostic && do_boosted_frame_diags) { + DefineAndReturnParticleTile(lev, grid_id, tile_id); + } } #endif MultiFab* cost = WarpX::getCosts(lev); - if ( (not m_refined_injection_mask) and WarpX::do_moving_window) + const int nlevs = numLevels(); + 
static bool refine_injection = false; + static Box fine_injection_box; + static int rrfac = 1; + // This does not work if the mesh is dynamic. But in that case, we should + // not use refined injected either. We also assume there is only one fine level. + if (WarpX::do_moving_window and WarpX::refine_plasma + and do_continuous_injection and nlevs == 2) { - Box mask_box = geom.Domain(); - mask_box.setSmall(WarpX::moving_window_dir, 0); - mask_box.setBig(WarpX::moving_window_dir, 0); - m_refined_injection_mask.reset( new IArrayBox(mask_box)); - m_refined_injection_mask->setVal(-1); + refine_injection = true; + fine_injection_box = ParticleBoxArray(1).minimalBox(); + fine_injection_box.setSmall(WarpX::moving_window_dir, std::numeric_limits<int>::lowest()); + fine_injection_box.setBig(WarpX::moving_window_dir, std::numeric_limits<int>::max()); + rrfac = m_gdb->refRatio(0)[0]; + fine_injection_box.coarsen(rrfac); } + InjectorPosition* inj_pos = plasma_injector->getInjectorPosition(); + InjectorDensity* inj_rho = plasma_injector->getInjectorDensity(); + InjectorMomentum* inj_mom = plasma_injector->getInjectorMomentum(); + Real gamma_boost = WarpX::gamma_boost; + Real beta_boost = WarpX::beta_boost; + Real t = WarpX::GetInstance().gett_new(lev); + Real density_min = plasma_injector->density_min; + Real density_max = plasma_injector->density_max; + +#ifdef WARPX_DIM_RZ + bool radially_weighted = plasma_injector->radially_weighted; +#endif + MFItInfo info; - if (do_tiling) { + if (do_tiling && Gpu::notInLaunchRegion()) { info.EnableTiling(tile_size); } - info.SetDynamic(true); - #ifdef _OPENMP + info.SetDynamic(true); #pragma omp parallel if (not WarpX::serialize_ics) #endif + for (MFIter mfi = MakeMFIter(lev, info); mfi.isValid(); ++mfi) { - std::array<Real,PIdx::nattribs> attribs; - attribs.fill(0.0); - - // Loop through the tiles - for (MFIter mfi = MakeMFIter(lev, info); mfi.isValid(); ++mfi) { - - Real wt = amrex::second(); - - const Box& tile_box = mfi.tilebox(); - 
const RealBox tile_realbox = WarpX::getRealBox(tile_box, lev); - - // Find the cells of part_box that overlap with tile_realbox - // If there is no overlap, just go to the next tile in the loop - RealBox overlap_realbox; - Box overlap_box; - Real ncells_adjust; - bool no_overlap = 0; - - for (int dir=0; dir<AMREX_SPACEDIM; dir++) { - if ( tile_realbox.lo(dir) <= part_realbox.hi(dir) ) { - ncells_adjust = std::floor( (tile_realbox.lo(dir) - part_realbox.lo(dir))/dx[dir] ); - overlap_realbox.setLo( dir, part_realbox.lo(dir) + std::max(ncells_adjust, 0.) * dx[dir]); - } else { - no_overlap = 1; break; - } - if ( tile_realbox.hi(dir) >= part_realbox.lo(dir) ) { - ncells_adjust = std::floor( (part_realbox.hi(dir) - tile_realbox.hi(dir))/dx[dir] ); - overlap_realbox.setHi( dir, part_realbox.hi(dir) - std::max(ncells_adjust, 0.) * dx[dir]); - } else { - no_overlap = 1; break; - } - // Count the number of cells in this direction in overlap_realbox - overlap_box.setSmall( dir, 0 ); - overlap_box.setBig( dir, - int( round((overlap_realbox.hi(dir)-overlap_realbox.lo(dir))/dx[dir] )) - 1); + Real wt = amrex::second(); + + const Box& tile_box = mfi.tilebox(); + const RealBox tile_realbox = WarpX::getRealBox(tile_box, lev); + + // Find the cells of part_box that overlap with tile_realbox + // If there is no overlap, just go to the next tile in the loop + RealBox overlap_realbox; + Box overlap_box; + IntVect shifted; + bool no_overlap = false; + + for (int dir=0; dir<AMREX_SPACEDIM; dir++) { + if ( tile_realbox.lo(dir) <= part_realbox.hi(dir) ) { + Real ncells_adjust = std::floor( (tile_realbox.lo(dir) - part_realbox.lo(dir))/dx[dir] ); + overlap_realbox.setLo( dir, part_realbox.lo(dir) + std::max(ncells_adjust, 0.) 
* dx[dir]); + } else { + no_overlap = true; break; } - if (no_overlap == 1) { - continue; // Go to the next tile + if ( tile_realbox.hi(dir) >= part_realbox.lo(dir) ) { + Real ncells_adjust = std::floor( (part_realbox.hi(dir) - tile_realbox.hi(dir))/dx[dir] ); + overlap_realbox.setHi( dir, part_realbox.hi(dir) - std::max(ncells_adjust, 0.) * dx[dir]); + } else { + no_overlap = true; break; } + // Count the number of cells in this direction in overlap_realbox + overlap_box.setSmall( dir, 0 ); + overlap_box.setBig( dir, + int( std::round((overlap_realbox.hi(dir)-overlap_realbox.lo(dir)) + /dx[dir] )) - 1); + shifted[dir] = std::round((overlap_realbox.lo(dir)-problo[dir])/dx[dir]); + // shifted is exact in non-moving-window direction. That's all we care. + } + if (no_overlap == 1) { + continue; // Go to the next tile + } - const int grid_id = mfi.index(); - const int tile_id = mfi.LocalTileIndex(); - - // Loop through the cells of overlap_box and inject - // the corresponding particles - const auto& overlap_corner = overlap_realbox.lo(); - for (IntVect iv = overlap_box.smallEnd(); iv <= overlap_box.bigEnd(); overlap_box.next(iv)) - { - int fac; - if (do_continuous_injection) { -#if ( AMREX_SPACEDIM == 3 ) - Real x = overlap_corner[0] + (iv[0] + 0.5)*dx[0]; - Real y = overlap_corner[1] + (iv[1] + 0.5)*dx[1]; - Real z = overlap_corner[2] + (iv[2] + 0.5)*dx[2]; -#elif ( AMREX_SPACEDIM == 2 ) - Real x = overlap_corner[0] + (iv[0] + 0.5)*dx[0]; - Real y = 0; - Real z = overlap_corner[1] + (iv[1] + 0.5)*dx[1]; -#endif - fac = GetRefineFac(x, y, z); - } else { - fac = 1.0; - } - - int ref_num_ppc = num_ppc * AMREX_D_TERM(fac, *fac, *fac); - for (int i_part=0; i_part<ref_num_ppc;i_part++) { - std::array<Real, 3> r; - plasma_injector->getPositionUnitBox(r, i_part, fac); -#if ( AMREX_SPACEDIM == 3 ) - Real x = overlap_corner[0] + (iv[0] + r[0])*dx[0]; - Real y = overlap_corner[1] + (iv[1] + r[1])*dx[1]; - Real z = overlap_corner[2] + (iv[2] + r[2])*dx[2]; -#elif ( 
AMREX_SPACEDIM == 2 ) - Real x = overlap_corner[0] + (iv[0] + r[0])*dx[0]; - Real y = 0; - Real z = overlap_corner[1] + (iv[1] + r[1])*dx[1]; -#endif - // If the new particle is not inside the tile box, - // go to the next generated particle. -#if ( AMREX_SPACEDIM == 3 ) - if(!tile_realbox.contains( RealVect{x, y, z} )) continue; -#elif ( AMREX_SPACEDIM == 2 ) - if(!tile_realbox.contains( RealVect{x, z} )) continue; -#endif + const int grid_id = mfi.index(); + const int tile_id = mfi.LocalTileIndex(); - // Save the x and y values to use in the insideBounds checks. - // This is needed with WARPX_RZ since x and y are modified. - Real xb = x; - Real yb = y; - -#ifdef WARPX_RZ - // Replace the x and y, choosing the angle randomly. - // These x and y are used to get the momentum and density - Real theta = 2.*MathConst::pi*amrex::Random(); - y = x*std::sin(theta); - x = x*std::cos(theta); -#endif + // Max number of new particles, if particles are created in the whole + // overlap_box. All of them are created, and invalid ones are then + // discaded + int max_new_particles = overlap_box.numPts() * num_ppc; - Real dens; - std::array<Real, 3> u; - if (WarpX::gamma_boost == 1.){ - // Lab-frame simulation - // If the particle is not within the species's - // xmin, xmax, ymin, ymax, zmin, zmax, go to - // the next generated particle. 
- if (!plasma_injector->insideBounds(xb, yb, z)) continue; - plasma_injector->getMomentum(u, x, y, z); - dens = plasma_injector->getDensity(x, y, z); - } else { - // Boosted-frame simulation - Real c = PhysConst::c; - Real gamma_boost = WarpX::gamma_boost; - Real beta_boost = WarpX::beta_boost; - // Since the user provides the density distribution - // at t_lab=0 and in the lab-frame coordinates, - // we need to find the lab-frame position of this - // particle at t_lab=0, from its boosted-frame coordinates - // Assuming ballistic motion, this is given by: - // z0_lab = gamma*( z_boost*(1-beta*betaz_lab) - ct_boost*(betaz_lab-beta) ) - // where betaz_lab is the speed of the particle in the lab frame - // - // In order for this equation to be solvable, betaz_lab - // is explicitly assumed to have no dependency on z0_lab - plasma_injector->getMomentum(u, x, y, 0.); // No z0_lab dependency - // At this point u is the lab-frame momentum - // => Apply the above formula for z0_lab - Real gamma_lab = std::sqrt( 1 + (u[0]*u[0] + u[1]*u[1] + u[2]*u[2])/(c*c) ); - Real betaz_lab = u[2]/gamma_lab/c; - Real t = WarpX::GetInstance().gett_new(lev); - Real z0_lab = gamma_boost * ( z*(1-beta_boost*betaz_lab) - c*t*(betaz_lab-beta_boost) ); - // If the particle is not within the lab-frame zmin, zmax, etc. - // go to the next generated particle. 
- if (!plasma_injector->insideBounds(xb, yb, z0_lab)) continue; - // call `getDensity` with lab-frame parameters - dens = plasma_injector->getDensity(x, y, z0_lab); - // At this point u and dens are the lab-frame quantities - // => Perform Lorentz transform - dens = gamma_boost * dens * ( 1 - beta_boost*betaz_lab ); - u[2] = gamma_boost * ( u[2] -beta_boost*c*gamma_lab ); - } - Real weight = dens * scale_fac / (AMREX_D_TERM(fac, *fac, *fac)); -#ifdef WARPX_RZ - if (plasma_injector->radially_weighted) { - weight *= 2*MathConst::pi*xb; - } else { - // This is not correct since it might shift the particle - // out of the local grid - x = std::sqrt(xb*rmax); - weight *= dx[0]; - } -#endif - attribs[PIdx::w ] = weight; - attribs[PIdx::ux] = u[0]; - attribs[PIdx::uy] = u[1]; - attribs[PIdx::uz] = u[2]; - - if (WarpX::do_boosted_frame_diagnostic && do_boosted_frame_diags) - { - auto& particle_tile = DefineAndReturnParticleTile(lev, grid_id, tile_id); - particle_tile.push_back_real(particle_comps["xold"], x); - particle_tile.push_back_real(particle_comps["yold"], y); - particle_tile.push_back_real(particle_comps["zold"], z); - - particle_tile.push_back_real(particle_comps["uxold"], u[0]); - particle_tile.push_back_real(particle_comps["uyold"], u[1]); - particle_tile.push_back_real(particle_comps["uzold"], u[2]); - } - - AddOneParticle(lev, grid_id, tile_id, x, y, z, attribs); + // If refine injection, build pointer dp_cellid that holds pointer to + // array of refined cell IDs. + Vector<int> cellid_v; + if (refine_injection and lev == 0) + { + // then how many new particles will be injected is not that simple + // We have to shift fine_injection_box because overlap_box has been shifted. 
+ Box fine_overlap_box = overlap_box & amrex::shift(fine_injection_box,shifted); + max_new_particles += fine_overlap_box.numPts() * num_ppc + * (AMREX_D_TERM(rrfac,*rrfac,*rrfac)-1); + for (int icell = 0, ncells = overlap_box.numPts(); icell < ncells; ++icell) { + IntVect iv = overlap_box.atOffset(icell); + int r = (fine_overlap_box.contains(iv)) ? AMREX_D_TERM(rrfac,*rrfac,*rrfac) : 1; + for (int ipart = 0; ipart < r; ++ipart) { + cellid_v.push_back(icell); + cellid_v.push_back(ipart); } } + } + int const* hp_cellid = (cellid_v.empty()) ? nullptr : cellid_v.data(); + amrex::AsyncArray<int> cellid_aa(hp_cellid, cellid_v.size()); + int const* dp_cellid = cellid_aa.data(); - if (cost) { - wt = (amrex::second() - wt) / tile_box.d_numPts(); - Array4<Real> const& costarr = cost->array(mfi); - amrex::ParallelFor(tile_box, - [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept - { - costarr(i,j,k) += wt; - }); - } + // Update NextID to include particles created in this function + int pid; +#pragma omp critical (add_plasma_nextid) + { + pid = ParticleType::NextID(); + ParticleType::NextID(pid+max_new_particles); } - } -} + const int cpuid = ParallelDescriptor::MyProc(); -#ifdef AMREX_USE_GPU -void -PhysicalParticleContainer::AddPlasmaGPU (int lev, RealBox part_realbox) -{ - BL_PROFILE("PhysicalParticleContainer::AddPlasmaGPU"); + auto& particle_tile = GetParticles(lev)[std::make_pair(grid_id,tile_id)]; + bool do_boosted = false; + if (WarpX::do_boosted_frame_diagnostic && do_boosted_frame_diags) { + do_boosted = true; + DefineAndReturnParticleTile(lev, grid_id, tile_id); + } + auto old_size = particle_tile.GetArrayOfStructs().size(); + auto new_size = old_size + max_new_particles; + particle_tile.resize(new_size); + + ParticleType* pp = particle_tile.GetArrayOfStructs()().data() + old_size; + auto& soa = particle_tile.GetStructOfArrays(); + GpuArray<Real*,PIdx::nattribs> pa; + for (int ia = 0; ia < PIdx::nattribs; ++ia) { + pa[ia] = soa.GetRealData(ia).data() + old_size; + 
} + GpuArray<Real*,6> pb; + if (do_boosted) { + pb[0] = soa.GetRealData(particle_comps[ "xold"]).data() + old_size; + pb[1] = soa.GetRealData(particle_comps[ "yold"]).data() + old_size; + pb[2] = soa.GetRealData(particle_comps[ "zold"]).data() + old_size; + pb[3] = soa.GetRealData(particle_comps["uxold"]).data() + old_size; + pb[4] = soa.GetRealData(particle_comps["uyold"]).data() + old_size; + pb[5] = soa.GetRealData(particle_comps["uzold"]).data() + old_size; + } - // If no part_realbox is provided, initialize particles in the whole domain - const Geometry& geom = Geom(lev); - if (!part_realbox.ok()) part_realbox = geom.ProbDomain(); + const GpuArray<Real,AMREX_SPACEDIM> overlap_corner + {AMREX_D_DECL(overlap_realbox.lo(0), + overlap_realbox.lo(1), + overlap_realbox.lo(2))}; - int num_ppc = plasma_injector->num_particles_per_cell; -#ifdef WARPX_RZ - Real rmax = std::min(plasma_injector->xmax, part_realbox.hi(0)); -#endif + std::size_t shared_mem_bytes = plasma_injector->sharedMemoryNeeded(); + int lrrfac = rrfac; - const Real* dx = geom.CellSize(); + // Loop over all new particles and inject them (creates too many + // particles, in particular does not consider xmin, xmax etc.). + // The invalid ones are given negative ID and are deleted during the + // next redistribute. 
+ amrex::For(max_new_particles, [=] AMREX_GPU_DEVICE (int ip) noexcept + { + ParticleType& p = pp[ip]; + p.id() = pid+ip; + p.cpu() = cpuid; + + int cellid, i_part; + Real fac; + if (dp_cellid == nullptr) { + cellid = ip/num_ppc; + i_part = ip - cellid*num_ppc; + fac = 1.0; + } else { + cellid = dp_cellid[2*ip]; + i_part = dp_cellid[2*ip+1]; + fac = lrrfac; + } - Real scale_fac; -#if AMREX_SPACEDIM==3 - scale_fac = dx[0]*dx[1]*dx[2]/num_ppc; -#elif AMREX_SPACEDIM==2 - scale_fac = dx[0]*dx[1]/num_ppc; -#endif + IntVect iv = overlap_box.atOffset(cellid); -#ifdef _OPENMP - // First touch all tiles in the map in serial - for (MFIter mfi = MakeMFIter(lev); mfi.isValid(); ++mfi) { - const int grid_id = mfi.index(); - const int tile_id = mfi.LocalTileIndex(); - GetParticles(lev)[std::make_pair(grid_id, tile_id)]; - } + const XDim3 r = inj_pos->getPositionUnitBox(i_part, fac); +#if (AMREX_SPACEDIM == 3) + Real x = overlap_corner[0] + (iv[0]+r.x)*dx[0]; + Real y = overlap_corner[1] + (iv[1]+r.y)*dx[1]; + Real z = overlap_corner[2] + (iv[2]+r.z)*dx[2]; +#else + Real x = overlap_corner[0] + (iv[0]+r.x)*dx[0]; + Real y = 0.0; + Real z = overlap_corner[1] + (iv[1]+r.y)*dx[1]; #endif - MultiFab* cost = WarpX::getCosts(lev); - - if ( (not m_refined_injection_mask) and WarpX::do_moving_window) - { - Box mask_box = geom.Domain(); - mask_box.setSmall(WarpX::moving_window_dir, 0); - mask_box.setBig(WarpX::moving_window_dir, 0); - m_refined_injection_mask.reset( new IArrayBox(mask_box)); - m_refined_injection_mask->setVal(-1); - } - - MFItInfo info; - if (do_tiling) { - info.EnableTiling(tile_size); - } - info.SetDynamic(true); - -#ifdef _OPENMP -#pragma omp parallel if (not WarpX::serialize_ics) +#if (AMREX_SPACEDIM == 3) + if (!tile_realbox.contains(XDim3{x,y,z})) { + p.id() = -1; + return; + } +#else + if (!tile_realbox.contains(XDim3{x,z,0.0})) { + p.id() = -1; + return; + } #endif - { - std::array<Real,PIdx::nattribs> attribs; - attribs.fill(0.0); - - // Loop through the tiles - 
for (MFIter mfi = MakeMFIter(lev, info); mfi.isValid(); ++mfi) { - Real wt = amrex::second(); - - const Box& tile_box = mfi.tilebox(); - const RealBox tile_realbox = WarpX::getRealBox(tile_box, lev); - - // Find the cells of part_box that overlap with tile_realbox - // If there is no overlap, just go to the next tile in the loop - RealBox overlap_realbox; - Box overlap_box; - Real ncells_adjust; - bool no_overlap = 0; + // Save the x and y values to use in the insideBounds checks. + // This is needed with WARPX_DIM_RZ since x and y are modified. + Real xb = x; + Real yb = y; + +#ifdef WARPX_DIM_RZ + // Replace the x and y, choosing the angle randomly. + // These x and y are used to get the momentum and density + Real theta = 2.*MathConst::pi*amrex::Random(); + x = xb*std::cos(theta); + y = xb*std::sin(theta); +#endif - for (int dir=0; dir<AMREX_SPACEDIM; dir++) { - if ( tile_realbox.lo(dir) <= part_realbox.hi(dir) ) { - ncells_adjust = std::floor( (tile_realbox.lo(dir) - part_realbox.lo(dir))/dx[dir] ); - overlap_realbox.setLo( dir, part_realbox.lo(dir) + std::max(ncells_adjust, 0.) * dx[dir]); - } else { - no_overlap = 1; break; + Real dens; + XDim3 u; + if (gamma_boost == 1.) { + // Lab-frame simulation + // If the particle is not within the species's + // xmin, xmax, ymin, ymax, zmin, zmax, go to + // the next generated particle. + if (!inj_pos->insideBounds(xb, yb, z)) { + p.id() = -1; + return; } - if ( tile_realbox.hi(dir) >= part_realbox.lo(dir) ) { - ncells_adjust = std::floor( (part_realbox.hi(dir) - tile_realbox.hi(dir))/dx[dir] ); - overlap_realbox.setHi( dir, part_realbox.hi(dir) - std::max(ncells_adjust, 0.) 
* dx[dir]); - } else { - no_overlap = 1; break; + u = inj_mom->getMomentum(x, y, z); + dens = inj_rho->getDensity(x, y, z); + // Remove particle if density below threshold + if ( dens < density_min ){ + p.id() = -1; + return; } - // Count the number of cells in this direction in overlap_realbox - overlap_box.setSmall( dir, 0 ); - overlap_box.setBig( dir, - int( round((overlap_realbox.hi(dir)-overlap_realbox.lo(dir))/dx[dir] )) - 1); - } - if (no_overlap == 1) { - continue; // Go to the next tile - } - - const int grid_id = mfi.index(); - const int tile_id = mfi.LocalTileIndex(); - - Cuda::HostVector<ParticleType> host_particles; - std::array<Cuda::HostVector<Real>, PIdx::nattribs> host_attribs; - - // Loop through the cells of overlap_box and inject - // the corresponding particles - const auto& overlap_corner = overlap_realbox.lo(); - for (IntVect iv = overlap_box.smallEnd(); iv <= overlap_box.bigEnd(); overlap_box.next(iv)) - { - int fac; - if (do_continuous_injection) { -#if ( AMREX_SPACEDIM == 3 ) - Real x = overlap_corner[0] + (iv[0] + 0.5)*dx[0]; - Real y = overlap_corner[1] + (iv[1] + 0.5)*dx[1]; - Real z = overlap_corner[2] + (iv[2] + 0.5)*dx[2]; -#elif ( AMREX_SPACEDIM == 2 ) - Real x = overlap_corner[0] + (iv[0] + 0.5)*dx[0]; - Real y = 0; - Real z = overlap_corner[1] + (iv[1] + 0.5)*dx[1]; -#endif - fac = GetRefineFac(x, y, z); - } else { - fac = 1.0; + // Cut density if above threshold + dens = amrex::min(dens, density_max); + } else { + // Boosted-frame simulation + // Since the user provides the density distribution + // at t_lab=0 and in the lab-frame coordinates, + // we need to find the lab-frame position of this + // particle at t_lab=0, from its boosted-frame coordinates + // Assuming ballistic motion, this is given by: + // z0_lab = gamma*( z_boost*(1-beta*betaz_lab) - ct_boost*(betaz_lab-beta) ) + // where betaz_lab is the speed of the particle in the lab frame + // + // In order for this equation to be solvable, betaz_lab + // is explicitly 
assumed to have no dependency on z0_lab + u = inj_mom->getMomentum(x, y, 0.); // No z0_lab dependency + // At this point u is the lab-frame momentum + // => Apply the above formula for z0_lab + Real gamma_lab = std::sqrt( 1.+(u.x*u.x+u.y*u.y+u.z*u.z) ); + Real betaz_lab = u.z/(gamma_lab); + Real z0_lab = gamma_boost * ( z*(1-beta_boost*betaz_lab) + - PhysConst::c*t*(betaz_lab-beta_boost) ); + // If the particle is not within the lab-frame zmin, zmax, etc. + // go to the next generated particle. + if (!inj_pos->insideBounds(xb, yb, z0_lab)) { + p.id() = -1; + return; } + // call `getDensity` with lab-frame parameters + dens = inj_rho->getDensity(x, y, z0_lab); + // Remove particle if density below threshold + if ( dens < density_min ){ + p.id() = -1; + return; + } + // Cut density if above threshold + dens = amrex::min(dens, density_max); + // At this point u and dens are the lab-frame quantities + // => Perform Lorentz transform + dens = gamma_boost * dens * ( 1.0 - beta_boost*betaz_lab ); + u.z = gamma_boost * ( u.z -beta_boost*gamma_lab ); + } - int ref_num_ppc = num_ppc * AMREX_D_TERM(fac, *fac, *fac); - for (int i_part=0; i_part<ref_num_ppc;i_part++) { - std::array<Real, 3> r; - plasma_injector->getPositionUnitBox(r, i_part, fac); -#if ( AMREX_SPACEDIM == 3 ) - Real x = overlap_corner[0] + (iv[0] + r[0])*dx[0]; - Real y = overlap_corner[1] + (iv[1] + r[1])*dx[1]; - Real z = overlap_corner[2] + (iv[2] + r[2])*dx[2]; -#elif ( AMREX_SPACEDIM == 2 ) - Real x = overlap_corner[0] + (iv[0] + r[0])*dx[0]; - Real y = 0; - Real z = overlap_corner[1] + (iv[1] + r[1])*dx[1]; -#endif - // If the new particle is not inside the tile box, - // go to the next generated particle. -#if ( AMREX_SPACEDIM == 3 ) - if(!tile_realbox.contains( RealVect{x, y, z} )) continue; -#elif ( AMREX_SPACEDIM == 2 ) - if(!tile_realbox.contains( RealVect{x, z} )) continue; -#endif - - // Save the x and y values to use in the insideBounds checks. 
- // This is needed with WARPX_RZ since x and y are modified. - Real xb = x; - Real yb = y; - -#ifdef WARPX_RZ - // Replace the x and y, choosing the angle randomly. - // These x and y are used to get the momentum and density - Real theta = 2.*MathConst::pi*amrex::Random(); - x = xb*std::cos(theta); - y = xb*std::sin(theta); -#endif + u.x *= PhysConst::c; + u.y *= PhysConst::c; + u.z *= PhysConst::c; - Real dens; - std::array<Real, 3> u; - if (WarpX::gamma_boost == 1.){ - // Lab-frame simulation - // If the particle is not within the species's - // xmin, xmax, ymin, ymax, zmin, zmax, go to - // the next generated particle. - if (!plasma_injector->insideBounds(xb, yb, z)) continue; - plasma_injector->getMomentum(u, x, y, z); - dens = plasma_injector->getDensity(x, y, z); - } else { - // Boosted-frame simulation - Real c = PhysConst::c; - Real gamma_boost = WarpX::gamma_boost; - Real beta_boost = WarpX::beta_boost; - // Since the user provides the density distribution - // at t_lab=0 and in the lab-frame coordinates, - // we need to find the lab-frame position of this - // particle at t_lab=0, from its boosted-frame coordinates - // Assuming ballistic motion, this is given by: - // z0_lab = gamma*( z_boost*(1-beta*betaz_lab) - ct_boost*(betaz_lab-beta) ) - // where betaz_lab is the speed of the particle in the lab frame - // - // In order for this equation to be solvable, betaz_lab - // is explicitly assumed to have no dependency on z0_lab - plasma_injector->getMomentum(u, x, y, 0.); // No z0_lab dependency - // At this point u is the lab-frame momentum - // => Apply the above formula for z0_lab - Real gamma_lab = std::sqrt( 1 + (u[0]*u[0] + u[1]*u[1] + u[2]*u[2])/(c*c) ); - Real betaz_lab = u[2]/gamma_lab/c; - Real t = WarpX::GetInstance().gett_new(lev); - Real z0_lab = gamma_boost * ( z*(1-beta_boost*betaz_lab) - c*t*(betaz_lab-beta_boost) ); - // If the particle is not within the lab-frame zmin, zmax, etc. - // go to the next generated particle. 
- if (!plasma_injector->insideBounds(xb, yb, z0_lab)) continue; - // call `getDensity` with lab-frame parameters - dens = plasma_injector->getDensity(x, y, z0_lab); - // At this point u and dens are the lab-frame quantities - // => Perform Lorentz transform - dens = gamma_boost * dens * ( 1 - beta_boost*betaz_lab ); - u[2] = gamma_boost * ( u[2] -beta_boost*c*gamma_lab ); - } - Real weight = dens * scale_fac / (AMREX_D_TERM(fac, *fac, *fac)); -#ifdef WARPX_RZ - if (plasma_injector->radially_weighted) { - weight *= 2*MathConst::pi*xb; - } else { - // This is not correct since it might shift the particle - // out of the local grid - x = std::sqrt(xb*rmax); - weight *= dx[0]; - } + // Real weight = dens * scale_fac / (AMREX_D_TERM(fac, *fac, *fac)); + Real weight = dens * scale_fac; +#ifdef WARPX_DIM_RZ + if (radially_weighted) { + weight *= 2.*MathConst::pi*xb; + } else { + // This is not correct since it might shift the particle + // out of the local grid + x = std::sqrt(xb*rmax); + weight *= dx[0]; + } #endif - attribs[PIdx::w ] = weight; - attribs[PIdx::ux] = u[0]; - attribs[PIdx::uy] = u[1]; - attribs[PIdx::uz] = u[2]; - - // note - this will be slow on the GPU, need to revisit - if (WarpX::do_boosted_frame_diagnostic && do_boosted_frame_diags) - { - auto& particle_tile = DefineAndReturnParticleTile(lev, grid_id, tile_id); - particle_tile.push_back_real(particle_comps["xold"], x); - particle_tile.push_back_real(particle_comps["yold"], y); - particle_tile.push_back_real(particle_comps["zold"], z); - - particle_tile.push_back_real(particle_comps["uxold"], u[0]); - particle_tile.push_back_real(particle_comps["uyold"], u[1]); - particle_tile.push_back_real(particle_comps["uzold"], u[2]); - } + pa[PIdx::w ][ip] = weight; + pa[PIdx::ux][ip] = u.x; + pa[PIdx::uy][ip] = u.y; + pa[PIdx::uz][ip] = u.z; + + if (do_boosted) { + pb[0][ip] = x; + pb[1][ip] = y; + pb[2][ip] = z; + pb[3][ip] = u.x; + pb[4][ip] = u.y; + pb[5][ip] = u.z; + } - ParticleType p; - p.id() = 
ParticleType::NextID(); - p.cpu() = ParallelDescriptor::MyProc(); #if (AMREX_SPACEDIM == 3) - p.pos(0) = x; - p.pos(1) = y; - p.pos(2) = z; + p.pos(0) = x; + p.pos(1) = y; + p.pos(2) = z; #elif (AMREX_SPACEDIM == 2) -#ifdef WARPX_RZ - attribs[PIdx::theta] = theta; +#ifdef WARPX_DIM_RZ + pa[PIdx::theta][ip] = theta; #endif - p.pos(0) = xb; - p.pos(1) = z; + p.pos(0) = xb; + p.pos(1) = z; #endif - - host_particles.push_back(p); - for (int kk = 0; kk < PIdx::nattribs; ++kk) - host_attribs[kk].push_back(attribs[kk]); - } - } - - auto& particle_tile = GetParticles(lev)[std::make_pair(grid_id,tile_id)]; - auto old_size = particle_tile.GetArrayOfStructs().size(); - auto new_size = old_size + host_particles.size(); - particle_tile.resize(new_size); - - Cuda::thrust_copy(host_particles.begin(), - host_particles.end(), - particle_tile.GetArrayOfStructs().begin() + old_size); - - for (int kk = 0; kk < PIdx::nattribs; ++kk) { - Cuda::thrust_copy(host_attribs[kk].begin(), - host_attribs[kk].end(), - particle_tile.GetStructOfArrays().GetRealData(kk).begin() + old_size); - } - - if (cost) { - wt = (amrex::second() - wt) / tile_box.d_numPts(); - Array4<Real> const& costarr = cost->array(mfi); - amrex::ParallelFor(tile_box, - [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept - { - costarr(i,j,k) += wt; - }); - } - } + }, shared_mem_bytes); + + if (cost) { + wt = (amrex::second() - wt) / tile_box.d_numPts(); + Array4<Real> const& costarr = cost->array(mfi); + amrex::ParallelFor(tile_box, + [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept + { + costarr(i,j,k) += wt; + }); + } } + + // The function that calls this is responsible for redistributing particles. 
} -#endif #ifdef WARPX_DO_ELECTROSTATIC void @@ -1066,11 +848,14 @@ PhysicalParticleContainer::FieldGather (int lev, MultiFab* cost = WarpX::getCosts(lev); #ifdef _OPENMP -#pragma omp parallel +#pragma omp parallel #endif { - Cuda::ManagedDeviceVector<Real> xp, yp, zp; - +#ifdef _OPENMP + int thread_num = omp_get_thread_num(); +#else + int thread_num = 0; +#endif for (WarpXParIter pti(*this, lev); pti.isValid(); ++pti) { Real wt = amrex::second(); @@ -1106,35 +891,15 @@ PhysicalParticleContainer::FieldGather (int lev, // // copy data from particle container to temp arrays // - pti.GetPosition(xp, yp, zp); - - const std::array<Real,3>& xyzmin = WarpX::LowerCorner(box, lev); - const int* ixyzmin = box.loVect(); + pti.GetPosition(m_xp[thread_num], m_yp[thread_num], m_zp[thread_num]); // // Field Gather // - const int ll4symtry = false; - long lvect_fieldgathe = 64; - warpx_geteb_energy_conserving( - &np, - xp.dataPtr(), - yp.dataPtr(), - zp.dataPtr(), - Exp.dataPtr(),Eyp.dataPtr(),Ezp.dataPtr(), - Bxp.dataPtr(),Byp.dataPtr(),Bzp.dataPtr(), - ixyzmin, - &xyzmin[0], &xyzmin[1], &xyzmin[2], - &dx[0], &dx[1], &dx[2], - &WarpX::nox, &WarpX::noy, &WarpX::noz, - BL_TO_FORTRAN_ANYD(exfab), - BL_TO_FORTRAN_ANYD(eyfab), - BL_TO_FORTRAN_ANYD(ezfab), - BL_TO_FORTRAN_ANYD(bxfab), - BL_TO_FORTRAN_ANYD(byfab), - BL_TO_FORTRAN_ANYD(bzfab), - &ll4symtry, &WarpX::l_lower_order_in_v, &WarpX::do_nodal, - &lvect_fieldgathe, &WarpX::field_gathering_algo); + int e_is_nodal = Ex.is_nodal() and Ey.is_nodal() and Ez.is_nodal(); + FieldGather(pti, Exp, Eyp, Ezp, Bxp, Byp, Bzp, + &exfab, &eyfab, &ezfab, &bxfab, &byfab, &bzfab, + Ex.nGrow(), e_is_nodal, 0, np, thread_num, lev, lev); if (cost) { const Box& tbx = pti.tilebox(); @@ -1164,7 +929,7 @@ PhysicalParticleContainer::Evolve (int lev, BL_PROFILE("PPC::Evolve()"); BL_PROFILE_VAR_NS("PPC::Evolve::Copy", blp_copy); BL_PROFILE_VAR_NS("PICSAR::FieldGather", blp_pxr_fg); - BL_PROFILE_VAR_NS("PICSAR::ParticlePush", blp_pxr_pp); + 
BL_PROFILE_VAR_NS("PPC::ParticlePush", blp_ppc_pp); BL_PROFILE_VAR_NS("PPC::Evolve::partition", blp_partition); const std::array<Real,3>& dx = WarpX::CellSize(lev); @@ -1391,57 +1156,40 @@ PhysicalParticleContainer::Evolve (int lev, pti.GetPosition(m_xp[thread_num], m_yp[thread_num], m_zp[thread_num]); BL_PROFILE_VAR_STOP(blp_copy); - if (rho) DepositCharge(pti, wp, rho, crho, 0, np_current, np, thread_num, lev); + if (rho) { + DepositCharge(pti, wp, rho, 0, 0, np_current, thread_num, lev, lev); + if (has_buffer){ + DepositCharge(pti, wp, crho, 0, np_current, np-np_current, thread_num, lev, lev-1); + } + } if (! do_not_push) { + const long np_gather = (cEx) ? nfine_gather : np; + + int e_is_nodal = Ex.is_nodal() and Ey.is_nodal() and Ez.is_nodal(); + // // Field Gather of Aux Data (i.e., the full solution) // - const int ll4symtry = false; - long lvect_fieldgathe = 64; - - const std::array<Real,3>& xyzmin_grid = WarpX::LowerCorner(box, lev); - const int* ixyzmin_grid = box.loVect(); - - const long np_gather = (cEx) ? 
nfine_gather : np; - BL_PROFILE_VAR_START(blp_pxr_fg); - - warpx_geteb_energy_conserving( - &np_gather, - m_xp[thread_num].dataPtr(), - m_yp[thread_num].dataPtr(), - m_zp[thread_num].dataPtr(), - Exp.dataPtr(),Eyp.dataPtr(),Ezp.dataPtr(), - Bxp.dataPtr(),Byp.dataPtr(),Bzp.dataPtr(), - ixyzmin_grid, - &xyzmin_grid[0], &xyzmin_grid[1], &xyzmin_grid[2], - &dx[0], &dx[1], &dx[2], - &WarpX::nox, &WarpX::noy, &WarpX::noz, - BL_TO_FORTRAN_ANYD(*exfab), - BL_TO_FORTRAN_ANYD(*eyfab), - BL_TO_FORTRAN_ANYD(*ezfab), - BL_TO_FORTRAN_ANYD(*bxfab), - BL_TO_FORTRAN_ANYD(*byfab), - BL_TO_FORTRAN_ANYD(*bzfab), - &ll4symtry, &WarpX::l_lower_order_in_v, &WarpX::do_nodal, - &lvect_fieldgathe, &WarpX::field_gathering_algo); + FieldGather(pti, Exp, Eyp, Ezp, Bxp, Byp, Bzp, + exfab, eyfab, ezfab, bxfab, byfab, bzfab, + Ex.nGrow(), e_is_nodal, 0, np_gather, thread_num, lev, lev); if (np_gather < np) { const IntVect& ref_ratio = WarpX::RefRatio(lev-1); const Box& cbox = amrex::coarsen(box,ref_ratio); - const std::array<Real,3>& cxyzmin_grid = WarpX::LowerCorner(cbox, lev-1); - const int* cixyzmin_grid = cbox.loVect(); - - const FArrayBox* cexfab = &(*cEx)[pti]; - const FArrayBox* ceyfab = &(*cEy)[pti]; - const FArrayBox* cezfab = &(*cEz)[pti]; - const FArrayBox* cbxfab = &(*cBx)[pti]; - const FArrayBox* cbyfab = &(*cBy)[pti]; - const FArrayBox* cbzfab = &(*cBz)[pti]; + // Data on the grid + FArrayBox const* cexfab = &(*cEx)[pti]; + FArrayBox const* ceyfab = &(*cEy)[pti]; + FArrayBox const* cezfab = &(*cEz)[pti]; + FArrayBox const* cbxfab = &(*cBx)[pti]; + FArrayBox const* cbyfab = &(*cBy)[pti]; + FArrayBox const* cbzfab = &(*cBz)[pti]; + if (WarpX::use_fdtd_nci_corr) { #if (AMREX_SPACEDIM == 2) @@ -1494,26 +1242,14 @@ PhysicalParticleContainer::Evolve (int lev, #endif } - long ncrse = np - nfine_gather; - warpx_geteb_energy_conserving( - &ncrse, - m_xp[thread_num].dataPtr()+nfine_gather, - m_yp[thread_num].dataPtr()+nfine_gather, - m_zp[thread_num].dataPtr()+nfine_gather, - 
Exp.dataPtr()+nfine_gather, Eyp.dataPtr()+nfine_gather, Ezp.dataPtr()+nfine_gather, - Bxp.dataPtr()+nfine_gather, Byp.dataPtr()+nfine_gather, Bzp.dataPtr()+nfine_gather, - cixyzmin_grid, - &cxyzmin_grid[0], &cxyzmin_grid[1], &cxyzmin_grid[2], - &cdx[0], &cdx[1], &cdx[2], - &WarpX::nox, &WarpX::noy, &WarpX::noz, - BL_TO_FORTRAN_ANYD(*cexfab), - BL_TO_FORTRAN_ANYD(*ceyfab), - BL_TO_FORTRAN_ANYD(*cezfab), - BL_TO_FORTRAN_ANYD(*cbxfab), - BL_TO_FORTRAN_ANYD(*cbyfab), - BL_TO_FORTRAN_ANYD(*cbzfab), - &ll4symtry, &WarpX::l_lower_order_in_v, &WarpX::do_nodal, - &lvect_fieldgathe, &WarpX::field_gathering_algo); + // Field gather for particles in gather buffers + e_is_nodal = cEx->is_nodal() and cEy->is_nodal() and cEz->is_nodal(); + FieldGather(pti, Exp, Eyp, Ezp, Bxp, Byp, Bzp, + cexfab, ceyfab, cezfab, + cbxfab, cbyfab, cbzfab, + cEx->nGrow(), e_is_nodal, + nfine_gather, np-nfine_gather, + thread_num, lev, lev-1); } BL_PROFILE_VAR_STOP(blp_pxr_fg); @@ -1521,10 +1257,10 @@ PhysicalParticleContainer::Evolve (int lev, // // Particle Push // - BL_PROFILE_VAR_START(blp_pxr_pp); + BL_PROFILE_VAR_START(blp_ppc_pp); PushPX(pti, m_xp[thread_num], m_yp[thread_num], m_zp[thread_num], m_giv[thread_num], dt); - BL_PROFILE_VAR_STOP(blp_pxr_pp); + BL_PROFILE_VAR_STOP(blp_ppc_pp); // // Current Deposition @@ -1561,7 +1297,12 @@ PhysicalParticleContainer::Evolve (int lev, BL_PROFILE_VAR_STOP(blp_copy); } - if (rho) DepositCharge(pti, wp, rho, crho, 1, np_current, np, thread_num, lev); + if (rho) { + DepositCharge(pti, wp, rho, 1, 0, np_current, thread_num, lev, lev); + if (has_buffer){ + DepositCharge(pti, wp, crho, 1, np_current, np-np_current, thread_num, lev, lev-1); + } + } if (cost) { const Box& tbx = pti.tilebox(); @@ -1742,36 +1483,52 @@ PhysicalParticleContainer::PushPX(WarpXParIter& pti, Real dt) { + // This wraps the momentum and position advance so that inheritors can modify the call. 
+ auto& attribs = pti.GetAttribs(); + // Extract pointers to the different particle quantities + Real* const AMREX_RESTRICT x = xp.dataPtr(); + Real* const AMREX_RESTRICT y = yp.dataPtr(); + Real* const AMREX_RESTRICT z = zp.dataPtr(); + Real* const AMREX_RESTRICT gi = giv.dataPtr(); + Real* const AMREX_RESTRICT ux = attribs[PIdx::ux].dataPtr(); + Real* const AMREX_RESTRICT uy = attribs[PIdx::uy].dataPtr(); + Real* const AMREX_RESTRICT uz = attribs[PIdx::uz].dataPtr(); + const Real* const AMREX_RESTRICT Ex = attribs[PIdx::Ex].dataPtr(); + const Real* const AMREX_RESTRICT Ey = attribs[PIdx::Ey].dataPtr(); + const Real* const AMREX_RESTRICT Ez = attribs[PIdx::Ez].dataPtr(); + const Real* const AMREX_RESTRICT Bx = attribs[PIdx::Bx].dataPtr(); + const Real* const AMREX_RESTRICT By = attribs[PIdx::By].dataPtr(); + const Real* const AMREX_RESTRICT Bz = attribs[PIdx::Bz].dataPtr(); + if (WarpX::do_boosted_frame_diagnostic && do_boosted_frame_diags) { - copy_attribs(pti, xp.dataPtr(), yp.dataPtr(), zp.dataPtr()); + copy_attribs(pti, x, y, z); } - // The following attributes should be included in CPP version of warpx_particle_pusher - // This wraps the call to warpx_particle_pusher so that inheritors can modify the call. 
- auto& attribs = pti.GetAttribs(); - auto& uxp = attribs[PIdx::ux]; - auto& uyp = attribs[PIdx::uy]; - auto& uzp = attribs[PIdx::uz]; - auto& Exp = attribs[PIdx::Ex]; - auto& Eyp = attribs[PIdx::Ey]; - auto& Ezp = attribs[PIdx::Ez]; - auto& Bxp = attribs[PIdx::Bx]; - auto& Byp = attribs[PIdx::By]; - auto& Bzp = attribs[PIdx::Bz]; - const long np = pti.numParticles(); - - warpx_particle_pusher(&np, - xp.dataPtr(), - yp.dataPtr(), - zp.dataPtr(), - uxp.dataPtr(), uyp.dataPtr(), uzp.dataPtr(), - giv.dataPtr(), - Exp.dataPtr(), Eyp.dataPtr(), Ezp.dataPtr(), - Bxp.dataPtr(), Byp.dataPtr(), Bzp.dataPtr(), - &this->charge, &this->mass, &dt, - &WarpX::particle_pusher_algo); - + // Loop over the particles and update their momentum + const Real q = this->charge; + const Real m = this-> mass; + if (WarpX::particle_pusher_algo == ParticlePusherAlgo::Boris){ + amrex::ParallelFor( pti.numParticles(), + [=] AMREX_GPU_DEVICE (long i) { + UpdateMomentumBoris( ux[i], uy[i], uz[i], gi[i], + Ex[i], Ey[i], Ez[i], Bx[i], By[i], Bz[i], q, m, dt); + UpdatePosition( x[i], y[i], z[i], + ux[i], uy[i], uz[i], dt ); + } + ); + } else if (WarpX::particle_pusher_algo == ParticlePusherAlgo::Vay) { + amrex::ParallelFor( pti.numParticles(), + [=] AMREX_GPU_DEVICE (long i) { + UpdateMomentumVay( ux[i], uy[i], uz[i], gi[i], + Ex[i], Ey[i], Ez[i], Bx[i], By[i], Bz[i], q, m, dt); + UpdatePosition( x[i], y[i], z[i], + ux[i], uy[i], uz[i], dt ); + } + ); + } else { + amrex::Abort("Unknown particle pusher"); + }; } void @@ -1800,9 +1557,6 @@ PhysicalParticleContainer::PushP (int lev, Real dt, auto& attribs = pti.GetAttribs(); - auto& uxp = attribs[PIdx::ux]; - auto& uyp = attribs[PIdx::uy]; - auto& uzp = attribs[PIdx::uz]; auto& Exp = attribs[PIdx::Ex]; auto& Eyp = attribs[PIdx::Ey]; auto& Ezp = attribs[PIdx::Ez]; @@ -1834,42 +1588,44 @@ PhysicalParticleContainer::PushP (int lev, Real dt, // pti.GetPosition(m_xp[thread_num], m_yp[thread_num], m_zp[thread_num]); - const std::array<Real,3>& xyzmin_grid = 
WarpX::LowerCorner(box, lev); - const int* ixyzmin_grid = box.loVect(); - - const int ll4symtry = false; - long lvect_fieldgathe = 64; - - warpx_geteb_energy_conserving( - &np, - m_xp[thread_num].dataPtr(), - m_yp[thread_num].dataPtr(), - m_zp[thread_num].dataPtr(), - Exp.dataPtr(),Eyp.dataPtr(),Ezp.dataPtr(), - Bxp.dataPtr(),Byp.dataPtr(),Bzp.dataPtr(), - ixyzmin_grid, - &xyzmin_grid[0], &xyzmin_grid[1], &xyzmin_grid[2], - &dx[0], &dx[1], &dx[2], - &WarpX::nox, &WarpX::noy, &WarpX::noz, - BL_TO_FORTRAN_ANYD(exfab), - BL_TO_FORTRAN_ANYD(eyfab), - BL_TO_FORTRAN_ANYD(ezfab), - BL_TO_FORTRAN_ANYD(bxfab), - BL_TO_FORTRAN_ANYD(byfab), - BL_TO_FORTRAN_ANYD(bzfab), - &ll4symtry, &WarpX::l_lower_order_in_v, &WarpX::do_nodal, - &lvect_fieldgathe, &WarpX::field_gathering_algo); - - warpx_particle_pusher_momenta(&np, - m_xp[thread_num].dataPtr(), - m_yp[thread_num].dataPtr(), - m_zp[thread_num].dataPtr(), - uxp.dataPtr(), uyp.dataPtr(), uzp.dataPtr(), - m_giv[thread_num].dataPtr(), - Exp.dataPtr(), Eyp.dataPtr(), Ezp.dataPtr(), - Bxp.dataPtr(), Byp.dataPtr(), Bzp.dataPtr(), - &this->charge, &this->mass, &dt, - &WarpX::particle_pusher_algo); + int e_is_nodal = Ex.is_nodal() and Ey.is_nodal() and Ez.is_nodal(); + FieldGather(pti, Exp, Eyp, Ezp, Bxp, Byp, Bzp, + &exfab, &eyfab, &ezfab, &bxfab, &byfab, &bzfab, + Ex.nGrow(), e_is_nodal, 0, np, thread_num, lev, lev); + + // This wraps the momentum advance so that inheritors can modify the call. 
+ // Extract pointers to the different particle quantities + Real* const AMREX_RESTRICT gi = m_giv[thread_num].dataPtr(); + Real* const AMREX_RESTRICT ux = attribs[PIdx::ux].dataPtr(); + Real* const AMREX_RESTRICT uy = attribs[PIdx::uy].dataPtr(); + Real* const AMREX_RESTRICT uz = attribs[PIdx::uz].dataPtr(); + const Real* const AMREX_RESTRICT Expp = Exp.dataPtr(); + const Real* const AMREX_RESTRICT Eypp = Eyp.dataPtr(); + const Real* const AMREX_RESTRICT Ezpp = Ezp.dataPtr(); + const Real* const AMREX_RESTRICT Bxpp = Bxp.dataPtr(); + const Real* const AMREX_RESTRICT Bypp = Byp.dataPtr(); + const Real* const AMREX_RESTRICT Bzpp = Bzp.dataPtr(); + + // Loop over the particles and update their momentum + const Real q = this->charge; + const Real m = this-> mass; + if (WarpX::particle_pusher_algo == ParticlePusherAlgo::Boris){ + amrex::ParallelFor( pti.numParticles(), + [=] AMREX_GPU_DEVICE (long i) { + UpdateMomentumBoris( ux[i], uy[i], uz[i], gi[i], + Expp[i], Eypp[i], Ezpp[i], Bxpp[i], Bypp[i], Bzpp[i], q, m, dt); + } + ); + } else if (WarpX::particle_pusher_algo == ParticlePusherAlgo::Vay) { + amrex::ParallelFor( pti.numParticles(), + [=] AMREX_GPU_DEVICE (long i) { + UpdateMomentumVay( ux[i], uy[i], uz[i], gi[i], + Expp[i], Eypp[i], Ezpp[i], Bxpp[i], Bypp[i], Bzpp[i], q, m, dt); + } + ); + } else { + amrex::Abort("Unknown particle pusher"); + }; } } } @@ -2034,74 +1790,6 @@ void PhysicalParticleContainer::GetParticleSlice(const int direction, const Real } } -int PhysicalParticleContainer::GetRefineFac(const Real x, const Real y, const Real z) -{ - if (finestLevel() == 0) return 1; - if (not WarpX::refine_plasma) return 1; - - IntVect iv; - const Geometry& geom = Geom(0); - - std::array<Real, 3> offset; - -#if ( AMREX_SPACEDIM == 3) - offset[0] = geom.ProbLo(0); - offset[1] = geom.ProbLo(1); - offset[2] = geom.ProbLo(2); -#elif ( AMREX_SPACEDIM == 2 ) - offset[0] = geom.ProbLo(0); - offset[1] = 0.0; - offset[2] = geom.ProbLo(1); -#endif - - 
AMREX_D_TERM(iv[0]=static_cast<int>(floor((x-offset[0])*geom.InvCellSize(0)));, - iv[1]=static_cast<int>(floor((y-offset[1])*geom.InvCellSize(1)));, - iv[2]=static_cast<int>(floor((z-offset[2])*geom.InvCellSize(2)));); - - iv += geom.Domain().smallEnd(); - - const int dir = WarpX::moving_window_dir; - - IntVect iv2 = iv; - iv2[dir] = 0; - - if ( (*m_refined_injection_mask)(iv2) != -1) return (*m_refined_injection_mask)(iv2); - - int ref_fac = 1; - for (int lev = 0; lev < finestLevel(); ++lev) - { - const IntVect rr = m_gdb->refRatio(lev); - const BoxArray& fine_ba = this->ParticleBoxArray(lev+1); - const int num_boxes = fine_ba.size(); - Vector<Box> stretched_boxes; - const int safety_factor = 4; - for (int i = 0; i < num_boxes; ++i) - { - Box bx = fine_ba[i]; - bx.coarsen(ref_fac*rr[dir]); - bx.setSmall(dir, std::numeric_limits<int>::min()/safety_factor); - bx.setBig(dir, std::numeric_limits<int>::max()/safety_factor); - stretched_boxes.push_back(bx); - } - - BoxArray stretched_ba(stretched_boxes.dataPtr(), stretched_boxes.size()); - - const int num_ghost = 0; - if ( stretched_ba.intersects(Box(iv, iv), num_ghost) ) - { - ref_fac *= rr[dir]; - } - else - { - break; - } - } - - (*m_refined_injection_mask)(iv2) = ref_fac; - - return ref_fac; -} - /* \brief Inject particles during the simulation * \param injection_box: domain where particles should be injected. */ @@ -2112,3 +1800,134 @@ PhysicalParticleContainer::ContinuousInjection(const RealBox& injection_box) const int lev=0; AddPlasma(lev, injection_box); } + +/* \brief Gather fields from FArrayBox exfab, eyfab, ezfab, bxfab, byfab, + * bzfab into arrays of fields on particles Exp, Eyp, Ezp, Bxp, Byp, Bzp. + * \param Exp-Bzp: fields on particles. 
+ * \param exfab-bzfab: FAB of electric and magnetic fields for particles in pti + * \param ngE: number of guard cells for E + * \param e_is_nodal: 0 if E is staggered, 1 if E is nodal + * \param offset: index of first particle for which fields are gathered + * \param np_to_gather: number of particles onto which fields are gathered + * \param thread_num: if using OpenMP, thread number + * \param lev: level on which particles are located + * \param gather_lev: level from which particles gather fields (lev-1) for + particles in buffers. + */ +void +PhysicalParticleContainer::FieldGather (WarpXParIter& pti, + RealVector& Exp, + RealVector& Eyp, + RealVector& Ezp, + RealVector& Bxp, + RealVector& Byp, + RealVector& Bzp, + FArrayBox const * exfab, + FArrayBox const * eyfab, + FArrayBox const * ezfab, + FArrayBox const * bxfab, + FArrayBox const * byfab, + FArrayBox const * bzfab, + const int ngE, const int e_is_nodal, + const long offset, + const long np_to_gather, + int thread_num, + int lev, + int gather_lev) +{ + AMREX_ALWAYS_ASSERT_WITH_MESSAGE((gather_lev==(lev-1)) || + (gather_lev==(lev )), + "Gather buffers only work for lev-1"); + + // If no particles, do not do anything + if (np_to_gather == 0) return; + // Get cell size on gather_lev + const std::array<Real,3>& dx = WarpX::CellSize(std::max(gather_lev,0)); + // Set staggering shift depending on e_is_nodal + const Real stagger_shift = e_is_nodal ? 0.0 : 0.5; + + // Get box from which field is gathered. + // If not gathering from the finest level, the box is coarsened. + Box box; + if (lev == gather_lev) { + box = pti.tilebox(); + } else { + const IntVect& ref_ratio = WarpX::RefRatio(gather_lev); + box = amrex::coarsen(pti.tilebox(),ref_ratio); + } + + // Add guard cells to the box. 
+ box.grow(ngE); + + const Array4<const Real>& ex_arr = exfab->array(); + const Array4<const Real>& ey_arr = eyfab->array(); + const Array4<const Real>& ez_arr = ezfab->array(); + const Array4<const Real>& bx_arr = bxfab->array(); + const Array4<const Real>& by_arr = byfab->array(); + const Array4<const Real>& bz_arr = bzfab->array(); + + const Real * const AMREX_RESTRICT xp = m_xp[thread_num].dataPtr() + offset; + const Real * const AMREX_RESTRICT zp = m_zp[thread_num].dataPtr() + offset; + const Real * const AMREX_RESTRICT yp = m_yp[thread_num].dataPtr() + offset; + + // Lower corner of tile box physical domain + const std::array<Real, 3>& xyzmin = WarpX::LowerCorner(box, gather_lev); + + const Dim3 lo = lbound(box); + + // Depending on l_lower_in_v and WarpX::nox, call + // different versions of template function doGatherShapeN + if (WarpX::l_lower_order_in_v){ + if (WarpX::nox == 1){ + doGatherShapeN<1,1>(xp, yp, zp, + Exp.dataPtr() + offset, Eyp.dataPtr() + offset, + Ezp.dataPtr() + offset, Bxp.dataPtr() + offset, + Byp.dataPtr() + offset, Bzp.dataPtr() + offset, + ex_arr, ey_arr, ez_arr, bx_arr, by_arr, bz_arr, + np_to_gather, dx, + xyzmin, lo, stagger_shift); + } else if (WarpX::nox == 2){ + doGatherShapeN<2,1>(xp, yp, zp, + Exp.dataPtr() + offset, Eyp.dataPtr() + offset, + Ezp.dataPtr() + offset, Bxp.dataPtr() + offset, + Byp.dataPtr() + offset, Bzp.dataPtr() + offset, + ex_arr, ey_arr, ez_arr, bx_arr, by_arr, bz_arr, + np_to_gather, dx, + xyzmin, lo, stagger_shift); + } else if (WarpX::nox == 3){ + doGatherShapeN<3,1>(xp, yp, zp, + Exp.dataPtr() + offset, Eyp.dataPtr() + offset, + Ezp.dataPtr() + offset, Bxp.dataPtr() + offset, + Byp.dataPtr() + offset, Bzp.dataPtr() + offset, + ex_arr, ey_arr, ez_arr, bx_arr, by_arr, bz_arr, + np_to_gather, dx, + xyzmin, lo, stagger_shift); + } + } else { + if (WarpX::nox == 1){ + doGatherShapeN<1,0>(xp, yp, zp, + Exp.dataPtr() + offset, Eyp.dataPtr() + offset, + Ezp.dataPtr() + offset, Bxp.dataPtr() + offset, + 
Byp.dataPtr() + offset, Bzp.dataPtr() + offset, + ex_arr, ey_arr, ez_arr, bx_arr, by_arr, bz_arr, + np_to_gather, dx, + xyzmin, lo, stagger_shift); + } else if (WarpX::nox == 2){ + doGatherShapeN<2,0>(xp, yp, zp, + Exp.dataPtr() + offset, Eyp.dataPtr() + offset, + Ezp.dataPtr() + offset, Bxp.dataPtr() + offset, + Byp.dataPtr() + offset, Bzp.dataPtr() + offset, + ex_arr, ey_arr, ez_arr, bx_arr, by_arr, bz_arr, + np_to_gather, dx, + xyzmin, lo, stagger_shift); + } else if (WarpX::nox == 3){ + doGatherShapeN<3,0>(xp, yp, zp, + Exp.dataPtr() + offset, Eyp.dataPtr() + offset, + Ezp.dataPtr() + offset, Bxp.dataPtr() + offset, + Byp.dataPtr() + offset, Bzp.dataPtr() + offset, + ex_arr, ey_arr, ez_arr, bx_arr, by_arr, bz_arr, + np_to_gather, dx, + xyzmin, lo, stagger_shift); + } + } +} diff --git a/Source/Particles/Pusher/GetAndSetPosition.H b/Source/Particles/Pusher/GetAndSetPosition.H index 42c61343e..3c74baeb2 100644 --- a/Source/Particles/Pusher/GetAndSetPosition.H +++ b/Source/Particles/Pusher/GetAndSetPosition.H @@ -5,7 +5,7 @@ #include <WarpXParticleContainer.H> #include <AMReX_REAL.H> -#ifndef WARPX_RZ +#ifndef WARPX_DIM_RZ /* \brief Extract the particle's coordinates from the ParticleType struct `p`, * and stores them in the variables `x`, `y`, `z`. 
*/ @@ -42,7 +42,7 @@ void SetPosition( #endif } -# else // if WARPX_RZ is True +# elif defined WARPX_DIM_RZ /* \brief Extract the particle's coordinates from `theta` and the attributes * of the ParticleType struct `p` (which contains the radius), @@ -71,6 +71,6 @@ void SetCylindricalPositionFromCartesian( p.pos(1) = z; } -#endif // WARPX_RZ +#endif // WARPX_DIM_RZ #endif // WARPX_PARTICLES_PUSHER_GETANDSETPOSITION_H_ diff --git a/Source/Particles/Pusher/Make.package b/Source/Particles/Pusher/Make.package index 8c8e77905..95a38fa2d 100644 --- a/Source/Particles/Pusher/Make.package +++ b/Source/Particles/Pusher/Make.package @@ -1,4 +1,6 @@ CEXE_headers += GetAndSetPosition.H CEXE_headers += UpdatePosition.H +CEXE_headers += UpdateMomentumBoris.H +CEXE_headers += UpdateMomentumVay.H INCLUDE_LOCATIONS += $(WARPX_HOME)/Source/Particles/Pusher VPATH_LOCATIONS += $(WARPX_HOME)/Source/Particles/Pusher diff --git a/Source/Particles/Pusher/UpdateMomentumBoris.H b/Source/Particles/Pusher/UpdateMomentumBoris.H new file mode 100644 index 000000000..71e9a8ed1 --- /dev/null +++ b/Source/Particles/Pusher/UpdateMomentumBoris.H @@ -0,0 +1,47 @@ +#ifndef WARPX_PARTICLES_PUSHER_UPDATEMOMENTUM_BORIS_H_ +#define WARPX_PARTICLES_PUSHER_UPDATEMOMENTUM_BORIS_H_ + +#include <AMReX_REAL.H> + +/* \brief Push the particle's positions over one timestep, + * given the value of its momenta `ux`, `uy`, `uz` */ +AMREX_GPU_HOST_DEVICE AMREX_INLINE +void UpdateMomentumBoris( + amrex::Real& ux, amrex::Real& uy, amrex::Real& uz, amrex::Real& gaminv, + const amrex::Real Ex, const amrex::Real Ey, const amrex::Real Ez, + const amrex::Real Bx, const amrex::Real By, const amrex::Real Bz, + const amrex::Real q, const amrex::Real m, const amrex::Real dt ) +{ + const amrex::Real econst = 0.5*q*dt/m; + + // First half-push for E + ux += econst*Ex; + uy += econst*Ey; + uz += econst*Ez; + // Compute temporary gamma factor + constexpr amrex::Real inv_c2 = 1./(PhysConst::c*PhysConst::c); + const amrex::Real 
inv_gamma = 1./std::sqrt(1. + (ux*ux + uy*uy + uz*uz)*inv_c2); + // Magnetic rotation + // - Compute temporary variables + const amrex::Real tx = econst*inv_gamma*Bx; + const amrex::Real ty = econst*inv_gamma*By; + const amrex::Real tz = econst*inv_gamma*Bz; + const amrex::Real tsqi = 2./(1. + tx*tx + ty*ty + tz*tz); + const amrex::Real sx = tx*tsqi; + const amrex::Real sy = ty*tsqi; + const amrex::Real sz = tz*tsqi; + const amrex::Real ux_p = ux + uy*tz - uz*ty; + const amrex::Real uy_p = uy + uz*tx - ux*tz; + const amrex::Real uz_p = uz + ux*ty - uy*tx; + // - Update momentum + ux += uy_p*sz - uz_p*sy; + uy += uz_p*sx - ux_p*sz; + uz += ux_p*sy - uy_p*sx; + // Second half-push for E + ux += econst*Ex; + uy += econst*Ey; + uz += econst*Ez; + gaminv = 1./std::sqrt(1. + (ux*ux + uy*uy + uz*uz)*inv_c2); +} + +#endif // WARPX_PARTICLES_PUSHER_UPDATEMOMENTUM_BORIS_H_ diff --git a/Source/Particles/Pusher/UpdateMomentumVay.H b/Source/Particles/Pusher/UpdateMomentumVay.H new file mode 100644 index 000000000..044297e22 --- /dev/null +++ b/Source/Particles/Pusher/UpdateMomentumVay.H @@ -0,0 +1,54 @@ +#ifndef WARPX_PARTICLES_PUSHER_UPDATEMOMENTUM_VAY_H_ +#define WARPX_PARTICLES_PUSHER_UPDATEMOMENTUM_VAY_H_ + +#include <AMReX_FArrayBox.H> +#include <WarpXConst.H> +#include <AMReX_REAL.H> + +/* \brief Push the particle's positions over one timestep, + * given the value of its momenta `ux`, `uy`, `uz` */ +AMREX_GPU_HOST_DEVICE AMREX_INLINE +void UpdateMomentumVay( + amrex::Real& ux, amrex::Real& uy, amrex::Real& uz, amrex::Real& gaminv, + const amrex::Real Ex, const amrex::Real Ey, const amrex::Real Ez, + const amrex::Real Bx, const amrex::Real By, const amrex::Real Bz, + const amrex::Real q, const amrex::Real m, const amrex::Real dt ) +{ + // Constants + const amrex::Real econst = q*dt/m; + const amrex::Real bconst = 0.5*q*dt/m; + constexpr amrex::Real invclight = 1./PhysConst::c; + constexpr amrex::Real invclightsq = 1./(PhysConst::c*PhysConst::c); + // Compute initial gamma 
+ const amrex::Real inv_gamma = 1./std::sqrt(1. + (ux*ux + uy*uy + uz*uz)*invclightsq); + // Get tau + const amrex::Real taux = bconst*Bx; + const amrex::Real tauy = bconst*By; + const amrex::Real tauz = bconst*Bz; + const amrex::Real tausq = taux*taux+tauy*tauy+tauz*tauz; + // Get U', gamma'^2 + const amrex::Real uxpr = ux + econst*Ex + (uy*tauz-uz*tauy)*inv_gamma; + const amrex::Real uypr = uy + econst*Ey + (uz*taux-ux*tauz)*inv_gamma; + const amrex::Real uzpr = uz + econst*Ez + (ux*tauy-uy*taux)*inv_gamma; + const amrex::Real gprsq = (1. + (uxpr*uxpr + uypr*uypr + uzpr*uzpr)*invclightsq); + // Get u* + const amrex::Real ust = (uxpr*taux + uypr*tauy + uzpr*tauz)*invclight; + // Get new gamma + const amrex::Real sigma = gprsq-tausq; + const amrex::Real gisq = 2./(sigma + std::sqrt(sigma*sigma + 4.*(tausq + ust*ust)) ); + // Get t, s + const amrex::Real bg = bconst*std::sqrt(gisq); + const amrex::Real tx = bg*Bx; + const amrex::Real ty = bg*By; + const amrex::Real tz = bg*Bz; + const amrex::Real s = 1./(1.+tausq*gisq); + // Get t.u' + const amrex::Real tu = tx*uxpr + ty*uypr + tz*uzpr; + // Get new U + ux = s*(uxpr+tx*tu+uypr*tz-uzpr*ty); + uy = s*(uypr+ty*tu+uzpr*tx-uxpr*tz); + uz = s*(uzpr+tz*tu+uxpr*ty-uypr*tx); + gaminv = 1./std::sqrt(1. + (ux*ux + uy*uy + uz*uz)*invclightsq); +} + +#endif // WARPX_PARTICLES_PUSHER_UPDATEMOMENTUM_VAY_H_ diff --git a/Source/Particles/Pusher/UpdatePosition.H b/Source/Particles/Pusher/UpdatePosition.H index 0a4f579f4..a9df63a30 100644 --- a/Source/Particles/Pusher/UpdatePosition.H +++ b/Source/Particles/Pusher/UpdatePosition.H @@ -20,7 +20,7 @@ void UpdatePosition( const amrex::Real inv_gamma = 1./std::sqrt(1. 
+ (ux*ux + uy*uy + uz*uz)*inv_c2); // Update positions over one time step x += ux * inv_gamma * dt; -#if (AMREX_SPACEDIM == 3) || (defined WARPX_RZ) // RZ pushes particles in 3D +#if (AMREX_SPACEDIM == 3) || (defined WARPX_DIM_RZ) // RZ pushes particles in 3D y += uy * inv_gamma * dt; #endif z += uz * inv_gamma * dt; diff --git a/Source/Particles/RigidInjectedParticleContainer.H b/Source/Particles/RigidInjectedParticleContainer.H index 0b27a2f2f..b920ece0a 100644 --- a/Source/Particles/RigidInjectedParticleContainer.H +++ b/Source/Particles/RigidInjectedParticleContainer.H @@ -43,7 +43,7 @@ public: amrex::Real dt) override; virtual void PushPX(WarpXParIter& pti, - amrex::Cuda::ManagedDeviceVector<amrex::Real>& xp, + amrex::Cuda::ManagedDeviceVector<amrex::Real>& xp, amrex::Cuda::ManagedDeviceVector<amrex::Real>& yp, amrex::Cuda::ManagedDeviceVector<amrex::Real>& zp, amrex::Cuda::ManagedDeviceVector<amrex::Real>& giv, @@ -77,7 +77,6 @@ private: // Temporary quantites amrex::Real zinject_plane_lev; amrex::Real zinject_plane_lev_previous; - amrex::Vector<int> done_injecting_temp; bool done_injecting_lev; }; diff --git a/Source/Particles/RigidInjectedParticleContainer.cpp b/Source/Particles/RigidInjectedParticleContainer.cpp index 9bd4cb4fc..36cb9d224 100644 --- a/Source/Particles/RigidInjectedParticleContainer.cpp +++ b/Source/Particles/RigidInjectedParticleContainer.cpp @@ -10,6 +10,9 @@ #include <WarpX_f.H> #include <WarpX.H> #include <WarpXConst.H> +#include <WarpXAlgorithmSelection.H> +#include <UpdateMomentumBoris.H> +#include <UpdateMomentumVay.H> using namespace amrex; @@ -204,48 +207,58 @@ RigidInjectedParticleContainer::BoostandRemapParticles() void RigidInjectedParticleContainer::PushPX(WarpXParIter& pti, - Cuda::ManagedDeviceVector<Real>& xp, + Cuda::ManagedDeviceVector<Real>& xp, Cuda::ManagedDeviceVector<Real>& yp, Cuda::ManagedDeviceVector<Real>& zp, Cuda::ManagedDeviceVector<Real>& giv, Real dt) { - if (WarpX::do_boosted_frame_diagnostic && 
do_boosted_frame_diags) - { - copy_attribs(pti, xp.dataPtr(), yp.dataPtr(), zp.dataPtr()); - } - - // This wraps the call to warpx_particle_pusher so that inheritors can modify the call. + // This wraps the momentum and position advance so that inheritors can modify the call. auto& attribs = pti.GetAttribs(); auto& uxp = attribs[PIdx::ux]; auto& uyp = attribs[PIdx::uy]; auto& uzp = attribs[PIdx::uz]; - auto& Exp = attribs[PIdx::Ex]; - auto& Eyp = attribs[PIdx::Ey]; - auto& Ezp = attribs[PIdx::Ez]; - auto& Bxp = attribs[PIdx::Bx]; - auto& Byp = attribs[PIdx::By]; - auto& Bzp = attribs[PIdx::Bz]; - const long np = pti.numParticles(); // Save the position and momenta, making copies Cuda::ManagedDeviceVector<Real> xp_save, yp_save, zp_save; RealVector uxp_save, uyp_save, uzp_save; + Real* const AMREX_RESTRICT x = xp.dataPtr(); + Real* const AMREX_RESTRICT y = yp.dataPtr(); + Real* const AMREX_RESTRICT z = zp.dataPtr(); + Real* const AMREX_RESTRICT gi = giv.dataPtr(); + Real* const AMREX_RESTRICT ux = uxp.dataPtr(); + Real* const AMREX_RESTRICT uy = uyp.dataPtr(); + Real* const AMREX_RESTRICT uz = uzp.dataPtr(); + Real* const AMREX_RESTRICT Exp = attribs[PIdx::Ex].dataPtr(); + Real* const AMREX_RESTRICT Eyp = attribs[PIdx::Ey].dataPtr(); + Real* const AMREX_RESTRICT Ezp = attribs[PIdx::Ez].dataPtr(); + Real* const AMREX_RESTRICT Bxp = attribs[PIdx::Bx].dataPtr(); + Real* const AMREX_RESTRICT Byp = attribs[PIdx::By].dataPtr(); + Real* const AMREX_RESTRICT Bzp = attribs[PIdx::Bz].dataPtr(); + if (!done_injecting_lev) { - xp_save = xp; - yp_save = yp; - zp_save = zp; - uxp_save = uxp; - uyp_save = uyp; - uzp_save = uzp; + if (!(WarpX::do_boosted_frame_diagnostic && do_boosted_frame_diags)) { + // If the old values are not already saved, create copies here. + xp_save = xp; + yp_save = yp; + zp_save = zp; + uxp_save = uxp; + uyp_save = uyp; + uzp_save = uzp; + } + // Scale the fields of particles about to cross the injection plane. 
// This only approximates what should be happening. The particles // should by advanced a fraction of a time step instead. // Scaling the fields is much easier and may be good enough. - for (int i=0 ; i < zp.size() ; i++) { - const Real dtscale = dt - (zinject_plane_lev_previous - zp[i])/(vzbeam_ave_boosted + WarpX::beta_boost*PhysConst::c); + const Real v_boost = WarpX::beta_boost*PhysConst::c; + const Real z_plane_previous = zinject_plane_lev_previous; + const Real vz_ave_boosted = vzbeam_ave_boosted; + amrex::ParallelFor( pti.numParticles(), + [=] AMREX_GPU_DEVICE (long i) { + const Real dtscale = dt - (z_plane_previous - z[i])/(vz_ave_boosted + v_boost); if (0. < dtscale && dtscale < dt) { Exp[i] *= dtscale; Eyp[i] *= dtscale; @@ -255,46 +268,60 @@ RigidInjectedParticleContainer::PushPX(WarpXParIter& pti, Bzp[i] *= dtscale; } } + ); } - warpx_particle_pusher(&np, - xp.dataPtr(), - yp.dataPtr(), - zp.dataPtr(), - uxp.dataPtr(), uyp.dataPtr(), uzp.dataPtr(), - giv.dataPtr(), - Exp.dataPtr(), Eyp.dataPtr(), Ezp.dataPtr(), - Bxp.dataPtr(), Byp.dataPtr(), Bzp.dataPtr(), - &this->charge, &this->mass, &dt, - &WarpX::particle_pusher_algo); + PhysicalParticleContainer::PushPX(pti, xp, yp, zp, giv, dt); if (!done_injecting_lev) { -#ifdef _OPENMP - const int tid = omp_get_thread_num(); -#else - const int tid = 0; -#endif + + Real* AMREX_RESTRICT x_save; + Real* AMREX_RESTRICT y_save; + Real* AMREX_RESTRICT z_save; + Real* AMREX_RESTRICT ux_save; + Real* AMREX_RESTRICT uy_save; + Real* AMREX_RESTRICT uz_save; + if (!(WarpX::do_boosted_frame_diagnostic && do_boosted_frame_diags)) { + x_save = xp_save.dataPtr(); + y_save = yp_save.dataPtr(); + z_save = zp_save.dataPtr(); + ux_save = uxp_save.dataPtr(); + uy_save = uyp_save.dataPtr(); + uz_save = uzp_save.dataPtr(); + } else { + x_save = pti.GetAttribs(particle_comps["xold"]).dataPtr(); + y_save = pti.GetAttribs(particle_comps["yold"]).dataPtr(); + z_save = pti.GetAttribs(particle_comps["zold"]).dataPtr(); + ux_save = 
pti.GetAttribs(particle_comps["uxold"]).dataPtr(); + uy_save = pti.GetAttribs(particle_comps["uyold"]).dataPtr(); + uz_save = pti.GetAttribs(particle_comps["uzold"]).dataPtr(); + } + // Undo the push for particles not injected yet. // The zp are advanced a fixed amount. - for (int i=0 ; i < zp.size() ; i++) { - if (zp[i] <= zinject_plane_lev) { - uxp[i] = uxp_save[i]; - uyp[i] = uyp_save[i]; - uzp[i] = uzp_save[i]; - giv[i] = 1./std::sqrt(1. + (uxp[i]*uxp[i] + uyp[i]*uyp[i] + uzp[i]*uzp[i])/(PhysConst::c*PhysConst::c)); - xp[i] = xp_save[i]; - yp[i] = yp_save[i]; - if (rigid_advance) { - zp[i] = zp_save[i] + dt*vzbeam_ave_boosted; + const Real z_plane_lev = zinject_plane_lev; + const Real vz_ave_boosted = vzbeam_ave_boosted; + const bool rigid = rigid_advance; + const Real inv_csq = 1./(PhysConst::c*PhysConst::c); + amrex::ParallelFor( pti.numParticles(), + [=] AMREX_GPU_DEVICE (long i) { + if (z[i] <= z_plane_lev) { + ux[i] = ux_save[i]; + uy[i] = uy_save[i]; + uz[i] = uz_save[i]; + gi[i] = 1./std::sqrt(1. + (ux[i]*ux[i] + uy[i]*uy[i] + uz[i]*uz[i])*inv_csq); + x[i] = x_save[i]; + y[i] = y_save[i]; + if (rigid) { + z[i] = z_save[i] + dt*vz_ave_boosted; } else { - zp[i] = zp_save[i] + dt*uzp[i]*giv[i]; + z[i] = z_save[i] + dt*uz[i]*gi[i]; } - done_injecting_temp[tid] = 0; } } + ); } - } void @@ -314,28 +341,26 @@ RigidInjectedParticleContainer::Evolve (int lev, zinject_plane_levels[lev] -= dt*WarpX::beta_boost*PhysConst::c; zinject_plane_lev = zinject_plane_levels[lev]; - // Setup check of whether more particles need to be injected -#ifdef _OPENMP - const int nthreads = omp_get_max_threads(); -#else - const int nthreads = 1; -#endif - done_injecting_temp.assign(nthreads, 1); // We do not use bool because vector<bool> is special. + // Set the done injecting flag whan the inject plane moves out of the + // simulation domain. + // It is much easier to do this check, rather than checking if all of the + // particles have crossed the inject plane. 
+ const Real* plo = Geom(lev).ProbLo(); + const Real* phi = Geom(lev).ProbHi(); + const int zdir = AMREX_SPACEDIM-1; + done_injecting[lev] = ((zinject_plane_levels[lev] < plo[zdir] && WarpX::moving_window_v + WarpX::beta_boost*PhysConst::c >= 0.) || + (zinject_plane_levels[lev] > phi[zdir] && WarpX::moving_window_v + WarpX::beta_boost*PhysConst::c <= 0.)); done_injecting_lev = done_injecting[lev]; PhysicalParticleContainer::Evolve (lev, - Ex, Ey, Ez, - Bx, By, Bz, - jx, jy, jz, + Ex, Ey, Ez, + Bx, By, Bz, + jx, jy, jz, cjx, cjy, cjz, rho, crho, cEx, cEy, cEz, cBx, cBy, cBz, t, dt); - - // Check if all done_injecting_temp are still true. - done_injecting[lev] = std::all_of(done_injecting_temp.begin(), done_injecting_temp.end(), - [](int i) -> bool { return i; }); } void @@ -343,6 +368,8 @@ RigidInjectedParticleContainer::PushP (int lev, Real dt, const MultiFab& Ex, const MultiFab& Ey, const MultiFab& Ez, const MultiFab& Bx, const MultiFab& By, const MultiFab& Bz) { + BL_PROFILE("RigidInjectedParticleContainer::PushP"); + if (do_not_push) return; const std::array<Real,3>& dx = WarpX::CellSize(lev); @@ -351,8 +378,11 @@ RigidInjectedParticleContainer::PushP (int lev, Real dt, #pragma omp parallel #endif { - Cuda::ManagedDeviceVector<Real> xp, yp, zp, giv; - +#ifdef _OPENMP + int thread_num = omp_get_thread_num(); +#else + int thread_num = 0; +#endif for (WarpXParIter pti(*this, lev); pti.isValid(); ++pti) { const Box& box = pti.validbox(); @@ -386,65 +416,74 @@ RigidInjectedParticleContainer::PushP (int lev, Real dt, Byp.assign(np,WarpX::B_external[1]); Bzp.assign(np,WarpX::B_external[2]); - giv.resize(np); + m_giv[thread_num].resize(np); // // copy data from particle container to temp arrays // - pti.GetPosition(xp, yp, zp); + pti.GetPosition(m_xp[thread_num], m_yp[thread_num], m_zp[thread_num]); - const std::array<Real,3>& xyzmin_grid = WarpX::LowerCorner(box, lev); - const int* ixyzmin_grid = box.loVect(); - - const int ll4symtry = false; - const int 
l_lower_order_in_v = true; - long lvect_fieldgathe = 64; - warpx_geteb_energy_conserving( - &np, - xp.dataPtr(), - yp.dataPtr(), - zp.dataPtr(), - Exp.dataPtr(),Eyp.dataPtr(),Ezp.dataPtr(), - Bxp.dataPtr(),Byp.dataPtr(),Bzp.dataPtr(), - ixyzmin_grid, - &xyzmin_grid[0], &xyzmin_grid[1], &xyzmin_grid[2], - &dx[0], &dx[1], &dx[2], - &WarpX::nox, &WarpX::noy, &WarpX::noz, - BL_TO_FORTRAN_ANYD(exfab), - BL_TO_FORTRAN_ANYD(eyfab), - BL_TO_FORTRAN_ANYD(ezfab), - BL_TO_FORTRAN_ANYD(bxfab), - BL_TO_FORTRAN_ANYD(byfab), - BL_TO_FORTRAN_ANYD(bzfab), - &ll4symtry, &l_lower_order_in_v, &WarpX::do_nodal, - &lvect_fieldgathe, &WarpX::field_gathering_algo); + int e_is_nodal = Ex.is_nodal() and Ey.is_nodal() and Ez.is_nodal(); + FieldGather(pti, Exp, Eyp, Ezp, Bxp, Byp, Bzp, + &exfab, &eyfab, &ezfab, &bxfab, &byfab, &bzfab, + Ex.nGrow(), e_is_nodal, 0, np, thread_num, lev, lev); // Save the position and momenta, making copies auto uxp_save = uxp; auto uyp_save = uyp; auto uzp_save = uzp; - warpx_particle_pusher_momenta(&np, - xp.dataPtr(), - yp.dataPtr(), - zp.dataPtr(), - uxp.dataPtr(), uyp.dataPtr(), uzp.dataPtr(), - giv.dataPtr(), - Exp.dataPtr(), Eyp.dataPtr(), Ezp.dataPtr(), - Bxp.dataPtr(), Byp.dataPtr(), Bzp.dataPtr(), - &this->charge, &this->mass, &dt, - &WarpX::particle_pusher_algo); + // This wraps the momentum advance so that inheritors can modify the call. 
+ // Extract pointers to the different particle quantities + const Real* const AMREX_RESTRICT zp = m_zp[thread_num].dataPtr(); + Real* const AMREX_RESTRICT gi = m_giv[thread_num].dataPtr(); + Real* const AMREX_RESTRICT uxpp = uxp.dataPtr(); + Real* const AMREX_RESTRICT uypp = uyp.dataPtr(); + Real* const AMREX_RESTRICT uzpp = uzp.dataPtr(); + const Real* const AMREX_RESTRICT Expp = Exp.dataPtr(); + const Real* const AMREX_RESTRICT Eypp = Eyp.dataPtr(); + const Real* const AMREX_RESTRICT Ezpp = Ezp.dataPtr(); + const Real* const AMREX_RESTRICT Bxpp = Bxp.dataPtr(); + const Real* const AMREX_RESTRICT Bypp = Byp.dataPtr(); + const Real* const AMREX_RESTRICT Bzpp = Bzp.dataPtr(); + + // Loop over the particles and update their momentum + const Real q = this->charge; + const Real m = this->mass; + if (WarpX::particle_pusher_algo == ParticlePusherAlgo::Boris){ + amrex::ParallelFor( pti.numParticles(), + [=] AMREX_GPU_DEVICE (long i) { + UpdateMomentumBoris( uxpp[i], uypp[i], uzpp[i], gi[i], + Expp[i], Eypp[i], Ezpp[i], Bxpp[i], Bypp[i], Bzpp[i], q, m, dt); + } + ); + } else if (WarpX::particle_pusher_algo == ParticlePusherAlgo::Vay) { + amrex::ParallelFor( pti.numParticles(), + [=] AMREX_GPU_DEVICE (long i) { + UpdateMomentumVay( uxpp[i], uypp[i], uzpp[i], gi[i], + Expp[i], Eypp[i], Ezpp[i], Bxpp[i], Bypp[i], Bzpp[i], q, m, dt); + } + ); + } else { + amrex::Abort("Unknown particle pusher"); + }; // Undo the push for particles not injected yet. // It is assumed that PushP will only be called on the first and last steps // and that no particles will cross zinject_plane. 
- for (int i=0 ; i < zp.size() ; i++) { - if (zp[i] <= zinject_plane_levels[lev]) { - uxp[i] = uxp_save[i]; - uyp[i] = uyp_save[i]; - uzp[i] = uzp_save[i]; + const Real* const AMREX_RESTRICT ux_save = uxp_save.dataPtr(); + const Real* const AMREX_RESTRICT uy_save = uyp_save.dataPtr(); + const Real* const AMREX_RESTRICT uz_save = uzp_save.dataPtr(); + const Real zz = zinject_plane_levels[lev]; + amrex::ParallelFor( pti.numParticles(), + [=] AMREX_GPU_DEVICE (long i) { + if (zp[i] <= zz) { + uxpp[i] = ux_save[i]; + uypp[i] = uy_save[i]; + uzpp[i] = uz_save[i]; } } + ); } } diff --git a/Source/Particles/ShapeFactors.H b/Source/Particles/ShapeFactors.H new file mode 100644 index 000000000..9d185714a --- /dev/null +++ b/Source/Particles/ShapeFactors.H @@ -0,0 +1,117 @@ +#ifndef SHAPEFACTORS_H_ +#define SHAPEFACTORS_H_ + +// Compute shape factor and return index of leftmost cell where +// particle writes. +// Specialized templates are defined below for orders 0 to 3. +template <int depos_order> +AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE +int compute_shape_factor(amrex::Real* const sx, amrex::Real xint) +{ + return 0; +}; + +// Compute shape factor for order 0. +template <> +AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE +int compute_shape_factor <0> (amrex::Real* const sx, amrex::Real xmid){ + const int j = (int) (xmid+0.5); + sx[0] = 1.0; + return j; +} + +// Compute shape factor for order 1. +template <> +AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE +int compute_shape_factor <1> (amrex::Real* const sx, amrex::Real xmid){ + const int j = (int) xmid; + const amrex::Real xint = xmid-j; + sx[0] = 1.0 - xint; + sx[1] = xint; + return j; +} + +// Compute shape factor for order 2. 
+template <> +AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE +int compute_shape_factor <2> (amrex::Real* const sx, amrex::Real xmid){ + const int j = (int) (xmid+0.5); + const amrex::Real xint = xmid-j; + sx[0] = 0.5*(0.5-xint)*(0.5-xint); + sx[1] = 0.75-xint*xint; + sx[2] = 0.5*(0.5+xint)*(0.5+xint); + // index of the leftmost cell where particle deposits + return j-1; +} + +// Compute shape factor for order 3. +template <> +AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE +int compute_shape_factor <3> (amrex::Real* const sx, amrex::Real xmid){ + const int j = (int) xmid; + const amrex::Real xint = xmid-j; + sx[0] = 1.0/6.0*(1.0-xint)*(1.0-xint)*(1.0-xint); + sx[1] = 2.0/3.0-xint*xint*(1-xint/2.0); + sx[2] = 2.0/3.0-(1-xint)*(1-xint)*(1.0-0.5*(1-xint)); + sx[3] = 1.0/6.0*xint*xint*xint; + // index of the leftmost cell where particle deposits + return j-1; +} + +// Compute shifted shape factor and return index of leftmost cell where +// particle writes, for Esirkepov algorithm. +// Specialized templates are defined below for orders 1, 2 and 3. +template <int depos_order> +AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE +int compute_shifted_shape_factor (amrex::Real* const sx, + const amrex::Real x_old, + const int i_new); + +// Compute shape factor for order 1. +template <> +AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE +int compute_shifted_shape_factor <1> (amrex::Real* const sx, + const amrex::Real x_old, + const int i_new){ + const int i = (int) x_old; + const int i_shift = i - i_new; + const amrex::Real xint = x_old - i; + sx[1+i_shift] = 1.0 - xint; + sx[2+i_shift] = xint; + return i; +} + +// Compute shape factor for order 2. 
+template <> +AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE +int compute_shifted_shape_factor <2> (amrex::Real* const sx, + const amrex::Real x_old, + const int i_new){ + const int i = (int) (x_old+0.5); + const int i_shift = i - (i_new + 1); + const amrex::Real xint = x_old - i; + sx[1+i_shift] = 0.5*(0.5-xint)*(0.5-xint); + sx[2+i_shift] = 0.75-xint*xint; + sx[3+i_shift] = 0.5*(0.5+xint)*(0.5+xint); + // index of the leftmost cell where particle deposits + return i-1; +} + +// Compute shape factor for order 3. +template <> +AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE +int compute_shifted_shape_factor <3> (amrex::Real* const sx, + const amrex::Real x_old, + const int i_new){ + const int i = (int) x_old; + const int i_shift = i - (i_new + 1); + const amrex::Real xint = x_old - i; + sx[1+i_shift] = 1.0/6.0*(1.0-xint)*(1.0-xint)*(1.0-xint); + sx[2+i_shift] = 2.0/3.0-xint*xint*(1-xint/2.0); + sx[3+i_shift] = 2.0/3.0-(1-xint)*(1-xint)*(1.0-0.5*(1-xint)); + sx[4+i_shift] = 1.0/6.0*xint*xint*xint; + // index of the leftmost cell where particle deposits + return i-1; +} + +#endif // SHAPEFACTORS_H_ diff --git a/Source/Particles/WarpXParticleContainer.H b/Source/Particles/WarpXParticleContainer.H index 662b2e1b8..ac5b47ada 100644 --- a/Source/Particles/WarpXParticleContainer.H +++ b/Source/Particles/WarpXParticleContainer.H @@ -13,7 +13,7 @@ struct PIdx enum { // Particle Attributes stored in amrex::ParticleContainer's struct of array w = 0, // weight ux, uy, uz, Ex, Ey, Ez, Bx, By, Bz, -#ifdef WARPX_RZ +#ifdef WARPX_DIM_RZ theta, // RZ needs all three position components #endif nattribs @@ -104,8 +104,9 @@ public: const amrex::Vector<std::unique_ptr<amrex::FabArray<amrex::BaseFab<int> > > >& masks) {} virtual void FieldGather (int lev, - const amrex::MultiFab& Ex, const amrex::MultiFab& Ey, const amrex::MultiFab& Ez, - const amrex::MultiFab& Bx, const amrex::MultiFab& By, const amrex::MultiFab& Bz) {} + const amrex::MultiFab& Ex, const amrex::MultiFab& Ey, + const 
amrex::MultiFab& Ez, const amrex::MultiFab& Bx, + const amrex::MultiFab& By, const amrex::MultiFab& Bz) {} #ifdef WARPX_DO_ELECTROSTATIC virtual void EvolveES (const amrex::Vector<std::array<std::unique_ptr<amrex::MultiFab>, 3> >& E, @@ -166,13 +167,13 @@ public: virtual void DepositCharge(WarpXParIter& pti, RealVector& wp, - amrex::MultiFab* rhomf, - amrex::MultiFab* crhomf, + amrex::MultiFab* rho, int icomp, - const long np_current, - const long np, + const long offset, + const long np_to_depose, int thread_num, - int lev ); + int lev, + int depos_lev); virtual void DepositCurrent(WarpXParIter& pti, RealVector& wp, diff --git a/Source/Particles/WarpXParticleContainer.cpp b/Source/Particles/WarpXParticleContainer.cpp index a20f0035e..befa5cfed 100644 --- a/Source/Particles/WarpXParticleContainer.cpp +++ b/Source/Particles/WarpXParticleContainer.cpp @@ -12,6 +12,7 @@ #include <GetAndSetPosition.H> #include <UpdatePosition.H> #include <CurrentDeposition.H> +#include <ChargeDeposition.H> using namespace amrex; @@ -27,7 +28,7 @@ void WarpXParIter::GetPosition (Cuda::ManagedDeviceVector<Real>& x, Cuda::ManagedDeviceVector<Real>& y, Cuda::ManagedDeviceVector<Real>& z) const { amrex::ParIter<0,0,PIdx::nattribs>::GetPosition(x, z); -#ifdef WARPX_RZ +#ifdef WARPX_DIM_RZ const auto& attribs = GetAttribs(); const auto& theta = attribs[PIdx::theta]; y.resize(x.size()); @@ -44,10 +45,10 @@ WarpXParIter::GetPosition (Cuda::ManagedDeviceVector<Real>& x, Cuda::ManagedDevi void WarpXParIter::SetPosition (const Cuda::ManagedDeviceVector<Real>& x, const Cuda::ManagedDeviceVector<Real>& y, const Cuda::ManagedDeviceVector<Real>& z) { -#ifdef WARPX_RZ +#ifdef WARPX_DIM_RZ auto& attribs = GetAttribs(); auto& theta = attribs[PIdx::theta]; - Cuda::DeviceVector<Real> r(x.size()); + Cuda::ManagedDeviceVector<Real> r(x.size()); for (unsigned int i=0 ; i < x.size() ; i++) { theta[i] = std::atan2(y[i], x[i]); r[i] = std::sqrt(x[i]*x[i] + y[i]*y[i]); @@ -80,7 +81,7 @@ 
WarpXParticleContainer::WarpXParticleContainer (AmrCore* amr_core, int ispecies) particle_comps["Bx"] = PIdx::Bx; particle_comps["By"] = PIdx::By; particle_comps["Bz"] = PIdx::Bz; -#ifdef WARPX_RZ +#ifdef WARPX_DIM_RZ particle_comps["theta"] = PIdx::theta; #endif @@ -163,7 +164,7 @@ WarpXParticleContainer::AddOneParticle (ParticleTileType& particle_tile, p.pos(1) = y; p.pos(2) = z; #elif (AMREX_SPACEDIM == 2) -#ifdef WARPX_RZ +#ifdef WARPX_DIM_RZ attribs[PIdx::theta] = std::atan2(y, x); x = std::sqrt(x*x + y*y); #endif @@ -209,7 +210,7 @@ WarpXParticleContainer::AddNParticles (int lev, std::size_t np = iend-ibegin; -#ifdef WARPX_RZ +#ifdef WARPX_DIM_RZ Vector<Real> theta(np); #endif @@ -228,7 +229,7 @@ WarpXParticleContainer::AddNParticles (int lev, p.pos(1) = y[i]; p.pos(2) = z[i]; #elif (AMREX_SPACEDIM == 2) -#ifdef WARPX_RZ +#ifdef WARPX_DIM_RZ theta[i-ibegin] = std::atan2(y[i], x[i]); p.pos(0) = std::sqrt(x[i]*x[i] + y[i]*y[i]); #else @@ -265,7 +266,7 @@ WarpXParticleContainer::AddNParticles (int lev, for (int comp = PIdx::uz+1; comp < PIdx::nattribs; ++comp) { -#ifdef WARPX_RZ +#ifdef WARPX_DIM_RZ if (comp == PIdx::theta) { particle_tile.push_back_real(comp, theta.front(), theta.back()); } @@ -394,14 +395,6 @@ WarpXParticleContainer::DepositCurrentFortran(WarpXParIter& pti, &WarpX::nox,&WarpX::noy,&WarpX::noz, &j_is_nodal, &lvect,&WarpX::current_deposition_algo); -#ifdef WARPX_RZ - // Rescale current in r-z mode - warpx_current_deposition_rz_volume_scaling( - jx_ptr, &ngJ, jxntot.getVect(), - jy_ptr, &ngJ, jyntot.getVect(), - jz_ptr, &ngJ, jzntot.getVect(), - &xyzmin[0], &dx[0]); -#endif BL_PROFILE_VAR_STOP(blp_pxr_cd); #ifndef AMREX_USE_GPU @@ -503,7 +496,8 @@ WarpXParticleContainer::DepositCurrent(WarpXParIter& pti, Real* AMREX_RESTRICT yp = m_yp[thread_num].dataPtr() + offset; // Lower corner of tile box physical domain - const std::array<Real, 3>& xyzmin = WarpX::LowerCorner(tilebox, depos_lev);; + // Note that this includes guard cells since it is after 
tilebox.ngrow + const std::array<Real, 3>& xyzmin = WarpX::LowerCorner(tilebox, depos_lev); // xyzmin is built on pti.tilebox(), so it does // not include staggering, so the stagger_shift has to be done by hand. // Alternatively, we could define xyzminx from tbx (and the same for 3 @@ -513,36 +507,36 @@ WarpXParticleContainer::DepositCurrent(WarpXParIter& pti, if (WarpX::current_deposition_algo == CurrentDepositionAlgo::Esirkepov) { if (WarpX::nox == 1){ - doEsirkepovDepositionShapeN<1>(xp, yp, zp, wp.dataPtr(), uxp.dataPtr(), - uyp.dataPtr(), uzp.dataPtr(), jx_arr, jy_arr, + doEsirkepovDepositionShapeN<1>(xp, yp, zp, wp.dataPtr() + offset, uxp.dataPtr() + offset, + uyp.dataPtr() + offset, uzp.dataPtr() + offset, jx_arr, jy_arr, jz_arr, np_to_depose, dt, dx, xyzmin, lo, q); } else if (WarpX::nox == 2){ - doEsirkepovDepositionShapeN<2>(xp, yp, zp, wp.dataPtr(), uxp.dataPtr(), - uyp.dataPtr(), uzp.dataPtr(), jx_arr, jy_arr, + doEsirkepovDepositionShapeN<2>(xp, yp, zp, wp.dataPtr() + offset, uxp.dataPtr() + offset, + uyp.dataPtr() + offset, uzp.dataPtr() + offset, jx_arr, jy_arr, jz_arr, np_to_depose, dt, dx, xyzmin, lo, q); } else if (WarpX::nox == 3){ - doEsirkepovDepositionShapeN<3>(xp, yp, zp, wp.dataPtr(), uxp.dataPtr(), - uyp.dataPtr(), uzp.dataPtr(), jx_arr, jy_arr, + doEsirkepovDepositionShapeN<3>(xp, yp, zp, wp.dataPtr() + offset, uxp.dataPtr() + offset, + uyp.dataPtr() + offset, uzp.dataPtr() + offset, jx_arr, jy_arr, jz_arr, np_to_depose, dt, dx, xyzmin, lo, q); } } else { if (WarpX::nox == 1){ - doDepositionShapeN<1>(xp, yp, zp, wp.dataPtr(), uxp.dataPtr(), - uyp.dataPtr(), uzp.dataPtr(), jx_arr, jy_arr, - jz_arr, offset, np_to_depose, dt, dx, + doDepositionShapeN<1>(xp, yp, zp, wp.dataPtr() + offset, uxp.dataPtr() + offset, + uyp.dataPtr() + offset, uzp.dataPtr() + offset, jx_arr, jy_arr, + jz_arr, np_to_depose, dt, dx, xyzmin, lo, stagger_shift, q); } else if (WarpX::nox == 2){ - doDepositionShapeN<2>(xp, yp, zp, wp.dataPtr(), uxp.dataPtr(), - 
uyp.dataPtr(), uzp.dataPtr(), jx_arr, jy_arr, - jz_arr, offset, np_to_depose, dt, dx, + doDepositionShapeN<2>(xp, yp, zp, wp.dataPtr() + offset, uxp.dataPtr() + offset, + uyp.dataPtr() + offset, uzp.dataPtr() + offset, jx_arr, jy_arr, + jz_arr, np_to_depose, dt, dx, xyzmin, lo, stagger_shift, q); } else if (WarpX::nox == 3){ - doDepositionShapeN<3>(xp, yp, zp, wp.dataPtr(), uxp.dataPtr(), - uyp.dataPtr(), uzp.dataPtr(), jx_arr, jy_arr, - jz_arr, offset, np_to_depose, dt, dx, + doDepositionShapeN<3>(xp, yp, zp, wp.dataPtr() + offset, uxp.dataPtr() + offset, + uyp.dataPtr() + offset, uzp.dataPtr() + offset, jx_arr, jy_arr, + jz_arr, np_to_depose, dt, dx, xyzmin, lo, stagger_shift, q); } } @@ -559,140 +553,87 @@ WarpXParticleContainer::DepositCurrent(WarpXParIter& pti, } void -WarpXParticleContainer::DepositCharge ( WarpXParIter& pti, RealVector& wp, - MultiFab* rhomf, MultiFab* crhomf, int icomp, - const long np_current, - const long np, int thread_num, int lev ) +WarpXParticleContainer::DepositCharge (WarpXParIter& pti, RealVector& wp, + MultiFab* rho, int icomp, + const long offset, const long np_to_depose, + int thread_num, int lev, int depos_lev) { + AMREX_ALWAYS_ASSERT_WITH_MESSAGE((depos_lev==(lev-1)) || + (depos_lev==(lev )), + "Deposition buffers only work for lev-1"); - BL_PROFILE_VAR_NS("PICSAR::ChargeDeposition", blp_pxr_chd); - BL_PROFILE_VAR_NS("PPC::Evolve::Accumulate", blp_accumulate); - - const std::array<Real,3>& xyzmin_tile = WarpX::LowerCorner(pti.tilebox(), lev); - const long lvect = 8; + // If no particles, do not do anything + if (np_to_depose == 0) return; - long ngRho = rhomf->nGrow(); - Real* data_ptr; - Box tile_box = convert(pti.tilebox(), IntVect::TheUnitVector()); + const long ngRho = rho->nGrow(); + const std::array<Real,3>& dx = WarpX::CellSize(std::max(depos_lev,0)); + const Real q = this->charge; - const std::array<Real,3>& dx = WarpX::CellSize(lev); - const std::array<Real,3>& cdx = WarpX::CellSize(std::max(lev-1,0)); + 
BL_PROFILE_VAR_NS("PPC::ChargeDeposition", blp_ppc_chd); + BL_PROFILE_VAR_NS("PPC::Evolve::Accumulate", blp_accumulate); - // Deposit charge for particles that are not in the current buffers - if (np_current > 0) - { - const std::array<Real, 3>& xyzmin = xyzmin_tile; + // Get tile box where charge is deposited. + // The tile box is different when depositing in the buffers (depos_lev<lev) + // or when depositing inside the level (depos_lev=lev) + Box tilebox; + if (lev == depos_lev) { + tilebox = pti.tilebox(); + } else { + const IntVect& ref_ratio = WarpX::RefRatio(depos_lev); + tilebox = amrex::coarsen(pti.tilebox(),ref_ratio); + } + + tilebox.grow(ngRho); #ifdef AMREX_USE_GPU - data_ptr = (*rhomf)[pti].dataPtr(icomp); - auto rholen = (*rhomf)[pti].length(); + // No tiling on GPU: rho_arr points to the full rho array. + MultiFab rhoi(*rho, amrex::make_alias, icomp, 1); + Array4<Real> const& rho_arr = rhoi.array(pti); #else - tile_box.grow(ngRho); - local_rho[thread_num].resize(tile_box); + // Tiling is on: rho_arr points to local_rho[thread_num] + const Box tb = amrex::convert(tilebox, IntVect::TheUnitVector()); - data_ptr = local_rho[thread_num].dataPtr(); - auto rholen = local_rho[thread_num].length(); + local_rho[thread_num].resize(tb); - local_rho[thread_num].setVal(0.0); -#endif + // local_rho[thread_num] is set to zero + local_rho[thread_num].setVal(0.0); -#if (AMREX_SPACEDIM == 3) - const long nx = rholen[0]-1-2*ngRho; - const long ny = rholen[1]-1-2*ngRho; - const long nz = rholen[2]-1-2*ngRho; -#else - const long nx = rholen[0]-1-2*ngRho; - const long ny = 0; - const long nz = rholen[1]-1-2*ngRho; + Array4<Real> const& rho_arr = local_rho[thread_num].array(); #endif - BL_PROFILE_VAR_START(blp_pxr_chd); - warpx_charge_deposition(data_ptr, &np_current, - m_xp[thread_num].dataPtr(), - m_yp[thread_num].dataPtr(), - m_zp[thread_num].dataPtr(), - wp.dataPtr(), - &this->charge, - &xyzmin[0], &xyzmin[1], &xyzmin[2], - &dx[0], &dx[1], &dx[2], &nx, &ny, &nz, - 
&ngRho, &ngRho, &ngRho, - &WarpX::nox,&WarpX::noy,&WarpX::noz, - &lvect, &WarpX::charge_deposition_algo); -#ifdef WARPX_RZ - warpx_charge_deposition_rz_volume_scaling( - data_ptr, &ngRho, rholen.getVect(), - &xyzmin[0], &dx[0]); -#endif - BL_PROFILE_VAR_STOP(blp_pxr_chd); - -#ifndef AMREX_USE_GPU - BL_PROFILE_VAR_START(blp_accumulate); - - (*rhomf)[pti].atomicAdd(local_rho[thread_num], tile_box, tile_box, 0, icomp, 1); - - BL_PROFILE_VAR_STOP(blp_accumulate); -#endif - } - - // Deposit charge for particles that are in the current buffers - if (np_current < np) - { - const IntVect& ref_ratio = WarpX::RefRatio(lev-1); - const Box& ctilebox = amrex::coarsen(pti.tilebox(), ref_ratio); - const std::array<Real,3>& cxyzmin_tile = WarpX::LowerCorner(ctilebox, lev-1); - -#ifdef AMREX_USE_GPU - data_ptr = (*crhomf)[pti].dataPtr(icomp); - auto rholen = (*crhomf)[pti].length(); -#else - tile_box = amrex::convert(ctilebox, IntVect::TheUnitVector()); - tile_box.grow(ngRho); - local_rho[thread_num].resize(tile_box); - - data_ptr = local_rho[thread_num].dataPtr(); - auto rholen = local_rho[thread_num].length(); + // GPU, no tiling: deposit directly in rho + // CPU, tiling: deposit into local_rho - local_rho[thread_num].setVal(0.0); -#endif + Real* AMREX_RESTRICT xp = m_xp[thread_num].dataPtr() + offset; + Real* AMREX_RESTRICT zp = m_zp[thread_num].dataPtr() + offset; + Real* AMREX_RESTRICT yp = m_yp[thread_num].dataPtr() + offset; -#if (AMREX_SPACEDIM == 3) - const long nx = rholen[0]-1-2*ngRho; - const long ny = rholen[1]-1-2*ngRho; - const long nz = rholen[2]-1-2*ngRho; -#else - const long nx = rholen[0]-1-2*ngRho; - const long ny = 0; - const long nz = rholen[1]-1-2*ngRho; -#endif + // Lower corner of tile box physical domain + // Note that this includes guard cells since it is after tilebox.ngrow + const std::array<Real, 3>& xyzmin = WarpX::LowerCorner(tilebox, depos_lev); + // Indices of the lower bound + const Dim3 lo = lbound(tilebox); - long ncrse = np - np_current; - 
BL_PROFILE_VAR_START(blp_pxr_chd); - warpx_charge_deposition(data_ptr, &ncrse, - m_xp[thread_num].dataPtr() + np_current, - m_yp[thread_num].dataPtr() + np_current, - m_zp[thread_num].dataPtr() + np_current, - wp.dataPtr() + np_current, - &this->charge, - &cxyzmin_tile[0], &cxyzmin_tile[1], &cxyzmin_tile[2], - &cdx[0], &cdx[1], &cdx[2], &nx, &ny, &nz, - &ngRho, &ngRho, &ngRho, - &WarpX::nox,&WarpX::noy,&WarpX::noz, - &lvect, &WarpX::charge_deposition_algo); -#ifdef WARPX_RZ - warpx_charge_deposition_rz_volume_scaling( - data_ptr, &ngRho, rholen.getVect(), - &cxyzmin_tile[0], &cdx[0]); -#endif - BL_PROFILE_VAR_STOP(blp_pxr_chd); + BL_PROFILE_VAR_START(blp_ppc_chd); + if (WarpX::nox == 1){ + doChargeDepositionShapeN<1>(xp, yp, zp, wp.dataPtr()+offset, rho_arr, + np_to_depose, dx, xyzmin, lo, q); + } else if (WarpX::nox == 2){ + doChargeDepositionShapeN<2>(xp, yp, zp, wp.dataPtr()+offset, rho_arr, + np_to_depose, dx, xyzmin, lo, q); + } else if (WarpX::nox == 3){ + doChargeDepositionShapeN<3>(xp, yp, zp, wp.dataPtr()+offset, rho_arr, + np_to_depose, dx, xyzmin, lo, q); + } + BL_PROFILE_VAR_STOP(blp_ppc_chd); #ifndef AMREX_USE_GPU - BL_PROFILE_VAR_START(blp_accumulate); + BL_PROFILE_VAR_START(blp_accumulate); - (*crhomf)[pti].atomicAdd(local_rho[thread_num], tile_box, tile_box, 0, icomp, 1); + (*rho)[pti].atomicAdd(local_rho[thread_num], tb, tb, 0, icomp, 1); - BL_PROFILE_VAR_STOP(blp_accumulate); + BL_PROFILE_VAR_STOP(blp_accumulate); #endif - } -}; +} void WarpXParticleContainer::DepositCharge (Vector<std::unique_ptr<MultiFab> >& rho, bool local) @@ -769,8 +710,6 @@ WarpXParticleContainer::GetChargeDensity (int lev, bool local) BoxArray nba = ba; nba.surroundingNodes(); - const std::array<Real,3>& dx = WarpX::CellSize(lev); - const int ng = WarpX::nox; auto rho = std::unique_ptr<MultiFab>(new MultiFab(nba,dm,1,ng)); @@ -780,75 +719,28 @@ WarpXParticleContainer::GetChargeDensity (int lev, bool local) #pragma omp parallel { #endif - Cuda::ManagedDeviceVector<Real> xp, 
yp, zp; #ifdef _OPENMP - FArrayBox rho_loc; + int thread_num = omp_get_thread_num(); +#else + int thread_num = 0; #endif for (WarpXParIter pti(*this, lev); pti.isValid(); ++pti) { + const long np = pti.numParticles(); auto& wp = pti.GetAttribs(PIdx::w); - const long np = pti.numParticles(); - - pti.GetPosition(xp, yp, zp); + pti.GetPosition(m_xp[thread_num], m_yp[thread_num], m_zp[thread_num]); - // Data on the grid - Real* data_ptr; - FArrayBox& rhofab = (*rho)[pti]; + DepositCharge(pti, wp, rho.get(), 0, 0, np, thread_num, lev, lev); + } #ifdef _OPENMP - const std::array<Real,3>& xyzmin_tile = WarpX::LowerCorner(pti.tilebox(), lev); - Box tile_box = convert(pti.tilebox(), IntVect::TheUnitVector()); - const std::array<Real, 3>& xyzmin = xyzmin_tile; - tile_box.grow(ng); - rho_loc.resize(tile_box); - rho_loc = 0.0; - data_ptr = rho_loc.dataPtr(); - auto rholen = rho_loc.length(); -#else - const Box& box = pti.validbox(); - const std::array<Real,3>& xyzmin_grid = WarpX::LowerCorner(box, lev); - const std::array<Real, 3>& xyzmin = xyzmin_grid; - data_ptr = rhofab.dataPtr(); - auto rholen = rhofab.length(); -#endif - -#if (AMREX_SPACEDIM == 3) - const long nx = rholen[0]-1-2*ng; - const long ny = rholen[1]-1-2*ng; - const long nz = rholen[2]-1-2*ng; -#else - const long nx = rholen[0]-1-2*ng; - const long ny = 0; - const long nz = rholen[1]-1-2*ng; -#endif - - long nxg = ng; - long nyg = ng; - long nzg = ng; - long lvect = 8; - - warpx_charge_deposition(data_ptr, - &np, - xp.dataPtr(), - yp.dataPtr(), - zp.dataPtr(), wp.dataPtr(), - &this->charge, &xyzmin[0], &xyzmin[1], &xyzmin[2], - &dx[0], &dx[1], &dx[2], &nx, &ny, &nz, - &nxg, &nyg, &nzg, &WarpX::nox,&WarpX::noy,&WarpX::noz, - &lvect, &WarpX::charge_deposition_algo); -#ifdef WARPX_RZ - long ngRho = WarpX::nox; - warpx_charge_deposition_rz_volume_scaling( - data_ptr, &ngRho, rholen.getVect(), - &xyzmin[0], &dx[0]); + } #endif -#ifdef _OPENMP - rhofab.atomicAdd(rho_loc); - } +#ifdef WARPX_DIM_RZ + 
WarpX::GetInstance().ApplyInverseVolumeScalingToChargeDensity(rho.get(), lev); #endif - } if (!local) rho->SumBoundary(gm.periodicity()); @@ -1022,7 +914,7 @@ WarpXParticleContainer::PushX (int lev, Real dt) Real* AMREX_RESTRICT ux = attribs[PIdx::ux].dataPtr(); Real* AMREX_RESTRICT uy = attribs[PIdx::uy].dataPtr(); Real* AMREX_RESTRICT uz = attribs[PIdx::uz].dataPtr(); -#ifdef WARPX_RZ +#ifdef WARPX_DIM_RZ Real* AMREX_RESTRICT theta = attribs[PIdx::theta].dataPtr(); #endif // Loop over the particles and update their position @@ -1030,12 +922,12 @@ WarpXParticleContainer::PushX (int lev, Real dt) [=] AMREX_GPU_DEVICE (long i) { ParticleType& p = pstructs[i]; // Particle object that gets updated Real x, y, z; // Temporary variables -#ifndef WARPX_RZ +#ifndef WARPX_DIM_RZ GetPosition( x, y, z, p ); // Initialize x, y, z UpdatePosition( x, y, z, ux[i], uy[i], uz[i], dt); SetPosition( p, x, y, z ); // Update the object p #else - // For WARPX_RZ, the particles are still pushed in 3D Cartesian + // For WARPX_DIM_RZ, the particles are still pushed in 3D Cartesian GetCartesianPositionFromCylindrical( x, y, z, p, theta[i] ); UpdatePosition( x, y, z, ux[i], uy[i], uz[i], dt); SetCylindricalPositionFromCartesian( p, theta[i], x, y, z ); diff --git a/Source/Utils/WarpXAlgorithmSelection.H b/Source/Utils/WarpXAlgorithmSelection.H index 3fb23698a..6a32513b7 100644 --- a/Source/Utils/WarpXAlgorithmSelection.H +++ b/Source/Utils/WarpXAlgorithmSelection.H @@ -34,11 +34,9 @@ struct CurrentDepositionAlgo { }; struct ChargeDepositionAlgo { - // These numbers corresponds to the algorithm code in WarpX's - // `warpx_charge_deposition` function + // Only the Standard algorithm is implemented enum { - Vectorized = 0, - Standard = 1 + Standard = 0 }; }; diff --git a/Source/Utils/WarpXAlgorithmSelection.cpp b/Source/Utils/WarpXAlgorithmSelection.cpp index 2c8038ccd..842085a36 100644 --- a/Source/Utils/WarpXAlgorithmSelection.cpp +++ b/Source/Utils/WarpXAlgorithmSelection.cpp @@ -8,7 +8,7 @@ 
const std::map<std::string, int> maxwell_solver_algo_to_int = { {"yee", MaxwellSolverAlgo::Yee }, -#ifndef WARPX_RZ // Not available in RZ +#ifndef WARPX_DIM_RZ // Not available in RZ {"ckc", MaxwellSolverAlgo::CKC }, #endif {"default", MaxwellSolverAlgo::Yee } @@ -31,12 +31,7 @@ const std::map<std::string, int> current_deposition_algo_to_int = { const std::map<std::string, int> charge_deposition_algo_to_int = { {"standard", ChargeDepositionAlgo::Standard }, -#if (!defined AMREX_USE_GPU)&&(AMREX_SPACEDIM == 3) // Only available on CPU and 3D - {"vectorized", ChargeDepositionAlgo::Vectorized }, - {"default", ChargeDepositionAlgo::Vectorized } -#else {"default", ChargeDepositionAlgo::Standard } -#endif }; const std::map<std::string, int> gathering_algo_to_int = { diff --git a/Source/WarpX.H b/Source/WarpX.H index a25eef9e4..927cc1f32 100644 --- a/Source/WarpX.H +++ b/Source/WarpX.H @@ -152,12 +152,12 @@ public: BilinearFilter bilinear_filter; amrex::Vector< std::unique_ptr<NCIGodfreyFilter> > nci_godfrey_filter_exeybz; amrex::Vector< std::unique_ptr<NCIGodfreyFilter> > nci_godfrey_filter_bxbyez; - + static int num_mirrors; amrex::Vector<amrex::Real> mirror_z; amrex::Vector<amrex::Real> mirror_z_width; amrex::Vector<int> mirror_z_npoints; - + void applyMirrors(amrex::Real time); void ComputeDt (); @@ -178,6 +178,16 @@ public: void EvolveE (int lev, PatchType patch_type, amrex::Real dt); void EvolveF (int lev, PatchType patch_type, amrex::Real dt, DtType dt_type); +#ifdef WARPX_DIM_RZ + void ApplyInverseVolumeScalingToCurrentDensity(amrex::MultiFab* Jx, + amrex::MultiFab* Jy, + amrex::MultiFab* Jz, + int lev); + + void ApplyInverseVolumeScalingToChargeDensity(amrex::MultiFab* Rho, + int lev); +#endif + void DampPML (); void DampPML (int lev); void DampPML (int lev, PatchType patch_type); @@ -247,6 +257,7 @@ public: static int do_moving_window; static int moving_window_dir; + static amrex::Real moving_window_v; // slice generation // void InitializeSliceMultiFabs (); @@ 
-489,17 +500,18 @@ private: int do_pml = 1; int pml_ncell = 10; int pml_delta = 10; + amrex::IntVect do_pml_Lo = amrex::IntVect::TheUnitVector(); + amrex::IntVect do_pml_Hi = amrex::IntVect::TheUnitVector(); amrex::Vector<std::unique_ptr<PML> > pml; amrex::Real moving_window_x = std::numeric_limits<amrex::Real>::max(); - amrex::Real moving_window_v = std::numeric_limits<amrex::Real>::max(); amrex::Real current_injection_position = 0; // Plasma injection parameters int warpx_do_continuous_injection = 0; int num_injected_species = -1; amrex::Vector<int> injected_plasma_species; - + int do_electrostatic = 0; int n_buffer = 4; amrex::Real const_dt = 0.5e-11; diff --git a/Source/WarpX.cpp b/Source/WarpX.cpp index 1f5ade13a..1b653fd7f 100644 --- a/Source/WarpX.cpp +++ b/Source/WarpX.cpp @@ -30,6 +30,7 @@ Vector<Real> WarpX::B_external(3, 0.0); int WarpX::do_moving_window = 0; int WarpX::moving_window_dir = -1; +Real WarpX::moving_window_v = std::numeric_limits<amrex::Real>::max(); Real WarpX::gamma_boost = 1.; Real WarpX::beta_boost = 0.; @@ -334,7 +335,19 @@ WarpX::ReadParameters () "The boosted frame diagnostic currently only works if the boost is in the z direction."); pp.get("num_snapshots_lab", num_snapshots_lab); - pp.get("dt_snapshots_lab", dt_snapshots_lab); + + // Read either dz_snapshots_lab or dt_snapshots_lab + bool snapshot_interval_is_specified = 0; + Real dz_snapshots_lab = 0; + snapshot_interval_is_specified += pp.query("dt_snapshots_lab", dt_snapshots_lab); + if ( pp.query("dz_snapshots_lab", dz_snapshots_lab) ){ + dt_snapshots_lab = dz_snapshots_lab/PhysConst::c; + snapshot_interval_is_specified = 1; + } + AMREX_ALWAYS_ASSERT_WITH_MESSAGE( + snapshot_interval_is_specified, + "When using back-transformed diagnostics, user should specify either dz_snapshots_lab or dt_snapshots_lab."); + pp.get("gamma_boost", gamma_boost); pp.query("do_boosted_frame_fields", do_boosted_frame_fields); @@ -383,6 +396,22 @@ WarpX::ReadParameters () pp.query("pml_ncell", 
pml_ncell); pp.query("pml_delta", pml_delta); + Vector<int> parse_do_pml_Lo(AMREX_SPACEDIM,1); + pp.queryarr("do_pml_Lo", parse_do_pml_Lo); + do_pml_Lo[0] = parse_do_pml_Lo[0]; + do_pml_Lo[1] = parse_do_pml_Lo[1]; +#if (AMREX_SPACEDIM == 3) + do_pml_Lo[2] = parse_do_pml_Lo[2]; +#endif + Vector<int> parse_do_pml_Hi(AMREX_SPACEDIM,1); + pp.queryarr("do_pml_Hi", parse_do_pml_Hi); + do_pml_Hi[0] = parse_do_pml_Hi[0]; + do_pml_Hi[1] = parse_do_pml_Hi[1]; +#if (AMREX_SPACEDIM == 3) + do_pml_Hi[2] = parse_do_pml_Hi[2]; +#endif + + pp.query("dump_openpmd", dump_openpmd); pp.query("dump_plotfiles", dump_plotfiles); pp.query("plot_raw_fields", plot_raw_fields); @@ -393,7 +422,7 @@ WarpX::ReadParameters () if (not user_fields_to_plot){ // If not specified, set default values fields_to_plot = {"Ex", "Ey", "Ez", "Bx", "By", - "Bz", "jx", "jy", "jz", + "Bz", "jx", "jy", "jz", "part_per_cell"}; } // set plot_rho to true of the users requests it, so that @@ -411,9 +440,9 @@ WarpX::ReadParameters () // If user requests to plot proc_number for a serial run, // delete proc_number from fields_to_plot if (ParallelDescriptor::NProcs() == 1){ - fields_to_plot.erase(std::remove(fields_to_plot.begin(), - fields_to_plot.end(), - "proc_number"), + fields_to_plot.erase(std::remove(fields_to_plot.begin(), + fields_to_plot.end(), + "proc_number"), fields_to_plot.end()); } @@ -497,11 +526,9 @@ WarpX::ReadParameters () { ParmParse pp("algo"); // If not in RZ mode, read use_picsar_deposition - // In RZ mode, use_picsar_deposition is on, as the C++ version + // In RZ mode, use_picsar_deposition is on, as the C++ version // of the deposition does not support RZ -#ifndef WARPX_RZ pp.query("use_picsar_deposition", use_picsar_deposition); -#endif current_deposition_algo = GetAlgorithmInteger(pp, "current_deposition"); charge_deposition_algo = GetAlgorithmInteger(pp, "charge_deposition"); field_gathering_algo = GetAlgorithmInteger(pp, "field_gathering"); @@ -876,6 +903,21 @@ WarpX::AllocLevelMFs (int 
lev, const BoxArray& ba, const DistributionMapping& dm rho_cp[lev].reset(new MultiFab(amrex::convert(cba,IntVect::TheUnitVector()),dm,2,ngRho)); rho_cp_owner_masks[lev] = std::move(rho_cp[lev]->OwnerMask(cperiod)); } + if (fft_hybrid_mpi_decomposition == false){ + // Allocate and initialize the spectral solver + std::array<Real,3> cdx = CellSize(lev-1); + #if (AMREX_SPACEDIM == 3) + RealVect cdx_vect(cdx[0], cdx[1], cdx[2]); + #elif (AMREX_SPACEDIM == 2) + RealVect cdx_vect(cdx[0], cdx[2]); + #endif + // Get the cell-centered box, with guard cells + BoxArray realspace_ba = cba; // Copy box + realspace_ba.enclosedCells().grow(ngE); // cell-centered + guard cells + // Define spectral solver + spectral_solver_cp[lev].reset( new SpectralSolver( realspace_ba, dm, + nox_fft, noy_fft, noz_fft, do_nodal, cdx_vect, dt[lev] ) ); + } #endif } @@ -907,7 +949,7 @@ WarpX::AllocLevelMFs (int lev, const BoxArray& ba, const DistributionMapping& dm current_buf[lev][0].reset( new MultiFab(amrex::convert(cba,jx_nodal_flag),dm,1,ngJ)); current_buf[lev][1].reset( new MultiFab(amrex::convert(cba,jy_nodal_flag),dm,1,ngJ)); current_buf[lev][2].reset( new MultiFab(amrex::convert(cba,jz_nodal_flag),dm,1,ngJ)); - if (do_dive_cleaning || plot_rho) { + if (rho_cp[lev]) { charge_buf[lev].reset( new MultiFab(amrex::convert(cba,IntVect::TheUnitVector()),dm,2,ngRho)); } current_buffer_masks[lev].reset( new iMultiFab(ba, dm, 1, 1) ); @@ -995,7 +1037,7 @@ WarpX::ComputeDivB (MultiFab& divB, int dcomp, { Real dxinv = 1./dx[0], dyinv = 1./dx[1], dzinv = 1./dx[2]; -#ifdef WARPX_RZ +#ifdef WARPX_DIM_RZ const Real rmin = GetInstance().Geom(0).ProbLo(0); #endif @@ -1014,7 +1056,7 @@ WarpX::ComputeDivB (MultiFab& divB, int dcomp, [=] AMREX_GPU_DEVICE(int i, int j, int k) noexcept { warpx_computedivb(i, j, k, dcomp, divBfab, Bxfab, Byfab, Bzfab, dxinv, dyinv, dzinv -#ifdef WARPX_RZ +#ifdef WARPX_DIM_RZ ,rmin #endif ); @@ -1029,7 +1071,7 @@ WarpX::ComputeDivB (MultiFab& divB, int dcomp, { Real dxinv = 
1./dx[0], dyinv = 1./dx[1], dzinv = 1./dx[2]; -#ifdef WARPX_RZ +#ifdef WARPX_DIM_RZ const Real rmin = GetInstance().Geom(0).ProbLo(0); #endif @@ -1048,7 +1090,7 @@ WarpX::ComputeDivB (MultiFab& divB, int dcomp, [=] AMREX_GPU_DEVICE(int i, int j, int k) noexcept { warpx_computedivb(i, j, k, dcomp, divBfab, Bxfab, Byfab, Bzfab, dxinv, dyinv, dzinv -#ifdef WARPX_RZ +#ifdef WARPX_DIM_RZ ,rmin #endif ); @@ -1063,7 +1105,7 @@ WarpX::ComputeDivE (MultiFab& divE, int dcomp, { Real dxinv = 1./dx[0], dyinv = 1./dx[1], dzinv = 1./dx[2]; -#ifdef WARPX_RZ +#ifdef WARPX_DIM_RZ const Real rmin = GetInstance().Geom(0).ProbLo(0); #endif @@ -1082,7 +1124,7 @@ WarpX::ComputeDivE (MultiFab& divE, int dcomp, [=] AMREX_GPU_DEVICE(int i, int j, int k) noexcept { warpx_computedive(i, j, k, dcomp, divEfab, Exfab, Eyfab, Ezfab, dxinv, dyinv, dzinv -#ifdef WARPX_RZ +#ifdef WARPX_DIM_RZ ,rmin #endif ); @@ -1097,7 +1139,7 @@ WarpX::ComputeDivE (MultiFab& divE, int dcomp, { Real dxinv = 1./dx[0], dyinv = 1./dx[1], dzinv = 1./dx[2]; -#ifdef WARPX_RZ +#ifdef WARPX_DIM_RZ const Real rmin = GetInstance().Geom(0).ProbLo(0); #endif @@ -1116,7 +1158,7 @@ WarpX::ComputeDivE (MultiFab& divE, int dcomp, [=] AMREX_GPU_DEVICE(int i, int j, int k) noexcept { warpx_computedive(i, j, k, dcomp, divEfab, Exfab, Eyfab, Ezfab, dxinv, dyinv, dzinv -#ifdef WARPX_RZ +#ifdef WARPX_DIM_RZ ,rmin #endif ); |