diff options
Diffstat (limited to 'Source/Parallelization')
-rw-r--r-- | Source/Parallelization/GuardCellManager.H | 1 | ||||
-rw-r--r-- | Source/Parallelization/GuardCellManager.cpp | 8 | ||||
-rw-r--r-- | Source/Parallelization/WarpXComm.cpp | 60 | ||||
-rw-r--r-- | Source/Parallelization/WarpXComm_K.H | 202 |
4 files changed, 264 insertions, 7 deletions
diff --git a/Source/Parallelization/GuardCellManager.H b/Source/Parallelization/GuardCellManager.H index 5e9971544..d2f764fbb 100644 --- a/Source/Parallelization/GuardCellManager.H +++ b/Source/Parallelization/GuardCellManager.H @@ -47,6 +47,7 @@ public: const int maxwell_solver_id, const int max_level, const amrex::Array<amrex::Real,3> v_galilean, + const amrex::Array<amrex::Real,3> v_comoving, const bool safe_guard_cells); // Guard cells allocated for MultiFabs E and B diff --git a/Source/Parallelization/GuardCellManager.cpp b/Source/Parallelization/GuardCellManager.cpp index 1b8d38f0c..85490f2e0 100644 --- a/Source/Parallelization/GuardCellManager.cpp +++ b/Source/Parallelization/GuardCellManager.cpp @@ -28,6 +28,7 @@ guardCellManager::Init( const int maxwell_solver_id, const int max_level, const amrex::Array<amrex::Real,3> v_galilean, + const amrex::Array<amrex::Real,3> v_comoving, const bool safe_guard_cells) { // When using subcycling, the particles on the fine level perform two pushes @@ -37,10 +38,9 @@ guardCellManager::Init( int ngy_tmp = (max_level > 0 && do_subcycling == 1) ? nox+1 : nox; int ngz_tmp = (max_level > 0 && do_subcycling == 1) ? nox+1 : nox; - if ((v_galilean[0]!=0) or - (v_galilean[1]!=0) or - (v_galilean[2]!=0)){ - // Add one guard cell in the case of the galilean algorithm + // Add one guard cell in the case of the Galilean or comoving algorithms + if (v_galilean[0] != 0. || v_galilean[1] != 0. || v_galilean[2] != 0. || + v_comoving[0] != 0. || v_comoving[1] != 0. || v_comoving[2] != 0. ) { ngx_tmp += 1; ngy_tmp += 1; ngz_tmp += 1; diff --git a/Source/Parallelization/WarpXComm.cpp b/Source/Parallelization/WarpXComm.cpp index e6d4c91d4..6f408613d 100644 --- a/Source/Parallelization/WarpXComm.cpp +++ b/Source/Parallelization/WarpXComm.cpp @@ -10,6 +10,9 @@ #include "WarpX.H" #include "WarpXSumGuardCells.H" #include "Utils/CoarsenMR.H" +#ifdef WARPX_USE_PSATD +#include "FieldSolver/SpectralSolver/SpectralKSpace.H" +#endif #include <algorithm> #include <cstdlib> @@ -70,6 +73,42 @@ WarpX::UpdateAuxilaryData () void WarpX::UpdateAuxilaryDataStagToNodal () { +#ifdef WARPX_USE_PSATD + const int fg_nox = WarpX::field_gathering_nox; + const int fg_noy = WarpX::field_gathering_noy; + const int fg_noz = WarpX::field_gathering_noz; + + // Compute real-space stencil coefficients along x + amrex::Vector<Real> h_stencil_coef_x = getFornbergStencilCoefficients(fg_nox, false); + amrex::Gpu::DeviceVector<Real> d_stencil_coef_x(h_stencil_coef_x.size()); + amrex::Gpu::copyAsync(amrex::Gpu::hostToDevice, + h_stencil_coef_x.begin(), + h_stencil_coef_x.end(), + d_stencil_coef_x.begin()); + amrex::Gpu::synchronize(); + amrex::Real const* p_stencil_coef_x = d_stencil_coef_x.data(); + + // Compute real-space stencil coefficients along y + amrex::Vector<Real> h_stencil_coef_y = getFornbergStencilCoefficients(fg_noy, false); + amrex::Gpu::DeviceVector<Real> d_stencil_coef_y(h_stencil_coef_y.size()); + amrex::Gpu::copyAsync(amrex::Gpu::hostToDevice, + h_stencil_coef_y.begin(), + h_stencil_coef_y.end(), + d_stencil_coef_y.begin()); + amrex::Gpu::synchronize(); + amrex::Real const* p_stencil_coef_y = d_stencil_coef_y.data(); + + // Compute real-space stencil coefficients along z + amrex::Vector<Real> h_stencil_coef_z = getFornbergStencilCoefficients(fg_noz, false); + amrex::Gpu::DeviceVector<Real> d_stencil_coef_z(h_stencil_coef_z.size()); + amrex::Gpu::copyAsync(amrex::Gpu::hostToDevice, + h_stencil_coef_z.begin(), + h_stencil_coef_z.end(), + d_stencil_coef_z.begin()); + amrex::Gpu::synchronize(); + amrex::Real const* p_stencil_coef_z = d_stencil_coef_z.data(); +#endif + // For level 0, we only need to do the average. #ifdef _OPENMP #pragma omp parallel if (Gpu::notInLaunchRegion()) @@ -90,19 +129,34 @@ WarpX::UpdateAuxilaryDataStagToNodal () Array4<Real const> const& ey_fp = Efield_fp[0][1]->const_array(mfi); Array4<Real const> const& ez_fp = Efield_fp[0][2]->const_array(mfi); - const Box& bx = mfi.fabbox(); - amrex::ParallelFor(bx, - [=] AMREX_GPU_DEVICE (int j, int k, int l) noexcept + Box bx = mfi.validbox(); + // TODO It seems like it is necessary to loop over the valid box grown + // with 2 guard cells. Should this number of guard cells be expressed + // in terms of the parameters defined in the guardCellManager class? + bx.grow(2); + amrex::ParallelFor(bx, [=] AMREX_GPU_DEVICE (int j, int k, int l) noexcept { +// FDTD variant +#ifndef WARPX_USE_PSATD warpx_interp_nd_bfield_x(j,k,l, bx_aux, bx_fp); warpx_interp_nd_bfield_y(j,k,l, by_aux, by_fp); warpx_interp_nd_bfield_z(j,k,l, bz_aux, bz_fp); warpx_interp_nd_efield_x(j,k,l, ex_aux, ex_fp); warpx_interp_nd_efield_y(j,k,l, ey_aux, ey_fp); warpx_interp_nd_efield_z(j,k,l, ez_aux, ez_fp); +// PSATD variant +#else + warpx_interp_nd_bfield_x(j,k,l, bx_aux, bx_fp, fg_noy, fg_noz, p_stencil_coef_y, p_stencil_coef_z); + warpx_interp_nd_bfield_y(j,k,l, by_aux, by_fp, fg_nox, fg_noz, p_stencil_coef_x, p_stencil_coef_z); + warpx_interp_nd_bfield_z(j,k,l, bz_aux, bz_fp, fg_nox, fg_noy, p_stencil_coef_x, p_stencil_coef_y); + warpx_interp_nd_efield_x(j,k,l, ex_aux, ex_fp, fg_nox, p_stencil_coef_x); + warpx_interp_nd_efield_y(j,k,l, ey_aux, ey_fp, fg_noy, p_stencil_coef_y); + warpx_interp_nd_efield_z(j,k,l, ez_aux, ez_fp, fg_noz, p_stencil_coef_z); +#endif }); } + // NOTE: high-order interpolation is not implemented for mesh refinement for (int lev = 1; lev <= finest_level; ++lev) { BoxArray const& nba = Bfield_aux[lev][0]->boxArray(); diff --git a/Source/Parallelization/WarpXComm_K.H b/Source/Parallelization/WarpXComm_K.H index b490636ee..7a8095f18 100644 --- a/Source/Parallelization/WarpXComm_K.H +++ b/Source/Parallelization/WarpXComm_K.H @@ -378,6 +378,10 @@ void warpx_interp_nd_bfield_z (int j, int k, int l, Bza(j,k,l) = bg + (bf-bc); } +// With the FDTD Maxwell solver, this is the linear interpolation function used to +// interpolate the Bx field from a staggered grid to a nodal grid, before gathering +// the field from the nodes to the particle positions (momentum-conserving gathering) +#ifndef WARPX_USE_PSATD AMREX_GPU_DEVICE AMREX_FORCE_INLINE void warpx_interp_nd_bfield_x (int j, int k, int l, amrex::Array4<amrex::Real> const& Bxa, @@ -391,6 +395,45 @@ void warpx_interp_nd_bfield_x (int j, int k, int l, #endif } +// With the PSATD Maxwell solver, this is the arbitrary-order interpolation function used to +// interpolate the Bx field from a staggered grid to a nodal grid, before gathering +// the field from the nodes to the particle positions (momentum-conserving gathering) +#else +AMREX_GPU_DEVICE AMREX_FORCE_INLINE +void warpx_interp_nd_bfield_x (int j, int k, int l, + amrex::Array4<amrex::Real> const& Bxa, + amrex::Array4<amrex::Real const> const& Bxf, + const int noy, + const int noz, + amrex::Real const* stencil_coef_y, + amrex::Real const* stencil_coef_z) +{ +#if (AMREX_SPACEDIM == 2) + amrex::ignore_unused(noy); + amrex::ignore_unused(stencil_coef_y); + amrex::Real res = 0.; + for (int nz = 1; nz < noz/2+1; nz++) { + res += 0.5 * stencil_coef_z[nz-1] * (Bxf(j, k + nz - 1, l) + Bxf(j, k - nz, l)); + } + Bxa(j,k,l) = res; +#else + amrex::Real res = 0.; + for (int nz = 1; nz < noz/2+1; nz++) { + for (int ny = 1; ny < noy/2+1; ny++) { + res += 0.25 * stencil_coef_y[ny-1] * stencil_coef_z[nz-1] * + (Bxf(j, k + ny - 1, l + nz - 1) + Bxf(j, k - ny, l + nz - 1) + + Bxf(j, k + ny - 1, l - nz ) + Bxf(j, k - ny, l - nz )); + } + } + Bxa(j,k,l) = res; +#endif +} +#endif + +// With the FDTD Maxwell solver, this is the linear interpolation function used to +// interpolate the By field from a staggered grid to a nodal grid, before gathering +// the field from the nodes to the particle positions (momentum-conserving gathering) +#ifndef WARPX_USE_PSATD AMREX_GPU_DEVICE AMREX_FORCE_INLINE void warpx_interp_nd_bfield_y (int j, int k, int l, amrex::Array4<amrex::Real> const& Bya, @@ -404,6 +447,47 @@ void warpx_interp_nd_bfield_y (int j, int k, int l, #endif } +// With the PSATD Maxwell solver, this is the arbitrary-order interpolation function used to +// interpolate the By field from a staggered grid to a nodal grid, before gathering +// the field from the nodes to the particle positions (momentum-conserving gathering) +#else +AMREX_GPU_DEVICE AMREX_FORCE_INLINE +void warpx_interp_nd_bfield_y (int j, int k, int l, + amrex::Array4<amrex::Real> const& Bya, + amrex::Array4<amrex::Real const> const& Byf, + const int nox, + const int noz, + amrex::Real const* stencil_coef_x, + amrex::Real const* stencil_coef_z) +{ +#if (AMREX_SPACEDIM == 2) + amrex::Real res = 0.; + for (int nz = 1; nz < noz/2+1; nz++) { + for (int nx = 1; nx < nox/2+1; nx++) { + res += 0.25 * stencil_coef_x[nx-1] * stencil_coef_z[nz-1] * + (Byf(j + nx - 1, k + nz - 1, l) + Byf(j - nx, k + nz - 1, l) + + Byf(j + nx - 1, k - nz , l) + Byf(j - nx, k - nz , l)); + } + } + Bya(j,k,l) = res; +#else + amrex::Real res = 0.; + for (int nz = 1; nz < noz/2+1; nz++) { + for (int nx = 1; nx < nox/2+1; nx++) { + res += 0.25 * stencil_coef_x[nx-1] * stencil_coef_z[nz-1] * + (Byf(j + nx - 1, k, l + nz - 1) + Byf(j - nx, k, l + nz - 1) + + Byf(j + nx - 1, k, l - nz ) + Byf(j - nx, k, l - nz )); + } + } + Bya(j,k,l) = res; +#endif +} +#endif + +// With the FDTD Maxwell solver, this is the linear interpolation function used to +// interpolate the Bz field from a staggered grid to a nodal grid, before gathering +// the field from the nodes to the particle positions (momentum-conserving gathering) +#ifndef WARPX_USE_PSATD AMREX_GPU_DEVICE AMREX_FORCE_INLINE void warpx_interp_nd_bfield_z (int j, int k, int l, amrex::Array4<amrex::Real> const& Bza, @@ -417,6 +501,41 @@ void warpx_interp_nd_bfield_z (int j, int k, int l, #endif } +// With the PSATD Maxwell solver, this is the arbitrary-order interpolation function used to +// interpolate the Bz field from a staggered grid to a nodal grid, before gathering +// the field from the nodes to the particle positions (momentum-conserving gathering) +#else +AMREX_GPU_DEVICE AMREX_FORCE_INLINE +void warpx_interp_nd_bfield_z (int j, int k, int l, + amrex::Array4<amrex::Real> const& Bza, + amrex::Array4<amrex::Real const> const& Bzf, + const int nox, + const int noy, + amrex::Real const* stencil_coef_x, + amrex::Real const* stencil_coef_y) +{ +#if (AMREX_SPACEDIM == 2) + amrex::ignore_unused(noy); + amrex::ignore_unused(stencil_coef_y); + amrex::Real res = 0.; + for (int nx = 1; nx < nox/2+1; nx++) { + res += 0.5 * stencil_coef_x[nx-1] * (Bzf(j + nx - 1, k, l) + Bzf(j - nx, k, l)); + } + Bza(j,k,l) = res; +#else + amrex::Real res = 0.; + for (int ny = 1; ny < noy/2+1; ny++) { + for (int nx = 1; nx < nox/2+1; nx++) { + res += 0.25 * stencil_coef_x[nx-1] * stencil_coef_y[ny-1] * + (Bzf(j + nx - 1, k + ny - 1, l) + Bzf(j - nx, k + ny - 1, l) + + Bzf(j + nx - 1, k - ny , l) + Bzf(j - nx, k - ny , l)); + } + } + Bza(j,k,l) = res; +#endif +} +#endif + AMREX_GPU_DEVICE AMREX_FORCE_INLINE void warpx_interp_nd_efield_x (int j, int k, int l, amrex::Array4<amrex::Real> const& Exa, @@ -621,6 +740,10 @@ void warpx_interp_nd_efield_z (int j, int k, int l, Eza(j,k,l) = eg + (ef-ec); } +// With the FDTD Maxwell solver, this is the linear interpolation function used to +// interpolate the Ex field from a staggered grid to a nodal grid, before gathering +// the field from the nodes to the particle positions (momentum-conserving gathering) +#ifndef WARPX_USE_PSATD AMREX_GPU_DEVICE AMREX_FORCE_INLINE void warpx_interp_nd_efield_x (int j, int k, int l, amrex::Array4<amrex::Real> const& Exa, @@ -629,6 +752,29 @@ void warpx_interp_nd_efield_x (int j, int k, int l, Exa(j,k,l) = 0.5*(Exf(j-1,k,l) + Exf(j,k,l)); } +// With the PSATD Maxwell solver, this is the arbitrary-order interpolation function used to +// interpolate the Ex field from a staggered grid to a nodal grid, before gathering +// the field from the nodes to the particle positions (momentum-conserving gathering) +#else +AMREX_GPU_DEVICE AMREX_FORCE_INLINE +void warpx_interp_nd_efield_x (int j, int k, int l, + amrex::Array4<amrex::Real> const& Exa, + amrex::Array4<amrex::Real const> const& Exf, + const int nox, + amrex::Real const* stencil_coef_x) +{ + amrex::Real res = 0.; + for (int nx = 1; nx < nox/2+1; nx++) { + res += 0.5 * stencil_coef_x[nx-1] * (Exf(j + nx - 1, k, l) + Exf(j - nx, k, l)); + } + Exa(j,k,l) = res; +} +#endif + +// With the FDTD Maxwell solver, this is the linear interpolation function used to +// interpolate the Ey field from a staggered grid to a nodal grid, before gathering +// the field from the nodes to the particle positions (momentum-conserving gathering) +#ifndef WARPX_USE_PSATD AMREX_GPU_DEVICE AMREX_FORCE_INLINE void warpx_interp_nd_efield_y (int j, int k, int l, amrex::Array4<amrex::Real> const& Eya, @@ -642,6 +788,35 @@ void warpx_interp_nd_efield_y (int j, int k, int l, #endif } +// With the PSATD Maxwell solver, this is the arbitrary-order interpolation function used to +// interpolate the Ey field from a staggered grid to a nodal grid, before gathering +// the field from the nodes to the particle positions (momentum-conserving gathering) +#else +AMREX_GPU_DEVICE AMREX_FORCE_INLINE +void warpx_interp_nd_efield_y (int j, int k, int l, + amrex::Array4<amrex::Real> const& Eya, + amrex::Array4<amrex::Real const> const& Eyf, + const int noy, + amrex::Real const* stencil_coef_y) +{ +#if (AMREX_SPACEDIM == 2) + amrex::ignore_unused(noy); + amrex::ignore_unused(stencil_coef_y); + Eya(j,k,l) = Eyf(j,k,l); +#else + amrex::Real res = 0.; + for (int ny = 1; ny < noy/2+1; ny++) { + res += 0.5 * stencil_coef_y[ny-1] * (Eyf(j, k + ny - 1, l) + Eyf(j, k - ny, l)); + } + Eya(j,k,l) = res; +#endif +} +#endif + +// With the FDTD Maxwell solver, this is the linear interpolation function used to +// interpolate the Ez field from a staggered grid to a nodal grid, before gathering +// the field from the nodes to the particle positions (momentum-conserving gathering) +#ifndef WARPX_USE_PSATD AMREX_GPU_DEVICE AMREX_FORCE_INLINE void warpx_interp_nd_efield_z (int j, int k, int l, amrex::Array4<amrex::Real> const& Eza, @@ -655,4 +830,31 @@ void warpx_interp_nd_efield_z (int j, int k, int l, #endif } +// With the PSATD Maxwell solver, this is the arbitrary-order interpolation function used to +// interpolate the Ez field from a staggered grid to a nodal grid, before gathering +// the field from the nodes to the particle positions (momentum-conserving gathering) +#else +AMREX_GPU_DEVICE AMREX_FORCE_INLINE +void warpx_interp_nd_efield_z (int j, int k, int l, + amrex::Array4<amrex::Real> const& Eza, + amrex::Array4<amrex::Real const> const& Ezf, + const int noz, + amrex::Real const* stencil_coef_z) +{ +#if (AMREX_SPACEDIM == 2) + amrex::Real res = 0.; + for (int nz = 1; nz < noz/2+1; nz++) { + res += 0.5 * stencil_coef_z[nz-1] * (Ezf(j, k + nz - 1, l) + Ezf(j, k - nz, l)); + } + Eza(j,k,l) = res; +#else + amrex::Real res = 0.; + for (int nz = 1; nz < noz/2+1; nz++) { + res += 0.5 * stencil_coef_z[nz-1] * (Ezf(j, k, l + nz - 1) + Ezf(j, k, l - nz)); + } + Eza(j,k,l) = res; +#endif +} +#endif + #endif |