diff options
Diffstat (limited to '')
-rw-r--r-- | Source/Parallelization/WarpXComm.cpp | 85 | ||||
-rw-r--r-- | Source/Parallelization/WarpXComm_K.H | 414 | ||||
-rw-r--r-- | Source/WarpX.H | 10 | ||||
-rw-r--r-- | Source/WarpX.cpp | 29 |
4 files changed, 235 insertions, 303 deletions
diff --git a/Source/Parallelization/WarpXComm.cpp b/Source/Parallelization/WarpXComm.cpp index 4f7a75f3f..cc7f4b7cc 100644 --- a/Source/Parallelization/WarpXComm.cpp +++ b/Source/Parallelization/WarpXComm.cpp @@ -41,43 +41,16 @@ WarpX::UpdateAuxilaryDataStagToNodal () "WarpX::UpdateAuxilaryDataStagToNodal: PSATD solver requires " "WarpX build with spectral solver support."); } -#else - amrex::Gpu::DeviceVector<Real> d_stencil_coef_x; - amrex::Gpu::DeviceVector<Real> d_stencil_coef_y; - amrex::Gpu::DeviceVector<Real> d_stencil_coef_z; - if (maxwell_solver_id == MaxwellSolverAlgo::PSATD) { - const int fg_nox = WarpX::field_gathering_nox; - const int fg_noy = WarpX::field_gathering_noy; - const int fg_noz = WarpX::field_gathering_noz; - - // Compute real-space stencil coefficients along x - amrex::Vector<Real> h_stencil_coef_x = getFornbergStencilCoefficients(fg_nox, false); - d_stencil_coef_x.resize(h_stencil_coef_x.size()); - amrex::Gpu::copyAsync(amrex::Gpu::hostToDevice, - h_stencil_coef_x.begin(), - h_stencil_coef_x.end(), - d_stencil_coef_x.begin()); - - // Compute real-space stencil coefficients along y - amrex::Vector<Real> h_stencil_coef_y = getFornbergStencilCoefficients(fg_noy, false); - d_stencil_coef_y.resize(h_stencil_coef_y.size()); - amrex::Gpu::copyAsync(amrex::Gpu::hostToDevice, - h_stencil_coef_y.begin(), - h_stencil_coef_y.end(), - d_stencil_coef_y.begin()); - - // Compute real-space stencil coefficients along z - amrex::Vector<Real> h_stencil_coef_z = getFornbergStencilCoefficients(fg_noz, false); - d_stencil_coef_z.resize(h_stencil_coef_z.size()); - amrex::Gpu::copyAsync(amrex::Gpu::hostToDevice, - h_stencil_coef_z.begin(), - h_stencil_coef_z.end(), - d_stencil_coef_z.begin()); - - amrex::Gpu::synchronize(); - } #endif + const amrex::IntVect& Bx_stag = Bfield_fp[0][0]->ixType().toIntVect(); + const amrex::IntVect& By_stag = Bfield_fp[0][1]->ixType().toIntVect(); + const amrex::IntVect& Bz_stag = Bfield_fp[0][2]->ixType().toIntVect(); + + const amrex::IntVect& Ex_stag = Efield_fp[0][0]->ixType().toIntVect(); + const amrex::IntVect& Ey_stag = Efield_fp[0][1]->ixType().toIntVect(); + const amrex::IntVect& Ez_stag = Efield_fp[0][2]->ixType().toIntVect(); + // For level 0, we only need to do the average. #ifdef AMREX_USE_OMP #pragma omp parallel if (Gpu::notInLaunchRegion()) @@ -109,28 +82,40 @@ WarpX::UpdateAuxilaryDataStagToNodal () const int fg_nox = WarpX::field_gathering_nox; const int fg_noy = WarpX::field_gathering_noy; const int fg_noz = WarpX::field_gathering_noz; - amrex::Real const * r_stencil_coef_x = d_stencil_coef_x.data(); - amrex::Real const * r_stencil_coef_y = d_stencil_coef_y.data(); - amrex::Real const * r_stencil_coef_z = d_stencil_coef_z.data(); + // Device vectors for Fornberg stencil coefficients used for finite-order centering + amrex::Real const * stencil_coeffs_x = WarpX::device_centering_stencil_coeffs_x.data(); + amrex::Real const * stencil_coeffs_y = WarpX::device_centering_stencil_coeffs_y.data(); + amrex::Real const * stencil_coeffs_z = WarpX::device_centering_stencil_coeffs_z.data(); amrex::ParallelFor(bx, [=] AMREX_GPU_DEVICE (int j, int k, int l) noexcept { - warpx_interp_nd_bfield_x(j,k,l, bx_aux, bx_fp, fg_noy, fg_noz, r_stencil_coef_y, r_stencil_coef_z); - warpx_interp_nd_bfield_y(j,k,l, by_aux, by_fp, fg_nox, fg_noz, r_stencil_coef_x, r_stencil_coef_z); - warpx_interp_nd_bfield_z(j,k,l, bz_aux, bz_fp, fg_nox, fg_noy, r_stencil_coef_x, r_stencil_coef_y); - warpx_interp_nd_efield_x(j,k,l, ex_aux, ex_fp, fg_nox, r_stencil_coef_x); - warpx_interp_nd_efield_y(j,k,l, ey_aux, ey_fp, fg_noy, r_stencil_coef_y); - warpx_interp_nd_efield_z(j,k,l, ez_aux, ez_fp, fg_noz, r_stencil_coef_z); + warpx_interp(j, k, l, bx_aux, bx_fp, Bx_stag, fg_nox, fg_noy, fg_noz, + stencil_coeffs_x, stencil_coeffs_y, stencil_coeffs_z); + + warpx_interp(j, k, l, by_aux, by_fp, By_stag, fg_nox, fg_noy, fg_noz, + stencil_coeffs_x, stencil_coeffs_y, stencil_coeffs_z); + + warpx_interp(j, k, l, bz_aux, bz_fp, Bz_stag, fg_nox, fg_noy, fg_noz, + stencil_coeffs_x, stencil_coeffs_y, stencil_coeffs_z); + + warpx_interp(j, k, l, ex_aux, ex_fp, Ex_stag, fg_nox, fg_noy, fg_noz, + stencil_coeffs_x, stencil_coeffs_y, stencil_coeffs_z); + + warpx_interp(j, k, l, ey_aux, ey_fp, Ey_stag, fg_nox, fg_noy, fg_noz, + stencil_coeffs_x, stencil_coeffs_y, stencil_coeffs_z); + + warpx_interp(j, k, l, ez_aux, ez_fp, Ez_stag, fg_nox, fg_noy, fg_noz, + stencil_coeffs_x, stencil_coeffs_y, stencil_coeffs_z); }); #endif } else { // FDTD amrex::ParallelFor(bx, [=] AMREX_GPU_DEVICE (int j, int k, int l) noexcept { - warpx_interp_nd_bfield_x(j,k,l, bx_aux, bx_fp); - warpx_interp_nd_bfield_y(j,k,l, by_aux, by_fp); - warpx_interp_nd_bfield_z(j,k,l, bz_aux, bz_fp); - warpx_interp_nd_efield_x(j,k,l, ex_aux, ex_fp); - warpx_interp_nd_efield_y(j,k,l, ey_aux, ey_fp); - warpx_interp_nd_efield_z(j,k,l, ez_aux, ez_fp); + warpx_interp(j, k, l, bx_aux, bx_fp, Bx_stag); + warpx_interp(j, k, l, by_aux, by_fp, By_stag); + warpx_interp(j, k, l, bz_aux, bz_fp, Bz_stag); + warpx_interp(j, k, l, ex_aux, ex_fp, Ex_stag); + warpx_interp(j, k, l, ey_aux, ey_fp, Ey_stag); + warpx_interp(j, k, l, ez_aux, ez_fp, Ez_stag); }); } } diff --git a/Source/Parallelization/WarpXComm_K.H b/Source/Parallelization/WarpXComm_K.H index 092a13565..0f183e422 100644 --- a/Source/Parallelization/WarpXComm_K.H +++ b/Source/Parallelization/WarpXComm_K.H @@ -277,161 +277,6 @@ void warpx_interp_nd_bfield_z (int j, int k, int l, Bza(j,k,l) = bg + (bf-bc); } -// With the FDTD Maxwell solver, this is the linear interpolation function used to -// interpolate the Bx field from a staggered grid to a nodal grid, before gathering -// the field from the nodes to the particle positions (momentum-conserving gathering) -AMREX_GPU_DEVICE AMREX_FORCE_INLINE -void warpx_interp_nd_bfield_x (int j, int k, int l, - amrex::Array4<amrex::Real> const& Bxa, - amrex::Array4<amrex::Real const> const& Bxf) -{ - using namespace amrex; -#if (AMREX_SPACEDIM == 2) - Bxa(j,k,0) = 0.5_rt*(Bxf(j,k-1,0) + Bxf(j,k,0)); - amrex::ignore_unused(l); -#else - Bxa(j,k,l) = 0.25_rt*(Bxf(j,k-1,l-1) + Bxf(j,k,l-1) + Bxf(j,k-1,l) + Bxf(j,k,l)); -#endif -} - -// With the PSATD Maxwell solver, this is the arbitrary-order interpolation function used to -// interpolate the Bx field from a staggered grid to a nodal grid, before gathering -// the field from the nodes to the particle positions (momentum-conserving gathering) -AMREX_GPU_DEVICE AMREX_FORCE_INLINE -void warpx_interp_nd_bfield_x (int j, int k, int l, - amrex::Array4<amrex::Real> const& Bxa, - amrex::Array4<amrex::Real const> const& Bxf, - const int noy, - const int noz, - amrex::Real const* stencil_coef_y, - amrex::Real const* stencil_coef_z) -{ - using namespace amrex; -#if (AMREX_SPACEDIM == 2) - amrex::ignore_unused(noy); - amrex::ignore_unused(stencil_coef_y); - amrex::Real res = 0.; - for (int nz = 1; nz < noz/2+1; nz++) { - res += 0.5_rt * stencil_coef_z[nz-1] * (Bxf(j, k + nz - 1, l) + Bxf(j, k - nz, l)); - } - Bxa(j,k,l) = res; -#else - amrex::Real res = 0.; - for (int nz = 1; nz < noz/2+1; nz++) { - for (int ny = 1; ny < noy/2+1; ny++) { - res += 0.25_rt * stencil_coef_y[ny-1] * stencil_coef_z[nz-1] * - (Bxf(j, k + ny - 1, l + nz - 1) + Bxf(j, k - ny, l + nz - 1) - + Bxf(j, k + ny - 1, l - nz ) + Bxf(j, k - ny, l - nz )); - } - } - Bxa(j,k,l) = res; -#endif -} - -// With the FDTD Maxwell solver, this is the linear interpolation function used to -// interpolate the By field from a staggered grid to a nodal grid, before gathering -// the field from the nodes to the particle positions (momentum-conserving gathering) -AMREX_GPU_DEVICE AMREX_FORCE_INLINE -void warpx_interp_nd_bfield_y (int j, int k, int l, - amrex::Array4<amrex::Real> const& Bya, - amrex::Array4<amrex::Real const> const& Byf) -{ - using namespace amrex; -#if (AMREX_SPACEDIM == 2) - Bya(j,k,0) = 0.25_rt*(Byf(j,k,0) + Byf(j-1,k,0) + Byf(j,k-1,0) + Byf(j-1,k-1,0)); - amrex::ignore_unused(l); -#else - Bya(j,k,l) = 0.25_rt*(Byf(j-1,k,l-1) + Byf(j,k,l-1) + Byf(j-1,k,l) + Byf(j,k,l)); -#endif -} - -// With the PSATD Maxwell solver, this is the arbitrary-order interpolation function used to -// interpolate the By field from a staggered grid to a nodal grid, before gathering -// the field from the nodes to the particle positions (momentum-conserving gathering) -AMREX_GPU_DEVICE AMREX_FORCE_INLINE -void warpx_interp_nd_bfield_y (int j, int k, int l, - amrex::Array4<amrex::Real> const& Bya, - amrex::Array4<amrex::Real const> const& Byf, - const int nox, - const int noz, - amrex::Real const* stencil_coef_x, - amrex::Real const* stencil_coef_z) -{ - using namespace amrex; -#if (AMREX_SPACEDIM == 2) - amrex::Real res = 0.; - for (int nz = 1; nz < noz/2+1; nz++) { - for (int nx = 1; nx < nox/2+1; nx++) { - res += 0.25_rt * stencil_coef_x[nx-1] * stencil_coef_z[nz-1] * - (Byf(j + nx - 1, k + nz - 1, l) + Byf(j - nx, k + nz - 1, l) - + Byf(j + nx - 1, k - nz , l) + Byf(j - nx, k - nz , l)); - } - } - Bya(j,k,l) = res; -#else - amrex::Real res = 0.; - for (int nz = 1; nz < noz/2+1; nz++) { - for (int nx = 1; nx < nox/2+1; nx++) { - res += 0.25_rt * stencil_coef_x[nx-1] * stencil_coef_z[nz-1] * - (Byf(j + nx - 1, k, l + nz - 1) + Byf(j - nx, k, l + nz - 1) - + Byf(j + nx - 1, k, l - nz ) + Byf(j - nx, k, l - nz )); - } - } - Bya(j,k,l) = res; -#endif -} - -// With the FDTD Maxwell solver, this is the linear interpolation function used to -// interpolate the Bz field from a staggered grid to a nodal grid, before gathering -// the field from the nodes to the particle positions (momentum-conserving gathering) -AMREX_GPU_DEVICE AMREX_FORCE_INLINE -void warpx_interp_nd_bfield_z (int j, int k, int l, - amrex::Array4<amrex::Real> const& Bza, - amrex::Array4<amrex::Real const> const& Bzf) -{ - using namespace amrex; -#if (AMREX_SPACEDIM == 2) - Bza(j,k,0) = 0.5_rt*(Bzf(j-1,k,0) + Bzf(j,k,0)); - amrex::ignore_unused(l); -#else - Bza(j,k,l) = 0.25_rt*(Bzf(j-1,k-1,l) + Bzf(j,k-1,l) + Bzf(j-1,k,l) + Bzf(j,k,l)); -#endif -} - -// With the PSATD Maxwell solver, this is the arbitrary-order interpolation function used to -// interpolate the Bz field from a staggered grid to a nodal grid, before gathering -// the field from the nodes to the particle positions (momentum-conserving gathering) -AMREX_GPU_DEVICE AMREX_FORCE_INLINE -void warpx_interp_nd_bfield_z (int j, int k, int l, - amrex::Array4<amrex::Real> const& Bza, - amrex::Array4<amrex::Real const> const& Bzf, - const int nox, - const int noy, - amrex::Real const* stencil_coef_x, - amrex::Real const* stencil_coef_y) -{ - using namespace amrex; -#if (AMREX_SPACEDIM == 2) - amrex::ignore_unused(noy); - amrex::ignore_unused(stencil_coef_y); - amrex::Real res = 0.; - for (int nx = 1; nx < nox/2+1; nx++) { - res += 0.5_rt * stencil_coef_x[nx-1] * (Bzf(j + nx - 1, k, l) + Bzf(j - nx, k, l)); - } - Bza(j,k,l) = res; -#else - amrex::Real res = 0.; - for (int ny = 1; ny < noy/2+1; ny++) { - for (int nx = 1; nx < nox/2+1; nx++) { - res += 0.25_rt * stencil_coef_x[nx-1] * stencil_coef_y[ny-1] * - (Bzf(j + nx - 1, k + ny - 1, l) + Bzf(j - nx, k + ny - 1, l) - + Bzf(j + nx - 1, k - ny , l) + Bzf(j - nx, k - ny , l)); - } - } - Bza(j,k,l) = res; -#endif -} - AMREX_GPU_DEVICE AMREX_FORCE_INLINE void warpx_interp_nd_efield_x (int j, int k, int l, amrex::Array4<amrex::Real> const& Exa, @@ -641,118 +486,181 @@ void warpx_interp_nd_efield_z (int j, int k, int l, Eza(j,k,l) = eg + (ef-ec); } -// With the FDTD Maxwell solver, this is the linear interpolation function used to -// interpolate the Ex field from a staggered grid to a nodal grid, before gathering -// the field from the nodes to the particle positions (momentum-conserving gathering) -AMREX_GPU_DEVICE AMREX_FORCE_INLINE -void warpx_interp_nd_efield_x (int j, int k, int l, - amrex::Array4<amrex::Real> const& Exa, - amrex::Array4<amrex::Real const> const& Exf) -{ - using namespace amrex; - Exa(j,k,l) = 0.5_rt*(Exf(j-1,k,l) + Exf(j,k,l)); -} - -// With the PSATD Maxwell solver, this is the arbitrary-order interpolation function used to -// interpolate the Ex field from a staggered grid to a nodal grid, before gathering -// the field from the nodes to the particle positions (momentum-conserving gathering) +/** + * \brief Arbitrary-order interpolation function used to interpolate a given field from the Yee grid + * to the nodal grid, before gathering the field from the nodes to the particle positions + * (momentum-conserving field gathering). With the FDTD solver, this performs simple linear interpolation. + * With the PSATD solver, this performs arbitrary-order interpolation based on the Fornberg coefficients. + * The result is stored in the output array \c arr_aux, allocated on the \c aux patch. + * + * \param[in] j index along x of the output array + * \param[in] k index along y (in 3D) or z (in 2D) of the output array + * \param[in] l index along z (in 3D, \c l = 0 in 2D) of the output array + * \param[in,out] arr_aux output array allocated on the \c aux patch where interpolated values are stored + * \param[in] arr_fine input array allocated on the fine patch + * \param[in] arr_stag \c IndexType of the input array (Yee staggering) + * \param[in] nox order of finite-order interpolation along x + * \param[in] noy order of finite-order interpolation along y + * \param[in] noz order of finite-order interpolation along z + * \param[in] stencil_coeffs_x array of Fornberg coefficients for finite-order interpolation stencil along x + * \param[in] stencil_coeffs_y array of Fornberg coefficients for finite-order interpolation stencil along y + * \param[in] stencil_coeffs_z array of Fornberg coefficients for finite-order interpolation stencil along z + */ AMREX_GPU_DEVICE AMREX_FORCE_INLINE -void warpx_interp_nd_efield_x (int j, int k, int l, - amrex::Array4<amrex::Real> const& Exa, - amrex::Array4<amrex::Real const> const& Exf, - const int nox, - amrex::Real const* stencil_coef_x) +void warpx_interp (const int j, + const int k, + const int l, + amrex::Array4<amrex::Real > const& arr_aux, + amrex::Array4<amrex::Real const> const& arr_fine, + const amrex::IntVect& arr_stag, + const int nox = 2, + const int noy = 2, + const int noz = 2, + amrex::Real const* stencil_coeffs_x = nullptr, + amrex::Real const* stencil_coeffs_y = nullptr, + amrex::Real const* stencil_coeffs_z = nullptr) { using namespace amrex; - amrex::Real res = 0.; - for (int nx = 1; nx < nox/2+1; nx++) { - res += 0.5_rt * stencil_coef_x[nx-1] * (Exf(j + nx - 1, k, l) + Exf(j - nx, k, l)); - } - Exa(j,k,l) = res; -} -// With the FDTD Maxwell solver, this is the linear interpolation function used to -// interpolate the Ey field from a staggered grid to a nodal grid, before gathering -// the field from the nodes to the particle positions (momentum-conserving gathering) -AMREX_GPU_DEVICE AMREX_FORCE_INLINE -void warpx_interp_nd_efield_y (int j, int k, int l, - amrex::Array4<amrex::Real> const& Eya, - amrex::Array4<amrex::Real const> const& Eyf) -{ - using namespace amrex; + // Avoid compiler warnings #if (AMREX_SPACEDIM == 2) - Eya(j,k,0) = Eyf(j,k,0); - amrex::ignore_unused(l); -#else - Eya(j,k,l) = 0.5_rt*(Eyf(j,k-1,l) + Eyf(j,k,l)); + amrex::ignore_unused(noy, stencil_coeffs_y); #endif -} -// With the PSATD Maxwell solver, this is the arbitrary-order interpolation function used to -// interpolate the Ey field from a staggered grid to a nodal grid, before gathering -// the field from the nodes to the particle positions (momentum-conserving gathering) -AMREX_GPU_DEVICE AMREX_FORCE_INLINE -void warpx_interp_nd_efield_y (int j, int k, int l, - amrex::Array4<amrex::Real> const& Eya, - amrex::Array4<amrex::Real const> const& Eyf, - const int noy, - amrex::Real const* stencil_coef_y) -{ - using namespace amrex; -#if (AMREX_SPACEDIM == 2) - amrex::ignore_unused(noy); - amrex::ignore_unused(stencil_coef_y); - Eya(j,k,l) = Eyf(j,k,l); -#else - amrex::Real res = 0._rt; - for (int ny = 1; ny < noy/2+1; ny++) { - res += 0.5_rt * stencil_coef_y[ny-1] * (Eyf(j, k + ny - 1, l) + Eyf(j, k - ny, l)); - } - Eya(j,k,l) = res; + // Avoid compiler warnings +#ifndef WARPX_USE_PSATD + amrex::ignore_unused(stencil_coeffs_x, stencil_coeffs_y, stencil_coeffs_z); #endif -} -// With the FDTD Maxwell solver, this is the linear interpolation function used to -// interpolate the Ez field from a staggered grid to a nodal grid, before gathering -// the field from the nodes to the particle positions (momentum-conserving gathering) -AMREX_GPU_DEVICE AMREX_FORCE_INLINE -void warpx_interp_nd_efield_z (int j, int k, int l, - amrex::Array4<amrex::Real> const& Eza, - amrex::Array4<amrex::Real const> const& Ezf) -{ - using namespace amrex; -#if (AMREX_SPACEDIM == 2) - Eza(j,k,0) = 0.5_rt*(Ezf(j,k-1,0) + Ezf(j,k,0)); - amrex::ignore_unused(l); -#else - Eza(j,k,l) = 0.5_rt*(Ezf(j,k,l-1) + Ezf(j,k,l)); + // Staggering (s = 0 if cell-centered, s = 1 if nodal) + const int sj = arr_stag[0]; + const int sk = arr_stag[1]; +#if (AMREX_SPACEDIM == 3) + const int sl = arr_stag[2]; #endif -} -// With the PSATD Maxwell solver, this is the arbitrary-order interpolation function used to -// interpolate the Ez field from a staggered grid to a nodal grid, before gathering -// the field from the nodes to the particle positions (momentum-conserving gathering) -AMREX_GPU_DEVICE AMREX_FORCE_INLINE -void warpx_interp_nd_efield_z (int j, int k, int l, - amrex::Array4<amrex::Real> const& Eza, - amrex::Array4<amrex::Real const> const& Ezf, - const int noz, - amrex::Real const* stencil_coef_z) -{ - using namespace amrex; -#if (AMREX_SPACEDIM == 2) - amrex::Real res = 0._rt; - for (int nz = 1; nz < noz/2+1; nz++) { - res += 0.5_rt * stencil_coef_z[nz-1] * (Ezf(j, k + nz - 1, l) + Ezf(j, k - nz, l)); + // Number of points (+1) used for interpolation from the staggered grid to the nodal grid + // (if the input field is nodal along a given direction, then s = 1, the variable n = 0 + // is actually never used, and there is no interpolation, only a copy of one single value) + const int nj = (sj == 0) ? nox/2 + 1 : 0; +#if (AMREX_SPACEDIM == 2) + const int nk = (sk == 0) ? noz/2 + 1 : 0; +#elif (AMREX_SPACEDIM == 3) + const int nk = (sk == 0) ? noy/2 + 1 : 0; + const int nl = (sl == 0) ? noz/2 + 1 : 0; +#endif + + // Additional normalization factor + const amrex::Real wj = (sj == 0) ? 0.5_rt : 1.0_rt; + const amrex::Real wk = (sk == 0) ? 0.5_rt : 1.0_rt; +#if (AMREX_SPACEDIM == 2) + constexpr amrex::Real wl = 1.0_rt; +#elif (AMREX_SPACEDIM == 3) + const amrex::Real wl = (sl == 0) ? 0.5_rt : 1.0_rt; +#endif + + // Auxiliary variables for stencil coefficients +#ifdef WARPX_USE_PSATD + amrex::Real cj = 1.0_rt; + amrex::Real ck = 1.0_rt; + amrex::Real cl = 1.0_rt; + int dj, dk; +#if (AMREX_SPACEDIM == 3) + int dl; +#endif +#endif + + // Min and max for interpolation loop along j + const int jmin = (sj == 0) ? j - nj + 1 : j; + const int jmax = (sj == 0) ? j + nj - 2 : j; + + // Min and max for interpolation loop along k + const int kmin = (sk == 0) ? k - nk + 1 : k; + const int kmax = (sk == 0) ? k + nk - 2 : k; + + // Min and max for interpolation loop along l +#if (AMREX_SPACEDIM == 2) + // l = 0 always + const int lmin = l; + const int lmax = l; +#elif (AMREX_SPACEDIM == 3) + const int lmin = (sl == 0) ? l - nl + 1 : l; + const int lmax = (sl == 0) ? l + nl - 2 : l; +#endif + + amrex::Real res = 0.0_rt; + +#ifndef WARPX_USE_PSATD // FDTD (linear interpolation) + + // Example of 1D interpolation from nodal grid to nodal grid: + // + // j + // --o-----o-----o-- result(j) = f(j) + // --o-----o-----o-- + // j-1 j j+1 + // + // Example of 1D interpolation from staggered grid to nodal grid: + // + // j + // --o-----o-----o-- result(j) = (f(j-1) + f(j)) / 2 + // -----x-----x----- + // j-1 j + + for (int ll = lmin; ll <= lmax; ll++) + { + for (int kk = kmin; kk <= kmax; kk++) + { + for (int jj = jmin; jj <= jmax; jj++) + { + res += arr_fine(jj,kk,ll); + } + } } - Eza(j,k,l) = res; -#else - amrex::Real res = 0.; - for (int nz = 1; nz < noz/2+1; nz++) { - res += 0.5_rt * stencil_coef_z[nz-1] * (Ezf(j, k, l + nz - 1) + Ezf(j, k, l - nz)); + +#else // PSATD (finite-order interpolation) + + // Example of 1D interpolation from nodal grid to nodal grid: + // + // j + // --o-----o-----o-- result(j) = f(j) + // --o-----o-----o-- + // j-1 j j+1 + // + // Example of 1D interpolation from staggered grid to nodal grid: + // + // j + // --o-----o-----o-----o-----o-----o-----o-- result(j) = c_0 * (f(j-1) + f(j) ) / 2 + // -----x-----x-----x-----x-----x-----x----- + c_1 * (f(j-2) + f(j+1)) / 2 + // j-3 j-2 j-1 j j+1 j+2 + c_2 * (f(j-3) + f(j+2)) / 2 + // c_2 c_1 c_0 c_0 c_1 c_2 + ... + + for (int ll = lmin; ll <= lmax; ll++) + { +#if (AMREX_SPACEDIM == 3) + dl = (l - ll > 0) ? l - ll - 1 : ll - l; + if (sl == 0) cl = stencil_coeffs_z[dl]; +#endif + for (int kk = kmin; kk <= kmax; kk++) + { + dk = (k - kk > 0) ? k - kk - 1 : kk - k; +#if (AMREX_SPACEDIM == 2) + if (sk == 0) ck = stencil_coeffs_z[dk]; +#elif (AMREX_SPACEDIM == 3) + if (sk == 0) ck = stencil_coeffs_y[dk]; +#endif + for (int jj = jmin; jj <= jmax; jj++) + { + dj = (j - jj > 0) ? j - jj - 1 : jj - j; + if (sj == 0) cj = stencil_coeffs_x[dj]; + + res += cj * ck * cl * arr_fine(jj,kk,ll); + } + } } - Eza(j,k,l) = res; + #endif + + arr_aux(j,k,l) = wj * wk * wl * res; } #endif diff --git a/Source/WarpX.H b/Source/WarpX.H index 26c2b78d4..4ee7fe47e 100644 --- a/Source/WarpX.H +++ b/Source/WarpX.H @@ -573,6 +573,16 @@ public: void ApplyFilterandSumBoundaryRho (int lev, int glev, amrex::MultiFab& rho, int icomp, int ncomp); +#ifdef WARPX_USE_PSATD + // Host and device vectors for Fornberg stencil coefficients used for finite-order centering + amrex::Vector<amrex::Real> host_centering_stencil_coeffs_x; + amrex::Vector<amrex::Real> host_centering_stencil_coeffs_y; + amrex::Vector<amrex::Real> host_centering_stencil_coeffs_z; + amrex::Gpu::DeviceVector<amrex::Real> device_centering_stencil_coeffs_x; + amrex::Gpu::DeviceVector<amrex::Real> device_centering_stencil_coeffs_y; + amrex::Gpu::DeviceVector<amrex::Real> device_centering_stencil_coeffs_z; +#endif + protected: /** diff --git a/Source/WarpX.cpp b/Source/WarpX.cpp index 7fdd6a3f1..ff42c4758 100644 --- a/Source/WarpX.cpp +++ b/Source/WarpX.cpp @@ -11,6 +11,9 @@ */ #include "WarpX.H" #include "FieldSolver/WarpX_FDTD.H" +#ifdef WARPX_USE_PSATD +#include "FieldSolver/SpectralSolver/SpectralKSpace.H" +#endif #include "Python/WarpXWrappers.h" #include "Utils/WarpXConst.H" #include "Utils/WarpXUtil.H" @@ -693,6 +696,32 @@ WarpX::ReadParameters () field_gathering_nox == 2 && field_gathering_noy == 2 && field_gathering_noz == 2, "High-order interpolation (order > 2) is not implemented with mesh refinement"); } + + // Host vectors for Fornberg stencil coefficients used for finite-order centering + host_centering_stencil_coeffs_x = getFornbergStencilCoefficients(field_gathering_nox, false); + host_centering_stencil_coeffs_y = getFornbergStencilCoefficients(field_gathering_noy, false); + host_centering_stencil_coeffs_z = getFornbergStencilCoefficients(field_gathering_noz, false); + + // Device vectors for Fornberg stencil coefficients used for finite-order centering + device_centering_stencil_coeffs_x.resize(host_centering_stencil_coeffs_x.size()); + amrex::Gpu::copyAsync(amrex::Gpu::hostToDevice, + host_centering_stencil_coeffs_x.begin(), + host_centering_stencil_coeffs_x.end(), + device_centering_stencil_coeffs_x.begin()); + + device_centering_stencil_coeffs_y.resize(host_centering_stencil_coeffs_y.size()); + amrex::Gpu::copyAsync(amrex::Gpu::hostToDevice, + host_centering_stencil_coeffs_y.begin(), + host_centering_stencil_coeffs_y.end(), + device_centering_stencil_coeffs_y.begin()); + + device_centering_stencil_coeffs_z.resize(host_centering_stencil_coeffs_z.size()); + amrex::Gpu::copyAsync(amrex::Gpu::hostToDevice, + host_centering_stencil_coeffs_z.begin(), + host_centering_stencil_coeffs_z.end(), + device_centering_stencil_coeffs_z.begin()); + + amrex::Gpu::synchronize(); } #endif |