diff options
Diffstat (limited to 'Source')
38 files changed, 1182 insertions, 684 deletions
diff --git a/Source/FieldSolver/FiniteDifferenceSolver/EvolveB.cpp b/Source/FieldSolver/FiniteDifferenceSolver/EvolveB.cpp new file mode 100644 index 000000000..16227febc --- /dev/null +++ b/Source/FieldSolver/FiniteDifferenceSolver/EvolveB.cpp @@ -0,0 +1,221 @@ +/* Copyright 2020 Remi Lehe + * + * This file is part of WarpX. + * + * License: BSD-3-Clause-LBNL + */ + +#include "WarpXAlgorithmSelection.H" +#include "FiniteDifferenceSolver.H" +#ifdef WARPX_DIM_RZ +# include "FiniteDifferenceAlgorithms/CylindricalYeeAlgorithm.H" +#else +# include "FiniteDifferenceAlgorithms/CartesianYeeAlgorithm.H" +# include "FiniteDifferenceAlgorithms/CartesianCKCAlgorithm.H" +# include "FiniteDifferenceAlgorithms/CartesianNodalAlgorithm.H" +#endif +#include <AMReX_Gpu.H> + +using namespace amrex; + +/** + * \brief Update the B field, over one timestep + */ +void FiniteDifferenceSolver::EvolveB ( + std::array< std::unique_ptr<amrex::MultiFab>, 3 >& Bfield, + std::array< std::unique_ptr<amrex::MultiFab>, 3 > const& Efield, + amrex::Real const dt ) { + + // Select algorithm (The choice of algorithm is a runtime option, + // but we compile code for each algorithm, using templates) +#ifdef WARPX_DIM_RZ + if (m_fdtd_algo == MaxwellSolverAlgo::Yee){ + + EvolveBCylindrical <CylindricalYeeAlgorithm> ( Bfield, Efield, dt ); + +#else + if (m_do_nodal) { + + EvolveBCartesian <CartesianNodalAlgorithm> ( Bfield, Efield, dt ); + + } else if (m_fdtd_algo == MaxwellSolverAlgo::Yee) { + + EvolveBCartesian <CartesianYeeAlgorithm> ( Bfield, Efield, dt ); + + } else if (m_fdtd_algo == MaxwellSolverAlgo::CKC) { + + EvolveBCartesian <CartesianCKCAlgorithm> ( Bfield, Efield, dt ); + +#endif + } else { + amrex::Abort("Unknown algorithm"); + } + +} + + +#ifndef WARPX_DIM_RZ + +template<typename T_Algo> +void FiniteDifferenceSolver::EvolveBCartesian ( + std::array< std::unique_ptr<amrex::MultiFab>, 3 >& Bfield, + std::array< std::unique_ptr<amrex::MultiFab>, 3 > const& Efield, + amrex::Real const dt ) { + + // Loop through the grids, and over the tiles within each grid +#ifdef _OPENMP +#pragma omp parallel if (amrex::Gpu::notInLaunchRegion()) +#endif + for ( MFIter mfi(*Bfield[0], TilingIfNotGPU()); mfi.isValid(); ++mfi ) { + + // Extract field data for this grid/tile + Array4<Real> const& Bx = Bfield[0]->array(mfi); + Array4<Real> const& By = Bfield[1]->array(mfi); + Array4<Real> const& Bz = Bfield[2]->array(mfi); + Array4<Real> const& Ex = Efield[0]->array(mfi); + Array4<Real> const& Ey = Efield[1]->array(mfi); + Array4<Real> const& Ez = Efield[2]->array(mfi); + + // Extract stencil coefficients + Real const * const AMREX_RESTRICT coefs_x = m_stencil_coefs_x.dataPtr(); + int const n_coefs_x = m_stencil_coefs_x.size(); + Real const * const AMREX_RESTRICT coefs_y = m_stencil_coefs_y.dataPtr(); + int const n_coefs_y = m_stencil_coefs_y.size(); + Real const * const AMREX_RESTRICT coefs_z = m_stencil_coefs_z.dataPtr(); + int const n_coefs_z = m_stencil_coefs_z.size(); + + // Extract tileboxes for which to loop + Box const& tbx = mfi.tilebox(Bfield[0]->ixType().ixType()); + Box const& tby = mfi.tilebox(Bfield[1]->ixType().ixType()); + Box const& tbz = mfi.tilebox(Bfield[2]->ixType().ixType()); + + // Loop over the cells and update the fields + amrex::ParallelFor(tbx, tby, tbz, + + [=] AMREX_GPU_DEVICE (int i, int j, int k){ + Bx(i, j, k) += dt * T_Algo::UpwardDz(Ey, coefs_z, n_coefs_z, i, j, k) + - dt * T_Algo::UpwardDy(Ez, coefs_y, n_coefs_y, i, j, k); + }, + + [=] AMREX_GPU_DEVICE (int i, int j, int k){ + By(i, j, k) += dt * T_Algo::UpwardDx(Ez, coefs_x, n_coefs_x, i, j, k) + - dt * T_Algo::UpwardDz(Ex, coefs_z, n_coefs_z, i, j, k); + }, + + [=] AMREX_GPU_DEVICE (int i, int j, int k){ + Bz(i, j, k) += dt * T_Algo::UpwardDy(Ex, coefs_y, n_coefs_y, i, j, k) + - dt * T_Algo::UpwardDx(Ey, coefs_x, n_coefs_x, i, j, k); + } + + ); + + } + +} + +#else // corresponds to ifndef WARPX_DIM_RZ + +template<typename T_Algo> +void FiniteDifferenceSolver::EvolveBCylindrical ( + std::array< std::unique_ptr<amrex::MultiFab>, 3 >& Bfield, + std::array< std::unique_ptr<amrex::MultiFab>, 3 > const& Efield, + amrex::Real const dt ) { + + // Loop through the grids, and over the tiles within each grid +#ifdef _OPENMP +#pragma omp parallel if (amrex::Gpu::notInLaunchRegion()) +#endif + for ( MFIter mfi(*Bfield[0], TilingIfNotGPU()); mfi.isValid(); ++mfi ) { + + // Extract field data for this grid/tile + Array4<Real> const& Br = Bfield[0]->array(mfi); + Array4<Real> const& Bt = Bfield[1]->array(mfi); + Array4<Real> const& Bz = Bfield[2]->array(mfi); + Array4<Real> const& Er = Efield[0]->array(mfi); + Array4<Real> const& Et = Efield[1]->array(mfi); + Array4<Real> const& Ez = Efield[2]->array(mfi); + + // Extract stencil coefficients + Real const * const AMREX_RESTRICT coefs_r = m_stencil_coefs_r.dataPtr(); + int const n_coefs_r = m_stencil_coefs_r.size(); + Real const * const AMREX_RESTRICT coefs_z = m_stencil_coefs_z.dataPtr(); + int const n_coefs_z = m_stencil_coefs_z.size(); + + // Extract cylindrical specific parameters + Real const dr = m_dr; + int const nmodes = m_nmodes; + Real const rmin = m_rmin; + + // Extract tileboxes for which to loop + Box const& tbr = mfi.tilebox(Bfield[0]->ixType().ixType()); + Box const& tbt = mfi.tilebox(Bfield[1]->ixType().ixType()); + Box const& tbz = mfi.tilebox(Bfield[2]->ixType().ixType()); + + // Loop over the cells and update the fields + amrex::ParallelFor(tbr, tbt, tbz, + + [=] AMREX_GPU_DEVICE (int i, int j, int k){ + Real const r = rmin + i*dr; // r on nodal point (Br is nodal in r) + if (r != 0) { // Off-axis, regular Maxwell equations + Br(i, j, 0, 0) += dt * T_Algo::UpwardDz(Et, coefs_z, n_coefs_z, i, j, 0, 0); // Mode m=0 + for (int m=1; m<nmodes; m++) { // Higher-order modes + Br(i, j, 0, 2*m-1) += dt*( + T_Algo::UpwardDz(Et, coefs_z, n_coefs_z, i, j, 0, 2*m-1) + - m * Ez(i, j, 0, 2*m )/r ); // Real part + Br(i, j, 0, 2*m ) += dt*( + T_Algo::UpwardDz(Et, coefs_z, n_coefs_z, i, j, 0, 2*m ) + + m * Ez(i, j, 0, 2*m-1)/r ); // Imaginary part + } + } else { // r==0: On-axis corrections + // Ensure that Br remains 0 on axis (except for m=1) + Br(i, j, 0, 0) = 0.; // Mode m=0 + for (int m=1; m<nmodes; m++) { // Higher-order modes + if (m == 1){ + // For m==1, Ez is linear in r, for small r + // Therefore, the formula below regularizes the singularity + Br(i, j, 0, 2*m-1) += dt*( + T_Algo::UpwardDz(Et, coefs_z, n_coefs_z, i, j, 0, 2*m-1) + - m * Ez(i+1, j, 0, 2*m )/dr ); // Real part + Br(i, j, 0, 2*m ) += dt*( + T_Algo::UpwardDz(Et, coefs_z, n_coefs_z, i, j, 0, 2*m ) + + m * Ez(i+1, j, 0, 2*m-1)/dr ); // Imaginary part + } else { + Br(i, j, 0, 2*m-1) = 0.; + Br(i, j, 0, 2*m ) = 0.; + } + } + } + }, + + [=] AMREX_GPU_DEVICE (int i, int j, int k){ + Bt(i, j, 0, 0) += dt*( + T_Algo::UpwardDr(Ez, coefs_r, n_coefs_r, i, j, 0, 0) + - T_Algo::UpwardDz(Er, coefs_z, n_coefs_z, i, j, 0, 0)); // Mode m=0 + for (int m=1 ; m<nmodes ; m++) { // Higher-order modes + Bt(i, j, 0, 2*m-1) += dt*( + T_Algo::UpwardDr(Ez, coefs_r, n_coefs_r, i, j, 0, 2*m-1) + - T_Algo::UpwardDz(Er, coefs_z, n_coefs_z, i, j, 0, 2*m-1)); // Real part + Bt(i, j, 0, 2*m ) += dt*( + T_Algo::UpwardDr(Ez, coefs_r, n_coefs_r, i, j, 0, 2*m ) + - T_Algo::UpwardDz(Er, coefs_z, n_coefs_z, i, j, 0, 2*m )); // Imaginary part + } + }, + + [=] AMREX_GPU_DEVICE (int i, int j, int k){ + Real const r = rmin + (i + 0.5)*dr; // r on a cell-centered grid (Bz is cell-centered in r) + Bz(i, j, 0, 0) += dt*( - T_Algo::UpwardDrr_over_r(Et, r, dr, coefs_r, n_coefs_r, i, j, 0, 0)); + for (int m=1 ; m<nmodes ; m++) { // Higher-order modes + Bz(i, j, 0, 2*m-1) += dt*( m * Er(i, j, 0, 2*m )/r + - T_Algo::UpwardDrr_over_r(Et, r, dr, coefs_r, n_coefs_r, i, j, 0, 2*m-1)); // Real part + Bz(i, j, 0, 2*m ) += dt*(-m * Er(i, j, 0, 2*m-1)/r + - T_Algo::UpwardDrr_over_r(Et, r, dr, coefs_r, n_coefs_r, i, j, 0, 2*m )); // Imaginary part + } + } + + ); + + } + +} + +#endif // corresponds to ifndef WARPX_DIM_RZ diff --git a/Source/FieldSolver/FiniteDifferenceSolver/FiniteDifferenceAlgorithms/CartesianCKCAlgorithm.H b/Source/FieldSolver/FiniteDifferenceSolver/FiniteDifferenceAlgorithms/CartesianCKCAlgorithm.H new file mode 100644 index 000000000..fa5dd073d --- /dev/null +++ b/Source/FieldSolver/FiniteDifferenceSolver/FiniteDifferenceAlgorithms/CartesianCKCAlgorithm.H @@ -0,0 +1,221 @@ +/* Copyright 2020 Remi Lehe + * + * This file is part of WarpX. + * + * License: BSD-3-Clause-LBNL + */ + +#ifndef WARPX_FINITE_DIFFERENCE_ALGORITHM_CARTESIAN_CKC_H_ +#define WARPX_FINITE_DIFFERENCE_ALGORITHM_CARTESIAN_CKC_H_ + +#include <AMReX_REAL.H> +#include <AMReX_Array4.H> +#include <AMReX_Gpu.H> + +/** + * This struct contains only static functions to initialize the stencil coefficients + * and to compute finite-difference derivatives for the Cartesian CKC algorithm. + */ +struct CartesianCKCAlgorithm { + + static void InitializeStencilCoefficients ( + std::array<amrex::Real,3>& cell_size, + amrex::Gpu::ManagedVector<amrex::Real>& stencil_coefs_x, + amrex::Gpu::ManagedVector<amrex::Real>& stencil_coefs_y, + amrex::Gpu::ManagedVector<amrex::Real>& stencil_coefs_z ) { + + using namespace amrex; + + // Compute Cole-Karkkainen-Cowan coefficients according + // to Cowan - PRST-AB 16, 041303 (2013) + Real const inv_dx = 1./cell_size[0]; + Real const inv_dy = 1./cell_size[1]; + Real const inv_dz = 1./cell_size[2]; +#if defined WARPX_DIM_3D + Real const delta = std::max( { inv_dx,inv_dy,inv_dz } ); + Real const rx = (inv_dx/delta)*(inv_dx/delta); + Real const ry = (inv_dy/delta)*(inv_dy/delta); + Real const rz = (inv_dz/delta)*(inv_dz/delta); + Real const beta = 0.125*(1. - rx*ry*rz/(ry*rz + rz*rx + rx*ry)); + Real const betaxy = ry*beta*inv_dx; + Real const betaxz = rz*beta*inv_dx; + Real const betayx = rx*beta*inv_dy; + Real const betayz = rz*beta*inv_dy; + Real const betazx = rx*beta*inv_dz; + Real const betazy = ry*beta*inv_dz; + Real const gamma = (0.0625 - 0.125*ry*rz/(ry*rz + rz*rx + rx*ry)); + Real const gammax = ry*rz*gamma; + Real const gammay = rx*rz*gamma; + Real const gammaz = rx*ry*gamma; + Real const alphax = (1. - 2.*ry*beta - 2.*rz*beta - 4.*ry*rz*gamma)*inv_dx; + Real const alphay = (1. - 2.*rx*beta - 2.*rz*beta - 4.*rx*rz*gamma)*inv_dy; + Real const alphaz = (1. - 2.*rx*beta - 2.*ry*beta - 4.*rx*ry*gamma)*inv_dz; +#elif defined WARPX_DIM_XZ + Real const delta = std::max(inv_dx,inv_dz); + Real const rx = (inv_dx/delta)*(inv_dx/delta); + Real const rz = (inv_dz/delta)*(inv_dz/delta); + Real const beta = 0.125; + Real const betaxz = beta*rz*inv_dx; + Real const betazx = beta*rx*inv_dz; + Real const alphax = (1. - 2.*rz*beta)*inv_dx; + Real const alphaz = (1. - 2.*rx*beta)*inv_dz; + // Other coefficients are 0 in 2D Cartesian + // (and will actually not be used in the stencil) + Real const gammax=0, gammay=0, gammaz=0; + Real const betaxy=0, betazy=0, betayx=0, betayz=0; + Real const alphay=0; +#endif + + // Store the coefficients in array `stencil_coefs`, in prescribed order + stencil_coefs_x.resize(6); + stencil_coefs_x[0] = inv_dx; + stencil_coefs_x[1] = alphax; + stencil_coefs_x[2] = betaxy; + stencil_coefs_x[3] = betaxz; + stencil_coefs_x[4] = gammax; + stencil_coefs_y.resize(6); + stencil_coefs_y[0] = inv_dy; + stencil_coefs_y[1] = alphay; + stencil_coefs_y[2] = betayz; + stencil_coefs_y[3] = betayx; + stencil_coefs_y[4] = gammay; + stencil_coefs_z.resize(6); + stencil_coefs_z[0] = inv_dz; + stencil_coefs_z[1] = alphaz; + stencil_coefs_z[2] = betazx; + stencil_coefs_z[3] = betazy; + stencil_coefs_z[4] = gammaz; + } + + /** + /* Perform derivative along x on a cell-centered grid, from a nodal field `F`*/ + AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE + static amrex::Real UpwardDx ( + amrex::Array4<amrex::Real> const& F, + amrex::Real const * const coefs_x, int const n_coefs_x, + int const i, int const j, int const k ) { + + amrex::Real const alphax = coefs_x[1]; + amrex::Real const betaxy = coefs_x[2]; + amrex::Real const betaxz = coefs_x[3]; + amrex::Real const gammax = coefs_x[4]; +#if defined WARPX_DIM_3D + return alphax * (F(i+1,j ,k ) - F(i, j, k )) + + betaxy * (F(i+1,j+1,k ) - F(i ,j+1,k ) + + F(i+1,j-1,k ) - F(i ,j-1,k )) + + betaxz * (F(i+1,j ,k+1) - F(i ,j ,k+1) + + F(i+1,j ,k-1) - F(i ,j ,k-1)) + + gammax * (F(i+1,j+1,k+1) - F(i ,j+1,k+1) + + F(i+1,j-1,k+1) - F(i ,j-1,k+1) + + F(i+1,j+1,k-1) - F(i ,j+1,k-1) + + F(i+1,j-1,k-1) - F(i ,j-1,k-1)); +#elif (defined WARPX_DIM_XZ) + return alphax * (F(i+1,j ,k ) - F(i, j, k )) + + betaxz * (F(i+1,j+1,k ) - F(i ,j+1,k ) + + F(i+1,j-1,k ) - F(i ,j-1,k )); +#endif + }; + + /** + /* Perform derivative along x on a nodal grid, from a cell-centered field `F`*/ + AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE + static amrex::Real DownwardDx ( + amrex::Array4<amrex::Real> const& F, + amrex::Real const * const coefs_x, int const n_coefs_x, + int const i, int const j, int const k ) { + + amrex::Real const inv_dx = coefs_x[0]; + return inv_dx*( F(i,j,k) - F(i-1,j,k) ); + }; + + /** + /* Perform derivative along y on a cell-centered grid, from a nodal field `F`*/ + AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE + static amrex::Real UpwardDy ( + amrex::Array4<amrex::Real> const& F, + amrex::Real const * const coefs_y, int const n_coefs_y, + int const i, int const j, int const k ) { + +#if defined WARPX_DIM_3D + amrex::Real const alphay = coefs_y[1]; + amrex::Real const betayz = coefs_y[2]; + amrex::Real const betayx = coefs_y[3]; + amrex::Real const gammay = coefs_y[4]; + return alphay * (F(i ,j+1,k ) - F(i ,j ,k )) + + betayx * (F(i+1,j+1,k ) - F(i+1,j ,k ) + + F(i-1,j+1,k ) - F(i-1,j ,k )) + + betayz * (F(i ,j+1,k+1) - F(i ,j ,k+1) + + F(i ,j+1,k-1) - F(i ,j ,k-1)) + + gammay * (F(i+1,j+1,k+1) - F(i+1,j ,k+1) + + F(i-1,j+1,k+1) - F(i-1,j ,k+1) + + F(i+1,j+1,k-1) - F(i+1,j ,k-1) + + F(i-1,j+1,k-1) - F(i-1,j ,k-1)); +#elif (defined WARPX_DIM_XZ) + return 0; // 2D Cartesian: derivative along y is 0 +#endif + }; + + /** + /* Perform derivative along y on a nodal grid, from a cell-centered field `F`*/ + AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE + static amrex::Real DownwardDy ( + amrex::Array4<amrex::Real> const& F, + amrex::Real const * const coefs_y, int const n_coefs_y, + int const i, int const j, int const k ) { + +#if defined WARPX_DIM_3D + amrex::Real const inv_dy = coefs_y[0]; + return inv_dy*( F(i,j,k) - F(i,j-1,k) ); +#elif (defined WARPX_DIM_XZ) + return 0; // 2D Cartesian: derivative along y is 0 +#endif + }; + + /** + /* Perform derivative along z on a cell-centered grid, from a nodal field `F`*/ + AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE + static amrex::Real UpwardDz ( + amrex::Array4<amrex::Real> const& F, + amrex::Real const * const coefs_z, int const n_coefs_z, + int const i, int const j, int const k ) { + + amrex::Real const alphaz = coefs_z[1]; + amrex::Real const betazx = coefs_z[2]; + amrex::Real const betazy = coefs_z[3]; + amrex::Real const gammaz = coefs_z[4]; +#if defined WARPX_DIM_3D + return alphaz * (F(i ,j ,k+1) - F(i ,j ,k )) + + betazx * (F(i+1,j ,k+1) - F(i+1,j ,k ) + + F(i-1,j ,k+1) - F(i-1,j ,k )) + + betazy * (F(i ,j+1,k+1) - F(i ,j+1,k ) + + F(i ,j-1,k+1) - F(i ,j-1,k )) + + gammaz * (F(i+1,j+1,k+1) - F(i+1,j+1,k ) + + F(i-1,j+1,k+1) - F(i-1,j+1,k ) + + F(i+1,j-1,k+1) - F(i+1,j-1,k ) + + F(i-1,j-1,k+1) - F(i-1,j-1,k )); +#elif (defined WARPX_DIM_XZ) + return alphaz * (F(i ,j+1,k ) - F(i ,j ,k )) + + betazx * (F(i+1,j+1,k ) - F(i+1,j ,k ) + + F(i-1,j+1,k ) - F(i-1,j ,k )); +#endif + }; + + /** + /* Perform derivative along z on a nodal grid, from a cell-centered field `F`*/ + AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE + static amrex::Real DownwardDz ( + amrex::Array4<amrex::Real> const& F, + amrex::Real const * const coefs_z, int const n_coefs_z, + int const i, int const j, int const k ) { + + amrex::Real const inv_dz = coefs_z[0]; +#if defined WARPX_DIM_3D + return inv_dz*( F(i,j,k) - F(i,j,k-1) ); +#elif (defined WARPX_DIM_XZ) + return inv_dz*( F(i,j,k) - F(i,j-1,k) ); +#endif + }; + +}; + +#endif // WARPX_FINITE_DIFFERENCE_ALGORITHM_CARTESIAN_CKC_H_ diff --git a/Source/FieldSolver/FiniteDifferenceSolver/FiniteDifferenceAlgorithms/CartesianNodalAlgorithm.H b/Source/FieldSolver/FiniteDifferenceSolver/FiniteDifferenceAlgorithms/CartesianNodalAlgorithm.H new file mode 100644 index 000000000..69622c5fe --- /dev/null +++ b/Source/FieldSolver/FiniteDifferenceSolver/FiniteDifferenceAlgorithms/CartesianNodalAlgorithm.H @@ -0,0 +1,130 @@ +/* Copyright 2020 Remi Lehe + * + * This file is part of WarpX. + * + * License: BSD-3-Clause-LBNL + */ + +#ifndef WARPX_FINITE_DIFFERENCE_ALGORITHM_CARTESIAN_NODAL_H_ +#define WARPX_FINITE_DIFFERENCE_ALGORITHM_CARTESIAN_NODAL_H_ + +#include <AMReX_REAL.H> +#include <AMReX_Array4.H> +#include <AMReX_Gpu.H> + +/** + * This struct contains only static functions to initialize the stencil coefficients + * and to compute finite-difference derivatives for the Cartesian nodal algorithm. + */ +struct CartesianNodalAlgorithm { + + static void InitializeStencilCoefficients ( + std::array<amrex::Real,3>& cell_size, + amrex::Gpu::ManagedVector<amrex::Real>& stencil_coefs_x, + amrex::Gpu::ManagedVector<amrex::Real>& stencil_coefs_y, + amrex::Gpu::ManagedVector<amrex::Real>& stencil_coefs_z ) { + + // Store the inverse cell size along each direction in the coefficients + stencil_coefs_x.resize(1); + stencil_coefs_x[0] = 1./cell_size[0]; + stencil_coefs_y.resize(1); + stencil_coefs_y[0] = 1./cell_size[1]; + stencil_coefs_z.resize(1); + stencil_coefs_z[0] = 1./cell_size[2]; + } + + /** + /* Perform derivative along x + /* (For a solver on a staggered grid, `UpwardDx` and `DownwardDx` take into + /* account the staggering; but for `CartesianNodalAlgorithm`, they are equivalent) */ + AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE + static amrex::Real UpwardDx ( + amrex::Array4<amrex::Real> const& F, + amrex::Real const * const coefs_x, int const n_coefs_x, + int const i, int const j, int const k ) { + + amrex::Real const inv_dx = coefs_x[0]; + return 0.5*inv_dx*( F(i+1,j,k) - F(i-1,j,k) ); + }; + + /** + /* Perform derivative along x + /* (For a solver on a staggered grid, `UpwardDx` and `DownwardDx` take into + /* account the staggering; but for `CartesianNodalAlgorithm`, they are equivalent) */ + AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE + static amrex::Real DownwardDx ( + amrex::Array4<amrex::Real> const& F, + amrex::Real const * const coefs_x, int const n_coefs_x, + int const i, int const j, int const k ) { + + return UpwardDx( F, coefs_x, n_coefs_x, i, j, k ); + // For CartesianNodalAlgorithm, UpwardDx and DownwardDx are equivalent + }; + + /** + /* Perform derivative along y + /* (For a solver on a staggered grid, `UpwardDy` and `DownwardDy` take into + /* account the staggering; but for `CartesianNodalAlgorithm`, they are equivalent) */ + AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE + static amrex::Real UpwardDy ( + amrex::Array4<amrex::Real> const& F, + amrex::Real const * const coefs_y, int const n_coefs_y, + int const i, int const j, int const k ) { + +#if defined WARPX_DIM_3D + amrex::Real const inv_dy = coefs_y[0]; + return 0.5*inv_dy*( F(i,j+1,k) - F(i,j-1,k) ); +#elif (defined WARPX_DIM_XZ) + return 0; // 2D Cartesian: derivative along y is 0 +#endif + }; + + /** + /* Perform derivative along y + /* (For a solver on a staggered grid, `UpwardDy` and `DownwardDy` take into + /* account the staggering; but for `CartesianNodalAlgorithm`, they are equivalent) */ + AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE + static amrex::Real DownwardDy ( + amrex::Array4<amrex::Real> const& F, + amrex::Real const * const coefs_y, int const n_coefs_y, + int const i, int const j, int const k ) { + + return UpwardDy( F, coefs_y, n_coefs_y, i, j, k ); + // For CartesianNodalAlgorithm, UpwardDy and DownwardDy are equivalent + }; + + /** + /* Perform derivative along z + /* (For a solver on a staggered grid, `UpwardDz` and `DownwardDz` take into + /* account the staggering; but for `CartesianNodalAlgorithm`, they are equivalent) */ + AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE + static amrex::Real UpwardDz ( + amrex::Array4<amrex::Real> const& F, + amrex::Real const * const coefs_z, int const n_coefs_z, + int const i, int const j, int const k ) { + + amrex::Real const inv_dz = coefs_z[0]; +#if defined WARPX_DIM_3D + return 0.5*inv_dz*( F(i,j,k+1) - F(i,j,k-1) ); +#elif (defined WARPX_DIM_XZ) + return 0.5*inv_dz*( F(i,j+1,k) - F(i,j-1,k) ); +#endif + }; + + /** + /* Perform derivative along z + /* (For a solver on a staggered grid, `UpwardDz` and `DownwardDz` take into + /* account the staggering; but for `CartesianNodalAlgorithm`, they are equivalent) */ + AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE + static amrex::Real DownwardDz ( + amrex::Array4<amrex::Real> const& F, + amrex::Real const * const coefs_z, int const n_coefs_z, + int const i, int const j, int const k ) { + + return UpwardDz( F, coefs_z, n_coefs_z, i, j, k ); + // For CartesianNodalAlgorithm, UpwardDz and DownwardDz are equivalent + }; + +}; + +#endif // WARPX_FINITE_DIFFERENCE_ALGORITHM_CARTESIAN_NODAL_H_ diff --git a/Source/FieldSolver/FiniteDifferenceSolver/FiniteDifferenceAlgorithms/CartesianYeeAlgorithm.H b/Source/FieldSolver/FiniteDifferenceSolver/FiniteDifferenceAlgorithms/CartesianYeeAlgorithm.H new file mode 100644 index 000000000..268c5aa89 --- /dev/null +++ b/Source/FieldSolver/FiniteDifferenceSolver/FiniteDifferenceAlgorithms/CartesianYeeAlgorithm.H @@ -0,0 +1,126 @@ +/* Copyright 2020 Remi Lehe + * + * This file is part of WarpX. + * + * License: BSD-3-Clause-LBNL + */ + +#ifndef WARPX_FINITE_DIFFERENCE_ALGORITHM_CARTESIAN_YEE_H_ +#define WARPX_FINITE_DIFFERENCE_ALGORITHM_CARTESIAN_YEE_H_ + +#include <AMReX_REAL.H> +#include <AMReX_Array4.H> +#include <AMReX_Gpu.H> + +/** + * This struct contains only static functions to initialize the stencil coefficients + * and to compute finite-difference derivatives for the Cartesian Yee algorithm. + */ +struct CartesianYeeAlgorithm { + + static void InitializeStencilCoefficients ( + std::array<amrex::Real,3>& cell_size, + amrex::Gpu::ManagedVector<amrex::Real>& stencil_coefs_x, + amrex::Gpu::ManagedVector<amrex::Real>& stencil_coefs_y, + amrex::Gpu::ManagedVector<amrex::Real>& stencil_coefs_z ) { + + // Store the inverse cell size along each direction in the coefficients + stencil_coefs_x.resize(1); + stencil_coefs_x[0] = 1./cell_size[0]; + stencil_coefs_y.resize(1); + stencil_coefs_y[0] = 1./cell_size[1]; + stencil_coefs_z.resize(1); + stencil_coefs_z[0] = 1./cell_size[2]; + } + + /** + /* Perform derivative along x on a cell-centered grid, from a nodal field `F`*/ + AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE + static amrex::Real UpwardDx ( + amrex::Array4<amrex::Real> const& F, + amrex::Real const * const coefs_x, int const n_coefs_x, + int const i, int const j, int const k ) { + + amrex::Real const inv_dx = coefs_x[0]; + return inv_dx*( F(i+1,j,k) - F(i,j,k) ); + }; + + /** + /* Perform derivative along x on a nodal grid, from a cell-centered field `F`*/ + AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE + static amrex::Real DownwardDx ( + amrex::Array4<amrex::Real> const& F, + amrex::Real const * const coefs_x, int const n_coefs_x, + int const i, int const j, int const k ) { + + amrex::Real const inv_dx = coefs_x[0]; + return inv_dx*( F(i,j,k) - F(i-1,j,k) ); + }; + + /** + /* Perform derivative along y on a cell-centered grid, from a nodal field `F`*/ + AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE + static amrex::Real UpwardDy ( + amrex::Array4<amrex::Real> const& F, + amrex::Real const * const coefs_y, int const n_coefs_y, + int const i, int const j, int const k ) { + +#if defined WARPX_DIM_3D + amrex::Real const inv_dy = coefs_y[0]; + return inv_dy*( F(i,j+1,k) - F(i,j,k) ); +#elif (defined WARPX_DIM_XZ) + return 0; // 2D Cartesian: derivative along y is 0 +#endif + }; + + /** + /* Perform derivative along y on a nodal grid, from a cell-centered field `F`*/ + AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE + static amrex::Real DownwardDy ( + amrex::Array4<amrex::Real> const& F, + amrex::Real const * const coefs_y, int const n_coefs_y, + int const i, int const j, int const k ) { + +#if defined WARPX_DIM_3D + amrex::Real const inv_dy = coefs_y[0]; + return inv_dy*( F(i,j,k) - F(i,j-1,k) ); +#elif (defined WARPX_DIM_XZ) + return 0; // 2D Cartesian: derivative along y is 0 +#endif + }; + + /** + /* Perform derivative along z on a cell-centered grid, from a nodal field `F`*/ + AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE + static amrex::Real UpwardDz ( + amrex::Array4<amrex::Real> const& F, + amrex::Real const * const coefs_z, int const n_coefs_z, + int const i, int const j, int const k ) { + + amrex::Real const inv_dz = coefs_z[0]; +#if defined WARPX_DIM_3D + return inv_dz*( F(i,j,k+1) - F(i,j,k) ); +#elif (defined WARPX_DIM_XZ) + return inv_dz*( F(i,j+1,k) - F(i,j,k) ); +#endif + }; + + /** + /* Perform derivative along z on a nodal grid, from a cell-centered field `F`*/ + AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE + static amrex::Real DownwardDz ( + amrex::Array4<amrex::Real> const& F, + amrex::Real const * const coefs_z, int const n_coefs_z, + int const i, int const j, int const k ) { + + amrex::Real const inv_dz = coefs_z[0]; +#if defined WARPX_DIM_3D + return inv_dz*( F(i,j,k) - F(i,j,k-1) ); +#elif (defined WARPX_DIM_XZ) + return inv_dz*( F(i,j,k) - F(i,j-1,k) ); +#endif + }; + +}; + +#endif // WARPX_FINITE_DIFFERENCE_ALGORITHM_CARTESIAN_YEE_H_ diff --git a/Source/FieldSolver/FiniteDifferenceSolver/FiniteDifferenceAlgorithms/CylindricalYeeAlgorithm.H b/Source/FieldSolver/FiniteDifferenceSolver/FiniteDifferenceAlgorithms/CylindricalYeeAlgorithm.H new file mode 100644 index 000000000..ab32c8bcb --- /dev/null +++ b/Source/FieldSolver/FiniteDifferenceSolver/FiniteDifferenceAlgorithms/CylindricalYeeAlgorithm.H @@ -0,0 +1,113 @@ +/* Copyright 2020 Remi Lehe + * + * This file is part of WarpX. + * + * License: BSD-3-Clause-LBNL + */ + +#ifndef WARPX_FINITE_DIFFERENCE_ALGORITHM_CYLINDRICAL_YEE_H_ +#define WARPX_FINITE_DIFFERENCE_ALGORITHM_CYLINDRICAL_YEE_H_ + +#include <AMReX_REAL.H> +#include <AMReX_Array4.H> +#include <AMReX_Gpu.H> + +/** + * This struct contains only static functions to initialize the stencil coefficients + * and to compute finite-difference derivatives for the Cartesian Yee algorithm. + */ +struct CylindricalYeeAlgorithm { + + static void InitializeStencilCoefficients ( + std::array<amrex::Real,3>& cell_size, + amrex::Gpu::ManagedVector<amrex::Real>& stencil_coefs_r, + amrex::Gpu::ManagedVector<amrex::Real>& stencil_coefs_z ) { + + // Store the inverse cell size along each direction in the coefficients + stencil_coefs_r.resize(1); + stencil_coefs_r[0] = 1./cell_size[0]; // 1./dr + stencil_coefs_z.resize(1); + stencil_coefs_z[0] = 1./cell_size[2]; // 1./dz + } + + /** Applies the differential operator `1/r * d(rF)/dr`, + * where `F` is on a *nodal* grid in `r` + * and the differential operator is evaluated on a *cell-centered* grid. + * The input parameter `r` is given at the cell-centered position */ + AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE + static amrex::Real UpwardDrr_over_r ( + amrex::Array4<amrex::Real> const& F, + amrex::Real const r, amrex::Real const dr, + amrex::Real const * const coefs_r, int const n_coefs_r, + int const i, int const j, int const k, int const comp ) { + + amrex::Real const inv_dr = coefs_r[0]; + return 1./r * inv_dr*( (r+0.5*dr)*F(i+1,j,k,comp) - (r-0.5*dr)*F(i,j,k,comp) ); + }; + + /** Applies the differential operator `1/r * d(rF)/dr`, + * where `F` is on a *cell-centered* grid in `r` + * and the differential operator is evaluated on a *nodal* grid. + * The input parameter `r` is given at the cell-centered position */ + AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE + static amrex::Real DownwardDrr_over_r ( + amrex::Array4<amrex::Real> const& F, + amrex::Real const r, amrex::Real const dr, + amrex::Real const * const coefs_r, int const n_coefs_r, + int const i, int const j, int const k, int const comp ) { + + amrex::Real const inv_dr = coefs_r[0]; + return 1./r * inv_dr*( (r+0.5*dr)*F(i,j,k,comp) - (r-0.5*dr)*F(i-1,j,k,comp) ); + }; + + /** + /* Perform derivative along r on a cell-centered grid, from a nodal field `F`*/ + AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE + static amrex::Real UpwardDr ( + amrex::Array4<amrex::Real> const& F, + amrex::Real const * const coefs_r, int const n_coefs_r, + int const i, int const j, int const k, int const comp ) { + + amrex::Real const inv_dr = coefs_r[0]; + return inv_dr*( F(i+1,j,k,comp) - F(i,j,k,comp) ); + }; + + /** + /* Perform derivative along r on a nodal grid, from a cell-centered field `F`*/ + AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE + static amrex::Real DownwardDr ( + amrex::Array4<amrex::Real> const& F, + amrex::Real const * const coefs_r, int const n_coefs_r, + int const i, int const j, int const k, int const comp ) { + + amrex::Real const inv_dr = coefs_r[0]; + return inv_dr*( F(i,j,k,comp) - F(i-1,j,k,comp) ); + }; + + /** + /* Perform derivative along z on a cell-centered grid, from a nodal field `F`*/ + AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE + static amrex::Real UpwardDz ( + amrex::Array4<amrex::Real> const& F, + amrex::Real const * const coefs_z, int const n_coefs_z, + int const i, int const j, int const k, int const comp ) { + + amrex::Real const inv_dz = coefs_z[0]; + return inv_dz*( F(i,j+1,k,comp) - F(i,j,k,comp) ); + }; + + /** + /* Perform derivative along z on a nodal grid, from a cell-centered field `F`*/ + AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE + static amrex::Real DownwardDz ( + amrex::Array4<amrex::Real> const& F, + amrex::Real const * const coefs_z, int const n_coefs_z, + int const i, int const j, int const k, int const comp ) { + + amrex::Real const inv_dz = coefs_z[0]; + return inv_dz*( F(i,j,k,comp) - F(i,j-1,k,comp) ); + }; + +}; + +#endif // WARPX_FINITE_DIFFERENCE_ALGORITHM_CYLINDRICAL_YEE_H_ diff --git a/Source/FieldSolver/FiniteDifferenceSolver/FiniteDifferenceSolver.H b/Source/FieldSolver/FiniteDifferenceSolver/FiniteDifferenceSolver.H new file mode 100644 index 000000000..4bf88077f --- /dev/null +++ b/Source/FieldSolver/FiniteDifferenceSolver/FiniteDifferenceSolver.H @@ -0,0 +1,76 @@ +/* Copyright 2020 Remi Lehe + * + * This file is part of WarpX. + * + * License: BSD-3-Clause-LBNL + */ + +#ifndef WARPX_FINITE_DIFFERENCE_SOLVER_H_ +#define WARPX_FINITE_DIFFERENCE_SOLVER_H_ + +#include <AMReX_MultiFab.H> + +/** + * \brief Top-level class for the electromagnetic finite-difference solver + * + * Stores the coefficients of the finite-difference stencils, + * and has member functions to update fields over one time step. + */ +class FiniteDifferenceSolver +{ + public: + + // Constructor + /** \brief Initialize the finite-difference Maxwell solver (for a given refinement level) + * + * This function initializes the stencil coefficients for the chosen finite-difference algorithm + * + * \param fdtd_algo Identifies the chosen algorithm, as defined in WarpXAlgorithmSelection.H + * \param cell_size Cell size along each dimension, for the chosen refinement level + * \param do_nodal Whether the solver is applied to a nodal or staggered grid + */ + FiniteDifferenceSolver ( + int const fdtd_algo, + std::array<amrex::Real,3> cell_size, + bool const do_nodal ); + + void EvolveB ( std::array< std::unique_ptr<amrex::MultiFab>, 3 >& Bfield, + std::array< std::unique_ptr<amrex::MultiFab>, 3 > const& Efield, + amrex::Real const dt ); + private: + + int m_fdtd_algo; + bool m_do_nodal; + +#ifdef WARPX_DIM_RZ + amrex::Real m_dr, m_rmin; + amrex::Real m_nmodes; + amrex::Gpu::ManagedVector<amrex::Real> m_stencil_coefs_r; + amrex::Gpu::ManagedVector<amrex::Real> m_stencil_coefs_z; +#else + amrex::Gpu::ManagedVector<amrex::Real> m_stencil_coefs_x; + amrex::Gpu::ManagedVector<amrex::Real> m_stencil_coefs_y; + amrex::Gpu::ManagedVector<amrex::Real> m_stencil_coefs_z; +#endif + + public: + // The member functions below contain extended __device__ lambda. + // In order to compile with nvcc, they need to be public. + +#ifdef WARPX_DIM_RZ + template< typename T_Algo > + void EvolveBCylindrical ( + std::array< std::unique_ptr<amrex::MultiFab>, 3 >& Bfield, + std::array< std::unique_ptr<amrex::MultiFab>, 3 > const& Efield, + amrex::Real const dt ); +#else + template< typename T_Algo > + void EvolveBCartesian ( + std::array< std::unique_ptr<amrex::MultiFab>, 3 >& Bfield, + std::array< std::unique_ptr<amrex::MultiFab>, 3 > const& Efield, + amrex::Real const dt ); +#endif + +}; + +#endif // WARPX_FINITE_DIFFERENCE_SOLVER_H_ diff --git a/Source/FieldSolver/FiniteDifferenceSolver/FiniteDifferenceSolver.cpp b/Source/FieldSolver/FiniteDifferenceSolver/FiniteDifferenceSolver.cpp new file mode 100644 index 000000000..ea7af6677 --- /dev/null +++ b/Source/FieldSolver/FiniteDifferenceSolver/FiniteDifferenceSolver.cpp @@ -0,0 +1,58 @@ +/* Copyright 2020 Remi Lehe + * + * This file is part of WarpX. + * + * License: BSD-3-Clause-LBNL + */ + +#include "WarpXAlgorithmSelection.H" +#ifdef WARPX_DIM_RZ +# include "FiniteDifferenceAlgorithms/CylindricalYeeAlgorithm.H" +#else +# include "FiniteDifferenceAlgorithms/CartesianYeeAlgorithm.H" +# include "FiniteDifferenceAlgorithms/CartesianCKCAlgorithm.H" +# include "FiniteDifferenceAlgorithms/CartesianNodalAlgorithm.H" +#endif +#include "FiniteDifferenceSolver.H" +#include "WarpX.H" + +/* This function initializes the stencil coefficients for the chosen finite-difference algorithm */ +FiniteDifferenceSolver::FiniteDifferenceSolver ( + int const fdtd_algo, + std::array<amrex::Real,3> cell_size, + bool do_nodal ) { + + // Register the type of finite-difference algorithm + m_fdtd_algo = fdtd_algo; + m_do_nodal = do_nodal; + + // Calculate coefficients of finite-difference stencil +#ifdef WARPX_DIM_RZ + m_dr = cell_size[0]; + m_nmodes = WarpX::GetInstance().n_rz_azimuthal_modes; + m_rmin = WarpX::GetInstance().Geom(0).ProbLo(0); + if (fdtd_algo == MaxwellSolverAlgo::Yee) { + + CylindricalYeeAlgorithm::InitializeStencilCoefficients( cell_size, + m_stencil_coefs_r, m_stencil_coefs_z ); +#else + if (do_nodal) { + + CartesianNodalAlgorithm::InitializeStencilCoefficients( cell_size, + m_stencil_coefs_x, m_stencil_coefs_y, m_stencil_coefs_z ); + + } else if (fdtd_algo == MaxwellSolverAlgo::Yee) { + + CartesianYeeAlgorithm::InitializeStencilCoefficients( cell_size, + m_stencil_coefs_x, m_stencil_coefs_y, m_stencil_coefs_z ); + + } else if (fdtd_algo == MaxwellSolverAlgo::CKC) { + + CartesianCKCAlgorithm::InitializeStencilCoefficients( cell_size, + m_stencil_coefs_x, m_stencil_coefs_y, m_stencil_coefs_z ); + +#endif + } else { + amrex::Abort("Unknown algorithm"); + } +}; diff --git a/Source/FieldSolver/FiniteDifferenceSolver/Make.package b/Source/FieldSolver/FiniteDifferenceSolver/Make.package new file mode 100644 index 000000000..289ed98f2 --- /dev/null +++ b/Source/FieldSolver/FiniteDifferenceSolver/Make.package @@ -0,0 +1,6 @@ +CEXE_headers += FiniteDifferenceSolver.H +CEXE_sources += FiniteDifferenceSolver.cpp +CEXE_sources += EvolveB.cpp + +INCLUDE_LOCATIONS += $(WARPX_HOME)/Source/FieldSolver/FiniteDifferenceSolver +VPATH_LOCATIONS += $(WARPX_HOME)/Source/FieldSolver/FiniteDifferenceSolver diff --git a/Source/FieldSolver/Make.package b/Source/FieldSolver/Make.package index 018cfbfba..522c4c07a 100644 --- a/Source/FieldSolver/Make.package +++ b/Source/FieldSolver/Make.package @@ -7,6 +7,7 @@ ifeq ($(USE_PSATD),TRUE) include $(WARPX_HOME)/Source/FieldSolver/PicsarHybridSpectralSolver/Make.package endif endif +include $(WARPX_HOME)/Source/FieldSolver/FiniteDifferenceSolver/Make.package INCLUDE_LOCATIONS += $(WARPX_HOME)/Source/FieldSolver VPATH_LOCATIONS += $(WARPX_HOME)/Source/FieldSolver diff --git a/Source/FieldSolver/WarpXPushFieldsEM.cpp b/Source/FieldSolver/WarpXPushFieldsEM.cpp index 085f34441..3c4b4439c 100644 --- a/Source/FieldSolver/WarpXPushFieldsEM.cpp +++ b/Source/FieldSolver/WarpXPushFieldsEM.cpp @@ -118,129 +118,16 @@ WarpX::EvolveB (int lev, amrex::Real a_dt) void WarpX::EvolveB (int lev, PatchType patch_type, amrex::Real a_dt) { - const int patch_level = (patch_type == PatchType::fine) ? lev : lev-1; - const std::array<Real,3>& dx = WarpX::CellSize(patch_level); - const Real dtsdx = a_dt/dx[0], dtsdy = a_dt/dx[1], dtsdz = a_dt/dx[2]; - const Real dxinv = 1./dx[0]; - MultiFab *Ex, *Ey, *Ez, *Bx, *By, *Bz; - if (patch_type == PatchType::fine) - { - Ex = Efield_fp[lev][0].get(); - Ey = Efield_fp[lev][1].get(); - Ez = Efield_fp[lev][2].get(); - Bx = Bfield_fp[lev][0].get(); - By = Bfield_fp[lev][1].get(); - Bz = Bfield_fp[lev][2].get(); + if (patch_type == PatchType::fine) { + m_fdtd_solver_fp[lev]->EvolveB( Bfield_fp[lev], Efield_fp[lev], a_dt ); + } else { + m_fdtd_solver_cp[lev]->EvolveB( Bfield_cp[lev], Efield_cp[lev], a_dt ); } - else - { - Ex = Efield_cp[lev][0].get(); - Ey = Efield_cp[lev][1].get(); - Ez = Efield_cp[lev][2].get(); - Bx = Bfield_cp[lev][0].get(); - By = Bfield_cp[lev][1].get(); - Bz = Bfield_cp[lev][2].get(); - } - - MultiFab* cost = costs[lev].get(); - const IntVect& rr = (lev > 0) ? refRatio(lev-1) : IntVect::TheUnitVector(); - - // xmin is only used by the kernel for cylindrical geometry, - // in which case it is actually rmin. - const Real xmin = Geom(0).ProbLo(0); - - // Loop through the grids, and over the tiles within each grid -#ifdef _OPENMP -#pragma omp parallel if (Gpu::notInLaunchRegion()) -#endif - for ( MFIter mfi(*Bx, TilingIfNotGPU()); mfi.isValid(); ++mfi ) - { - Real wt = amrex::second(); - - const Box& tbx = mfi.tilebox(Bx_nodal_flag); - const Box& tby = mfi.tilebox(By_nodal_flag); - const Box& tbz = mfi.tilebox(Bz_nodal_flag); - - auto const& Bxfab = Bx->array(mfi); - auto const& Byfab = By->array(mfi); - auto const& Bzfab = Bz->array(mfi); - auto const& Exfab = Ex->array(mfi); - auto const& Eyfab = Ey->array(mfi); - auto const& Ezfab = Ez->array(mfi); - if (do_nodal) { - amrex::ParallelFor(tbx, tby, tbz, - [=] AMREX_GPU_DEVICE (int j, int k, int l) - { - warpx_push_bx_nodal(j,k,l,Bxfab,Eyfab,Ezfab,dtsdy,dtsdz); - }, - [=] AMREX_GPU_DEVICE (int j, int k, int l) - { - warpx_push_by_nodal(j,k,l,Byfab,Exfab,Ezfab,dtsdx,dtsdz); - }, - [=] AMREX_GPU_DEVICE (int j, int k, int l) - { - warpx_push_bz_nodal(j,k,l,Bzfab,Exfab,Eyfab,dtsdx,dtsdy); - }); - } else if (WarpX::maxwell_fdtd_solver_id == 0) { - const long nmodes = n_rz_azimuthal_modes; - amrex::ParallelFor(tbx, tby, tbz, - [=] AMREX_GPU_DEVICE (int j, int k, int l) - { - warpx_push_bx_yee(j,k,l,Bxfab,Eyfab,Ezfab,dtsdx,dtsdy,dtsdz,dxinv,xmin,nmodes); - }, - [=] AMREX_GPU_DEVICE (int j, int k, int l) - { - warpx_push_by_yee(j,k,l,Byfab,Exfab,Ezfab,dtsdx,dtsdz,nmodes); - }, - [=] AMREX_GPU_DEVICE (int j, int k, int l) - { - warpx_push_bz_yee(j,k,l,Bzfab,Exfab,Eyfab,dtsdx,dtsdy,dxinv,xmin,nmodes); - }); - } else if (WarpX::maxwell_fdtd_solver_id == 1) { - Real betaxy, betaxz, betayx, betayz, betazx, betazy; - Real gammax, gammay, gammaz; - Real alphax, alphay, alphaz; - warpx_calculate_ckc_coefficients(dtsdx, dtsdy, dtsdz, - betaxy, betaxz, betayx, betayz, betazx, betazy, - gammax, gammay, gammaz, - alphax, alphay, alphaz); - amrex::ParallelFor(tbx, tby, tbz, - [=] AMREX_GPU_DEVICE (int j, int k, int l) - { - warpx_push_bx_ckc(j,k,l,Bxfab,Eyfab,Ezfab, - betaxy, betaxz, betayx, betayz, betazx, betazy, - gammax, gammay, gammaz, - alphax, alphay, alphaz); - }, - [=] AMREX_GPU_DEVICE (int j, int k, int l) - { - warpx_push_by_ckc(j,k,l,Byfab,Exfab,Ezfab, - betaxy, betaxz, betayx, betayz, betazx, betazy, - gammax, gammay, gammaz, - alphax, alphay, alphaz); - }, - [=] AMREX_GPU_DEVICE (int j, int k, int l) - { - warpx_push_bz_ckc(j,k,l,Bzfab,Exfab,Eyfab, - betaxy, betaxz, betayx, betayz, betazx, betazy, - gammax, gammay, gammaz, - alphax, alphay, alphaz); - }); - } - if (cost) { - Box cbx = mfi.tilebox(IntVect{AMREX_D_DECL(0,0,0)}); - if (patch_type == PatchType::coarse) cbx.refine(rr); - wt = (amrex::second() - wt) / cbx.d_numPts(); - auto costfab = cost->array(mfi); - amrex::ParallelFor(cbx, - [=] AMREX_GPU_DEVICE (int i, int j, int k) - { - costfab(i,j,k) += wt; - }); - } - } + const int patch_level = (patch_type == PatchType::fine) ? lev : lev-1; + const std::array<Real,3>& dx = WarpX::CellSize(patch_level); + const Real dtsdx = a_dt/dx[0], dtsdy = a_dt/dx[1], dtsdz = a_dt/dx[2]; if (do_pml && pml[lev]->ok()) { diff --git a/Source/FieldSolver/WarpX_FDTD.H b/Source/FieldSolver/WarpX_FDTD.H index 4ad251264..8a2202059 100644 --- a/Source/FieldSolver/WarpX_FDTD.H +++ b/Source/FieldSolver/WarpX_FDTD.H @@ -10,105 +10,6 @@ #include <AMReX_FArrayBox.H> AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE -void warpx_push_bx_yee(int i, int j, int k, - amrex::Array4<amrex::Real> const& Bx, - amrex::Array4<amrex::Real const> const& Ey, - amrex::Array4<amrex::Real const> const& Ez, - amrex::Real dtsdx, amrex::Real dtsdy, amrex::Real dtsdz, - amrex::Real dxinv, amrex::Real rmin, const long nmodes) -{ -#if defined WARPX_DIM_3D - Bx(i,j,k) += - dtsdy * (Ez(i,j+1,k ) - Ez(i,j,k)) - + dtsdz * (Ey(i,j ,k+1) - Ey(i,j,k)); -#elif (defined WARPX_DIM_XZ) - Bx(i,j,0) += + dtsdz * (Ey(i,j+1,0) - Ey(i,j,0)); -#elif (defined WARPX_DIM_RZ) - if (i != 0 || rmin != 0.) { - Bx(i,j,0,0) += + dtsdz * (Ey(i,j+1,0,0) - Ey(i,j,0,0)); - } else { - Bx(i,j,0,0) = 0.; - } - for (int imode=1 ; imode < nmodes ; imode++) { - if (i == 0 && rmin == 0) { - if (imode == 1) { - // For the mode m = 1, the bulk equation diverges on axis - // (due to the 1/r terms). The following expressions regularize - // these divergences by assuming, on axis : - // Ez/r = 0/r + dEz/dr - Bx(i,j,0,2*imode-1) = Bx(i,j,0,2*imode-1) - imode*dtsdx*Ez(i+1,j,0,2*imode) - + dtsdz*(Ey(i,j+1,0,2*imode-1) - Ey(i,j,0,2*imode-1)); - Bx(i,j,0,2*imode) = Bx(i,j,0,2*imode) + imode*dtsdx*Ez(i+1,j,0,2*imode-1) - + dtsdz*(Ey(i,j+1,0,2*imode) - Ey(i,j,0,2*imode)); - } else { - Bx(i,j,0,2*imode-1) = 0.; - Bx(i,j,0,2*imode) = 0.; - } - } else { - // Br(i,j,m) = Br(i,j,m) + I*m*dt*Ez(i,j,m)/r + dtsdz*(Et(i,j+1,m) - Et(i,j,m)) - const amrex::Real r = rmin*dxinv + i; - Bx(i,j,0,2*imode-1) = Bx(i,j,0,2*imode-1) - imode*dtsdx*Ez(i,j,0,2*imode)/r - + dtsdz*(Ey(i,j+1,0,2*imode-1) - Ey(i,j,0,2*imode-1)); - Bx(i,j,0,2*imode) = Bx(i,j,0,2*imode) + imode*dtsdx*Ez(i,j,0,2*imode-1)/r - + dtsdz*(Ey(i,j+1,0,2*imode) - Ey(i,j,0,2*imode)); - } - } -#endif -} - -AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE -void warpx_push_by_yee(int i, int j, int k, - amrex::Array4<amrex::Real> const& By, - amrex::Array4<amrex::Real const> const& Ex, - amrex::Array4<amrex::Real const> const& Ez, - amrex::Real dtsdx, amrex::Real dtsdz, - const long nmodes) -{ -#if defined WARPX_DIM_3D - By(i,j,k) += + dtsdx * (Ez(i+1,j,k ) - Ez(i,j,k)) - - dtsdz * (Ex(i ,j,k+1) - Ex(i,j,k)); -#elif (defined WARPX_DIM_XZ) || (defined WARPX_DIM_RZ) - // Note that the 2D Cartesian and RZ mode 0 are the same - By(i,j,0,0) += + dtsdx * (Ez(i+1,j ,0,0) - Ez(i,j,0,0)) - - dtsdz * (Ex(i ,j+1,0,0) - Ex(i,j,0,0)); -#if (defined WARPX_DIM_RZ) - for (int imode=1 ; imode < nmodes ; imode++) { - // Bt(i,j,m) = Bt(i,j,m) + dtsdr*(Ez(i+1,j,m) - Ez(i,j,m)) - dtsdz*(Er(i,j+1,m) - Er(i,j,m)) - By(i,j,0,2*imode-1) += + dtsdx*(Ez(i+1,j ,0,2*imode-1) - Ez(i,j,0,2*imode-1)) - - dtsdz*(Ex(i ,j+1,0,2*imode-1) - Ex(i,j,0,2*imode-1)); - By(i,j,0,2*imode) += + dtsdx*(Ez(i+1,j ,0,2*imode) - Ez(i,j,0,2*imode)) - - dtsdz*(Ex(i ,j+1,0,2*imode) - Ex(i,j,0,2*imode)); - } -#endif -#endif -} - -AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE -void warpx_push_bz_yee(int i, int j, int k, - amrex::Array4<amrex::Real> const& Bz, - amrex::Array4<amrex::Real const> const& Ex, - amrex::Array4<amrex::Real const> const& Ey, - amrex::Real dtsdx, amrex::Real dtsdy, - amrex::Real dxinv, amrex::Real rmin, const long nmodes) -{ -#if defined WARPX_DIM_3D - Bz(i,j,k) += - dtsdx * (Ey(i+1,j ,k) - Ey(i,j,k)) - + dtsdy * (Ex(i ,j+1,k) - Ex(i,j,k)); -#elif defined WARPX_DIM_XZ - Bz(i,j,0) += - dtsdx * (Ey(i+1,j,0) - Ey(i,j,0)); -#elif defined WARPX_DIM_RZ - const amrex::Real r = rmin*dxinv + i + 0.5; - const amrex::Real ru = 1. + 0.5/(rmin*dxinv + i + 0.5); - const amrex::Real rd = 1. - 0.5/(rmin*dxinv + i + 0.5); - Bz(i,j,0,0) += - dtsdx*(ru*Ey(i+1,j,0,0) - rd*Ey(i,j,0,0)); - for (int imode=1 ; imode < nmodes ; imode++) { - // Bz(i,j,m) = Bz(i,j,m) - dtsdr*(ru*Et(i+1,j,m) - rd*Et(i,j,m)) - I*m*dt*Er(i,j,m)/r - Bz(i,j,0,2*imode-1) += - dtsdx*(ru*Ey(i+1,j,0,2*imode-1) - rd*Ey(i,j,0,2*imode-1)) + imode*dtsdx*Ex(i,j,0,2*imode)/r; - Bz(i,j,0,2*imode) += - dtsdx*(ru*Ey(i+1,j,0,2*imode) - rd*Ey(i,j,0,2*imode)) - imode*dtsdx*Ex(i,j,0,2*imode-1)/r; - } -#endif -} - -AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE void warpx_push_ex_yee(int i, int j, int k, amrex::Array4<amrex::Real> const& Ex, amrex::Array4<amrex::Real const> const& By, @@ -342,117 +243,6 @@ static void warpx_calculate_ckc_coefficients(amrex::Real dtsdx, amrex::Real dtsd } AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE -void warpx_push_bx_ckc(int j, int k, int l, - amrex::Array4<amrex::Real> const& Bx, - amrex::Array4<amrex::Real const> const& Ey, - amrex::Array4<amrex::Real const> const& Ez, - amrex::Real betaxy, amrex::Real betaxz, amrex::Real betayx, - amrex::Real betayz, amrex::Real betazx, amrex::Real betazy, - amrex::Real gammax, amrex::Real gammay, amrex::Real gammaz, - amrex::Real alphax, amrex::Real alphay, amrex::Real alphaz) -{ -#if defined WARPX_DIM_3D - Bx(j,k,l) += - alphay * (Ez(j ,k+1,l ) - Ez(j, k ,l )) - - betayx * (Ez(j+1,k+1,l ) - Ez(j+1,k ,l ) - + Ez(j-1,k+1,l ) - Ez(j-1,k ,l )) - - betayz * (Ez(j ,k+1,l+1) - Ez(j ,k ,l+1) - + Ez(j ,k+1,l-1) - Ez(j ,k ,l-1)) - - gammay * (Ez(j+1,k+1,l+1) - Ez(j+1,k ,l+1) - + Ez(j-1,k+1,l+1) - Ez(j-1,k ,l+1) - + Ez(j+1,k+1,l-1) - Ez(j+1,k ,l-1) - + Ez(j-1,k+1,l-1) - Ez(j-1,k ,l-1)) - + alphaz * (Ey(j ,k ,l+1) - Ey(j, k, l )) - + betazx * (Ey(j+1,k ,l+1) - Ey(j+1,k ,l ) - + Ey(j-1,k ,l+1) - Ey(j-1,k ,l )) - + betazy * (Ey(j ,k+1,l+1) - Ey(j ,k+1,l ) - + Ey(j ,k-1,l+1) - Ey(j ,k-1,l )) - + gammaz * (Ey(j+1,k+1,l+1) - Ey(j+1,k+1,l ) - + Ey(j-1,k+1,l+1) - Ey(j-1,k+1,l ) - + Ey(j+1,k-1,l+1) - Ey(j+1,k-1,l ) - + Ey(j-1,k-1,l+1) - Ey(j-1,k-1,l )); -#elif defined WARPX_DIM_XZ - Bx(j,k,0) += + alphaz * (Ey(j ,k+1,0) - Ey(j, k,0)) - + betazx * (Ey(j+1,k+1,0) - Ey(j+1,k,0) - + Ey(j-1,k+1,0) - Ey(j-1,k,0)); -#endif -} - -AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE -void warpx_push_by_ckc(int j, int k, int l, - amrex::Array4<amrex::Real> const& By, - amrex::Array4<amrex::Real const> const& Ex, - amrex::Array4<amrex::Real const> const& Ez, - amrex::Real betaxy, amrex::Real betaxz, amrex::Real betayx, - amrex::Real betayz, amrex::Real betazx, amrex::Real betazy, - amrex::Real gammax, amrex::Real gammay, amrex::Real gammaz, - amrex::Real alphax, amrex::Real alphay, amrex::Real alphaz) -{ -#if defined WARPX_DIM_3D - By(j,k,l) += + alphax * (Ez(j+1,k ,l ) - Ez(j, k, l )) - + betaxy * (Ez(j+1,k+1,l ) - Ez(j ,k+1,l ) - + Ez(j+1,k-1,l ) - Ez(j ,k-1,l )) - + betaxz * (Ez(j+1,k ,l+1) - Ez(j ,k ,l+1) - + Ez(j+1,k ,l-1) - Ez(j ,k ,l-1)) - + gammax * (Ez(j+1,k+1,l+1) - Ez(j ,k+1,l+1) - + Ez(j+1,k-1,l+1) - Ez(j ,k-1,l+1) - + Ez(j+1,k+1,l-1) - Ez(j ,k+1,l-1) - + Ez(j+1,k-1,l-1) - Ez(j ,k-1,l-1)) - - alphaz * (Ex(j ,k ,l+1) - Ex(j ,k ,l )) - - betazx * (Ex(j+1,k ,l+1) - Ex(j+1,k ,l ) - + Ex(j-1,k ,l+1) - Ex(j-1,k ,l )) - - betazy * (Ex(j ,k+1,l+1) - Ex(j ,k+1,l ) - + Ex(j ,k-1,l+1) - Ex(j ,k-1,l )) - - gammaz * (Ex(j+1,k+1,l+1) - Ex(j+1,k+1,l ) - + Ex(j-1,k+1,l+1) - Ex(j-1,k+1,l ) - + Ex(j+1,k-1,l+1) - Ex(j+1,k-1,l ) - + Ex(j-1,k-1,l+1) - Ex(j-1,k-1,l )); -#elif defined WARPX_DIM_XZ - By(j,k,0) += + alphax * (Ez(j+1,k ,0) - Ez(j,k ,0)) - + betaxz * (Ez(j+1,k+1,0) - Ez(j,k+1,0) - + Ez(j+1,k-1,0) - Ez(j,k-1,0)) - - alphaz * (Ex(j ,k+1,0) - Ex(j,k ,0)) - - betazx * (Ex(j+1,k+1,0) - Ex(j+1,k,0) - + Ex(j-1,k+1,0) - Ex(j-1,k,0)); -#endif -} - -AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE -void warpx_push_bz_ckc(int j, int k, int l, - amrex::Array4<amrex::Real> const& Bz, - amrex::Array4<amrex::Real const> const& Ex, - amrex::Array4<amrex::Real const> const& Ey, - amrex::Real betaxy, amrex::Real betaxz, amrex::Real betayx, - amrex::Real betayz, amrex::Real betazx, amrex::Real betazy, - amrex::Real gammax, amrex::Real gammay, amrex::Real gammaz, - amrex::Real alphax, amrex::Real alphay, amrex::Real alphaz) -{ -#if defined WARPX_DIM_3D - Bz(j,k,l) += - alphax * (Ey(j+1,k ,l ) - Ey(j ,k ,l )) - - betaxy * (Ey(j+1,k+1,l ) - Ey(j ,k+1,l ) - + Ey(j+1,k-1,l ) - Ey(j ,k-1,l )) - - betaxz * (Ey(j+1,k ,l+1) - Ey(j ,k ,l+1) - + Ey(j+1,k ,l-1) - Ey(j ,k ,l-1)) - - gammax * (Ey(j+1,k+1,l+1) - Ey(j ,k+1,l+1) - + Ey(j+1,k-1,l+1) - Ey(j ,k-1,l+1) - + Ey(j+1,k+1,l-1) - Ey(j ,k+1,l-1) - + Ey(j+1,k-1,l-1) - Ey(j ,k-1,l-1)) - + alphay * (Ex(j ,k+1,l ) - Ex(j ,k ,l )) - + betayx * (Ex(j+1,k+1,l ) - Ex(j+1,k ,l ) - + Ex(j-1,k+1,l ) - Ex(j-1,k ,l )) - + betayz * (Ex(j ,k+1,l+1) - Ex(j ,k ,l+1) - + Ex(j ,k+1,l-1) - Ex(j ,k ,l-1)) - + gammay * (Ex(j+1,k+1,l+1) - Ex(j+1,k ,l+1) - + Ex(j-1,k+1,l+1) - Ex(j-1,k ,l+1) - + Ex(j+1,k+1,l-1) - Ex(j+1,k ,l-1) - + Ex(j-1,k+1,l-1) - Ex(j-1,k ,l-1)); -#elif defined WARPX_DIM_XZ - Bz(j,k,0) += - alphax * (Ey(j+1,k ,0) - Ey(j,k ,0)) - - betaxz * (Ey(j+1,k+1,0) - Ey(j,k+1,0) - + Ey(j+1,k-1,0) - Ey(j,k-1,0)); -#endif -} - -AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE void warpx_push_ex_f_ckc(int j, int k, int l, amrex::Array4<amrex::Real> const& Ex, amrex::Array4<amrex::Real const> const& F, diff --git a/Source/FieldSolver/WarpX_K.H b/Source/FieldSolver/WarpX_K.H index a4f657909..226c0abc6 100644 --- a/Source/FieldSolver/WarpX_K.H +++ b/Source/FieldSolver/WarpX_K.H @@ -10,53 +10,6 @@ #include <AMReX_FArrayBox.H> AMREX_GPU_HOST_DEVICE AMREX_INLINE -void warpx_push_bx_nodal (int j, int k, int l, - amrex::Array4<amrex::Real> const& Bx, - amrex::Array4<amrex::Real const> const& Ey, - amrex::Array4<amrex::Real const> const& Ez, - amrex::Real dtsdy, amrex::Real dtsdz) -{ -#if (AMREX_SPACEDIM == 3) - Bx(j,k,l) = Bx(j,k,l) - 0.5*dtsdy * (Ez(j,k+1,l ) - Ez(j,k-1,l )) - + 0.5*dtsdz * (Ey(j,k ,l+1) - Ey(j,k ,l-1)); -#else - Bx(j,k,0) = Bx(j,k,0) + 0.5*dtsdz * (Ey(j,k+1,0) - Ey(j,k-1,0)); -#endif -} - -AMREX_GPU_HOST_DEVICE AMREX_INLINE -void warpx_push_by_nodal (int j, int k, int l, - amrex::Array4<amrex::Real> const& By, - amrex::Array4<amrex::Real const> const& Ex, - amrex::Array4<amrex::Real const> const& Ez, - amrex::Real dtsdx, - amrex::Real dtsdz) -{ -#if (AMREX_SPACEDIM == 3) - By(j,k,l) = By(j,k,l) + 0.5*dtsdx * (Ez(j+1,k,l ) - Ez(j-1,k,l )) - - 0.5*dtsdz * (Ex(j ,k,l+1) - Ex(j ,k,l-1)); -#else - By(j,k,0) = By(j,k,0) + 0.5*dtsdx * (Ez(j+1,k ,0) - Ez(j-1,k ,0)) - - 0.5*dtsdz * (Ex(j ,k+1,0) - Ex(j ,k-1,0)); -#endif -} - -AMREX_GPU_HOST_DEVICE AMREX_INLINE -void warpx_push_bz_nodal (int j, int k, int l, - amrex::Array4<amrex::Real> const& Bz, - amrex::Array4<amrex::Real const> const& Ex, - amrex::Array4<amrex::Real const> const& Ey, - amrex::Real dtsdx, amrex::Real dtsdy) -{ -#if (AMREX_SPACEDIM == 3) - Bz(j,k,l) = Bz(j,k,l) - 0.5*dtsdx * (Ey(j+1,k ,l) - Ey(j-1,k ,l)) - + 0.5*dtsdy * (Ex(j ,k+1,l) - Ex(j ,k-1,l)); -#else - Bz(j,k,0) = Bz(j,k,0) - 0.5*dtsdx * (Ey(j+1,k ,0) - Ey(j-1,k ,0)); -#endif -} - -AMREX_GPU_HOST_DEVICE AMREX_INLINE void warpx_push_ex_nodal (int j, int k, int l, amrex::Array4<amrex::Real> const& Ex, amrex::Array4<amrex::Real const> const& By, diff --git a/Source/Initialization/InjectorDensity.H b/Source/Initialization/InjectorDensity.H index 4558eeb96..8bb5650ba 100644 --- a/Source/Initialization/InjectorDensity.H +++ b/Source/Initialization/InjectorDensity.H @@ -45,7 +45,7 @@ struct InjectorDensityParser } // InjectorDensityParser constructs this GpuParser from WarpXParser. - GpuParser m_parser; + GpuParser<3> m_parser; }; // struct whose getDensity returns local density computed from predefined profile. @@ -152,8 +152,6 @@ struct InjectorDensity ~InjectorDensity (); - std::size_t sharedMemoryNeeded () const noexcept; - // call getDensity from the object stored in the union // (the union is called Object, and the instance is called object). AMREX_GPU_HOST_DEVICE diff --git a/Source/Initialization/InjectorDensity.cpp b/Source/Initialization/InjectorDensity.cpp index f59202db9..accadc74a 100644 --- a/Source/Initialization/InjectorDensity.cpp +++ b/Source/Initialization/InjectorDensity.cpp @@ -34,23 +34,6 @@ InjectorDensity::~InjectorDensity () } } -// Compute the amount of memory needed in GPU Shared Memory. -std::size_t -InjectorDensity::sharedMemoryNeeded () const noexcept -{ - switch (type) - { - case Type::parser: - { - // For parser injector, the 3D position of each particle - // and time, t, is stored in shared memory. - return amrex::Gpu::numThreadsPerBlockParallelFor() * sizeof(double) * 4; - } - default: - return 0; - } -} - InjectorDensityPredefined::InjectorDensityPredefined ( std::string const& a_species_name) noexcept : profile(Profile::null) diff --git a/Source/Initialization/InjectorMomentum.H b/Source/Initialization/InjectorMomentum.H index bb5a70784..1d407508c 100644 --- a/Source/Initialization/InjectorMomentum.H +++ b/Source/Initialization/InjectorMomentum.H @@ -223,7 +223,7 @@ struct InjectorMomentumParser return amrex::XDim3{m_ux_parser(x,y,z),m_uy_parser(x,y,z),m_uz_parser(x,y,z)}; } - GpuParser m_ux_parser, m_uy_parser, m_uz_parser; + GpuParser<3> m_ux_parser, m_uy_parser, m_uz_parser; }; // Base struct for momentum injector. @@ -301,8 +301,6 @@ struct InjectorMomentum ~InjectorMomentum (); - std::size_t sharedMemoryNeeded () const noexcept; - // call getMomentum from the object stored in the union // (the union is called Object, and the instance is called object). AMREX_GPU_HOST_DEVICE diff --git a/Source/Initialization/InjectorMomentum.cpp b/Source/Initialization/InjectorMomentum.cpp index edbba8ac5..0765eb0a3 100644 --- a/Source/Initialization/InjectorMomentum.cpp +++ b/Source/Initialization/InjectorMomentum.cpp @@ -28,21 +28,3 @@ InjectorMomentum::~InjectorMomentum () } } } - -// Compute the amount of memory needed in GPU Shared Memory. -std::size_t -InjectorMomentum::sharedMemoryNeeded () const noexcept -{ - switch (type) - { - case Type::parser: - { - // For parser injector, the 3D position of each particle and time, t, - // is stored in shared memory. - return amrex::Gpu::numThreadsPerBlockParallelFor() * sizeof(double) * 4; - } - default: - return 0; - } -} - diff --git a/Source/Initialization/InjectorPosition.H b/Source/Initialization/InjectorPosition.H index a8d2200e9..0ef6c0390 100644 --- a/Source/Initialization/InjectorPosition.H +++ b/Source/Initialization/InjectorPosition.H @@ -105,8 +105,6 @@ struct InjectorPosition void operator= (InjectorPosition const&) = delete; void operator= (InjectorPosition &&) = delete; - std::size_t sharedMemoryNeeded () const noexcept { return 0; } - // call getPositionUnitBox from the object stored in the union // (the union is called Object, and the instance is called object). AMREX_GPU_HOST_DEVICE diff --git a/Source/Initialization/PlasmaInjector.H b/Source/Initialization/PlasmaInjector.H index 70d99b9a3..308b4121e 100644 --- a/Source/Initialization/PlasmaInjector.H +++ b/Source/Initialization/PlasmaInjector.H @@ -83,15 +83,6 @@ public: InjectorDensity* getInjectorDensity (); InjectorMomentum* getInjectorMomentum (); - // When running on GPU, injector for position, momentum and density store - // particle 3D positions in shared memory IF using the parser. - std::size_t - sharedMemoryNeeded () const noexcept { - return amrex::max(inj_pos->sharedMemoryNeeded(), - inj_rho->sharedMemoryNeeded(), - inj_mom->sharedMemoryNeeded()); - } - protected: amrex::Real mass, charge; diff --git a/Source/Initialization/PlasmaInjector.cpp b/Source/Initialization/PlasmaInjector.cpp index cacbaab75..5fa82e48f 100644 --- a/Source/Initialization/PlasmaInjector.cpp +++ b/Source/Initialization/PlasmaInjector.cpp @@ -201,7 +201,7 @@ void PlasmaInjector::parseDensity (ParmParse& pp) Store_parserString(pp, "density_function(x,y,z)", str_density_function); // Construct InjectorDensity with InjectorDensityParser. inj_rho.reset(new InjectorDensity((InjectorDensityParser*)nullptr, - makeParser(str_density_function))); + makeParser(str_density_function,{"x","y","z"}))); } else { StringParseAbortMessage("Density profile type", rho_prof_s); } @@ -324,9 +324,9 @@ void PlasmaInjector::parseMomentum (ParmParse& pp) str_momentum_function_uz); // Construct InjectorMomentum with InjectorMomentumParser. inj_mom.reset(new InjectorMomentum((InjectorMomentumParser*)nullptr, - makeParser(str_momentum_function_ux), - makeParser(str_momentum_function_uy), - makeParser(str_momentum_function_uz))); + makeParser(str_momentum_function_ux,{"x","y","z"}), + makeParser(str_momentum_function_uy,{"x","y","z"}), + makeParser(str_momentum_function_uz,{"x","y","z"}))); } else { StringParseAbortMessage("Momentum distribution type", mom_dist_s); } diff --git a/Source/Initialization/WarpXInitData.cpp b/Source/Initialization/WarpXInitData.cpp index 957e22b68..e5571c519 100644 --- a/Source/Initialization/WarpXInitData.cpp +++ b/Source/Initialization/WarpXInitData.cpp @@ -319,12 +319,12 @@ WarpX::InitLevelData (int lev, Real time) Store_parserString(pp, "Bz_external_grid_function(x,y,z)", str_Bz_ext_grid_function); - Bxfield_parser.reset(new ParserWrapper( - makeParser(str_Bx_ext_grid_function))); - Byfield_parser.reset(new ParserWrapper( - makeParser(str_By_ext_grid_function))); - Bzfield_parser.reset(new ParserWrapper( - makeParser(str_Bz_ext_grid_function))); + Bxfield_parser.reset(new ParserWrapper<3>( + makeParser(str_Bx_ext_grid_function,{"x","y","z"}))); + Byfield_parser.reset(new ParserWrapper<3>( + makeParser(str_By_ext_grid_function,{"x","y","z"}))); + Bzfield_parser.reset(new ParserWrapper<3>( + makeParser(str_Bz_ext_grid_function,{"x","y","z"}))); // Initialize Bfield_fp with external function InitializeExternalFieldsOnGridUsingParser(Bfield_fp[lev][0].get(), @@ -371,12 +371,12 @@ WarpX::InitLevelData (int lev, Real time) Store_parserString(pp, "Ez_external_grid_function(x,y,z)", str_Ez_ext_grid_function); - Exfield_parser.reset(new ParserWrapper( - makeParser(str_Ex_ext_grid_function))); - Eyfield_parser.reset(new ParserWrapper( - makeParser(str_Ey_ext_grid_function))); - Ezfield_parser.reset(new ParserWrapper( - makeParser(str_Ez_ext_grid_function))); + Exfield_parser.reset(new ParserWrapper<3>( + makeParser(str_Ex_ext_grid_function,{"x","y","z"}))); + Eyfield_parser.reset(new ParserWrapper<3>( + makeParser(str_Ey_ext_grid_function,{"x","y","z"}))); + Ezfield_parser.reset(new ParserWrapper<3>( + makeParser(str_Ez_ext_grid_function,{"x","y","z"}))); // Initialize Efield_fp with external function InitializeExternalFieldsOnGridUsingParser(Efield_fp[lev][0].get(), @@ -467,8 +467,8 @@ WarpX::InitLevelDataFFT (int lev, Real time) void WarpX::InitializeExternalFieldsOnGridUsingParser ( MultiFab *mfx, MultiFab *mfy, MultiFab *mfz, - ParserWrapper *xfield_parser, ParserWrapper *yfield_parser, - ParserWrapper *zfield_parser, IntVect x_nodal_flag, + ParserWrapper<3> *xfield_parser, ParserWrapper<3> *yfield_parser, + ParserWrapper<3> *zfield_parser, IntVect x_nodal_flag, IntVect y_nodal_flag, IntVect z_nodal_flag, const int lev) { @@ -518,7 +518,7 @@ WarpX::InitializeExternalFieldsOnGridUsingParser ( Real z = k*dx_lev[2] + real_box.lo(2) + fac_z; #endif // Initialize the x-component of the field. - mfxfab(i,j,k) = xfield_parser->getField(x,y,z); + mfxfab(i,j,k) = (*xfield_parser)(x,y,z); }, [=] AMREX_GPU_DEVICE (int i, int j, int k) { Real fac_x = (1.0 - mfy_type[0]) * dx_lev[0]*0.5; @@ -534,7 +534,7 @@ WarpX::InitializeExternalFieldsOnGridUsingParser ( Real z = k*dx_lev[2] + real_box.lo(2) + fac_z; #endif // Initialize the y-component of the field. - mfyfab(i,j,k) = yfield_parser->getField(x,y,z); + mfyfab(i,j,k) = (*yfield_parser)(x,y,z); }, [=] AMREX_GPU_DEVICE (int i, int j, int k) { Real fac_x = (1.0 - mfz_type[0]) * dx_lev[0]*0.5; @@ -550,13 +550,9 @@ WarpX::InitializeExternalFieldsOnGridUsingParser ( Real z = k*dx_lev[2] + real_box.lo(2) + fac_z; #endif // Initialize the z-component of the field. - mfzfab(i,j,k) = zfield_parser->getField(x,y,z); - }, - /* To allocate shared memory for the GPU threads. */ - /* But, for now only 4 doubles (x,y,z,t) are allocated. */ - amrex::Gpu::numThreadsPerBlockParallelFor() * sizeof(double) * 4 + mfzfab(i,j,k) = (*zfield_parser)(x,y,z); + } ); - } } diff --git a/Source/Parser/GpuParser.H b/Source/Parser/GpuParser.H index c6d870800..65db03524 100644 --- a/Source/Parser/GpuParser.H +++ b/Source/Parser/GpuParser.H @@ -10,42 +10,36 @@ #include <WarpXParser.H> #include <AMReX_Gpu.H> +#include <AMReX_Array.H> +#include <AMReX_TypeTraits.H> // When compiled for CPU, wrap WarpXParser and enable threading. // When compiled for GPU, store one copy of the parser in // CUDA managed memory for __device__ code, and one copy of the parser // in CUDA managed memory for __host__ code. This way, the parser can be // efficiently called from both host and device. +template <int N> class GpuParser { public: GpuParser (WarpXParser const& wp); void clear (); + template <typename... Ts> AMREX_GPU_HOST_DEVICE - amrex::Real - operator() (amrex::Real x, amrex::Real y, amrex::Real z, amrex::Real t=0.0) const noexcept + std::enable_if_t<sizeof...(Ts) == N + and amrex::Same<amrex::Real,Ts...>::value, + amrex::Real> + operator() (Ts... var) const noexcept { #ifdef AMREX_USE_GPU - -#ifdef AMREX_DEVICE_COMPILE + amrex::GpuArray<amrex::Real,N> l_var{var...}; +#if defined(__CUDA_ARCH__) || defined(__HIP_DEVICE_COMPILE__) // WarpX compiled for GPU, function compiled for __device__ - // the 3D position of each particle is stored in shared memory. - amrex::Gpu::SharedMemory<amrex::Real> gsm; - amrex::Real* p = gsm.dataPtr(); - int tid = threadIdx.x + threadIdx.y*blockDim.x + threadIdx.z*(blockDim.x*blockDim.y); - p[tid*4] = x; - p[tid*4+1] = y; - p[tid*4+2] = z; - p[tid*4+3] = t; - return wp_ast_eval(m_gpu_parser.ast); + return wp_ast_eval(m_gpu_parser.ast, l_var.data()); #else // WarpX compiled for GPU, function compiled for __host__ - m_var.x = x; - m_var.y = y; - m_var.z = z; - m_t = t; - return wp_ast_eval(m_cpu_parser.ast); + return wp_ast_eval(m_cpu_parser->ast, nullptr); #endif #else @@ -55,11 +49,8 @@ public: #else int tid = 0; #endif - m_var[tid].x = x; - m_var[tid].y = y; - m_var[tid].z = z; - m_t[tid] = t; - return wp_ast_eval(m_parser[tid]->ast); + m_var[tid] = amrex::GpuArray<amrex::Real,N>{var...}; + return wp_ast_eval(m_parser[tid]->ast, nullptr); #endif } @@ -70,16 +61,85 @@ private: // Copy of the parser running on __device__ struct wp_parser m_gpu_parser; // Copy of the parser running on __host__ - struct wp_parser m_cpu_parser; - mutable amrex::XDim3 m_var; - mutable amrex::Real m_t; + struct wp_parser* m_cpu_parser; + mutable amrex::GpuArray<amrex::Real,N> m_var; #else // Only one parser struct wp_parser** m_parser; - mutable amrex::XDim3* m_var; - mutable amrex::Real* m_t; + mutable amrex::GpuArray<amrex::Real,N>* m_var; int nthreads; #endif }; +template <int N> +GpuParser<N>::GpuParser (WarpXParser const& wp) +{ +#ifdef AMREX_USE_GPU + + struct wp_parser* a_wp = wp.m_parser; + // Initialize GPU parser: allocate memory in CUDA managed memory, + // copy all data needed on GPU to m_gpu_parser + m_gpu_parser.sz_mempool = wp_ast_size(a_wp->ast); + m_gpu_parser.p_root = (struct wp_node*) + amrex::The_Managed_Arena()->alloc(m_gpu_parser.sz_mempool); + m_gpu_parser.p_free = m_gpu_parser.p_root; + // 0: don't free the source + m_gpu_parser.ast = wp_parser_ast_dup(&m_gpu_parser, a_wp->ast, 0); + for (int i = 0; i < N; ++i) { + wp_parser_regvar_gpu(&m_gpu_parser, wp.m_varnames[i].c_str(), i); + } + + // Initialize CPU parser: + m_cpu_parser = wp_parser_dup(a_wp); + for (int i = 0; i < N; ++i) { + wp_parser_regvar(m_cpu_parser, wp.m_varnames[i].c_str(), &m_var[i]); + } + +#else // not defined AMREX_USE_GPU + +#ifdef _OPENMP + nthreads = omp_get_max_threads(); +#else // _OPENMP + nthreads = 1; +#endif // _OPENMP + + m_parser = ::new struct wp_parser*[nthreads]; + m_var = ::new amrex::GpuArray<amrex::Real,N>[nthreads]; + + for (int tid = 0; tid < nthreads; ++tid) + { +#ifdef _OPENMP + m_parser[tid] = wp_parser_dup(wp.m_parser[tid]); + for (int i = 0; i < N; ++i) { + wp_parser_regvar(m_parser[tid], wp.m_varnames[tid][i].c_str(), &(m_var[tid][i])); + } +#else // _OPENMP + m_parser[tid] = wp_parser_dup(wp.m_parser); + for (int i = 0; i < N; ++i) { + wp_parser_regvar(m_parser[tid], wp.m_varnames[i].c_str(), &(m_var[tid][i])); + } +#endif // _OPENMP + } + +#endif // AMREX_USE_GPU +} + + +template <int N> +void +GpuParser<N>::clear () +{ +#ifdef AMREX_USE_GPU + amrex::The_Managed_Arena()->free(m_gpu_parser.ast); + wp_parser_delete(m_cpu_parser); +#else + for (int tid = 0; tid < nthreads; ++tid) + { + wp_parser_delete(m_parser[tid]); + } + ::delete[] m_parser; + ::delete[] m_var; +#endif +} + #endif diff --git a/Source/Parser/GpuParser.cpp b/Source/Parser/GpuParser.cpp deleted file mode 100644 index 22fab6313..000000000 --- a/Source/Parser/GpuParser.cpp +++ /dev/null @@ -1,84 +0,0 @@ -/* Copyright 2019-2020 Maxence Thevenet, Revathi Jambunathan, Weiqun Zhang - * - * - * This file is part of WarpX. - * - * License: BSD-3-Clause-LBNL - */ -#include <GpuParser.H> - -GpuParser::GpuParser (WarpXParser const& wp) -{ -#ifdef AMREX_USE_GPU - - struct wp_parser* a_wp = wp.m_parser; - // Initialize GPU parser: allocate memory in CUDA managed memory, - // copy all data needed on GPU to m_gpu_parser - m_gpu_parser.sz_mempool = wp_ast_size(a_wp->ast); - m_gpu_parser.p_root = (struct wp_node*) - amrex::The_Managed_Arena()->alloc(m_gpu_parser.sz_mempool); - m_gpu_parser.p_free = m_gpu_parser.p_root; - // 0: don't free the source - m_gpu_parser.ast = wp_parser_ast_dup(&m_gpu_parser, a_wp->ast, 0); - wp_parser_regvar_gpu(&m_gpu_parser, "x", 0); - wp_parser_regvar_gpu(&m_gpu_parser, "y", 1); - wp_parser_regvar_gpu(&m_gpu_parser, "z", 2); - wp_parser_regvar_gpu(&m_gpu_parser, "t", 3); - - // Initialize CPU parser: allocate memory in CUDA managed memory, - // copy all data needed on CPU to m_cpu_parser - m_cpu_parser.sz_mempool = wp_ast_size(a_wp->ast); - m_cpu_parser.p_root = (struct wp_node*) - amrex::The_Managed_Arena()->alloc(m_cpu_parser.sz_mempool); - m_cpu_parser.p_free = m_cpu_parser.p_root; - // 0: don't free the source - m_cpu_parser.ast = wp_parser_ast_dup(&m_cpu_parser, a_wp->ast, 0); - wp_parser_regvar(&m_cpu_parser, "x", &(m_var.x)); - wp_parser_regvar(&m_cpu_parser, "y", &(m_var.y)); - wp_parser_regvar(&m_cpu_parser, "z", &(m_var.z)); - wp_parser_regvar(&m_cpu_parser, "t", &(m_t)); - -#else // not defined AMREX_USE_GPU - -#ifdef _OPENMP - nthreads = omp_get_max_threads(); -#else // _OPENMP - nthreads = 1; -#endif // _OPENMP - - m_parser = ::new struct wp_parser*[nthreads]; - m_var = ::new amrex::XDim3[nthreads]; - m_t = ::new amrex::Real[nthreads]; - - for (int tid = 0; tid < nthreads; ++tid) - { -#ifdef _OPENMP - m_parser[tid] = wp_parser_dup(wp.m_parser[tid]); -#else // _OPENMP - m_parser[tid] = wp_parser_dup(wp.m_parser); -#endif // _OPENMP - wp_parser_regvar(m_parser[tid], "x", &(m_var[tid].x)); - wp_parser_regvar(m_parser[tid], "y", &(m_var[tid].y)); - wp_parser_regvar(m_parser[tid], "z", &(m_var[tid].z)); - wp_parser_regvar(m_parser[tid], "t", &(m_t[tid])); - } - -#endif // AMREX_USE_GPU -} - -void -GpuParser::clear () -{ -#ifdef AMREX_USE_GPU - amrex::The_Managed_Arena()->free(m_gpu_parser.ast); - amrex::The_Managed_Arena()->free(m_cpu_parser.ast); -#else - for (int tid = 0; tid < nthreads; ++tid) - { - wp_parser_delete(m_parser[tid]); - } - ::delete[] m_parser; - ::delete[] m_var; -#endif -} - diff --git a/Source/Parser/Make.package b/Source/Parser/Make.package index 15115c138..be07e3a7d 100644 --- a/Source/Parser/Make.package +++ b/Source/Parser/Make.package @@ -4,7 +4,6 @@ cEXE_headers += wp_parser_y.h wp_parser.tab.h wp_parser.lex.h wp_parser_c.h CEXE_sources += WarpXParser.cpp CEXE_headers += WarpXParser.H CEXE_headers += GpuParser.H -CEXE_sources += GpuParser.cpp CEXE_headers += WarpXParserWrapper.H INCLUDE_LOCATIONS += $(WARPX_HOME)/Source/Parser diff --git a/Source/Parser/WarpXParser.H b/Source/Parser/WarpXParser.H index 863b35fb8..703b1effc 100644 --- a/Source/Parser/WarpXParser.H +++ b/Source/Parser/WarpXParser.H @@ -21,7 +21,7 @@ #include <omp.h> #endif -class GpuParser; +template <int N> class GpuParser; class WarpXParser { @@ -56,7 +56,7 @@ public: std::set<std::string> symbols () const; - friend class GpuParser; + template <int N> friend class GpuParser; private: void clear (); @@ -71,9 +71,11 @@ private: #ifdef _OPENMP std::vector<struct wp_parser*> m_parser; mutable std::vector<std::array<amrex::Real,16> > m_variables; + mutable std::vector<std::vector<std::string> > m_varnames; #else struct wp_parser* m_parser = nullptr; mutable std::array<amrex::Real,16> m_variables; + mutable std::vector<std::string> m_varnames; #endif }; @@ -82,9 +84,9 @@ amrex::Real WarpXParser::eval () const noexcept { #ifdef _OPENMP - return wp_ast_eval(m_parser[omp_get_thread_num()]->ast); + return wp_ast_eval(m_parser[omp_get_thread_num()]->ast,nullptr); #else - return wp_ast_eval(m_parser->ast); + return wp_ast_eval(m_parser->ast,nullptr); #endif } diff --git a/Source/Parser/WarpXParser.cpp b/Source/Parser/WarpXParser.cpp index 8c8be7ecb..dd000792b 100644 --- a/Source/Parser/WarpXParser.cpp +++ b/Source/Parser/WarpXParser.cpp @@ -27,6 +27,7 @@ WarpXParser::define (std::string const& func_body) int nthreads = omp_get_max_threads(); m_variables.resize(nthreads); + m_varnames.resize(nthreads); m_parser.resize(nthreads); m_parser[0] = wp_c_parser_new(f.c_str()); #pragma omp parallel @@ -53,6 +54,7 @@ void WarpXParser::clear () { m_expression.clear(); + m_varnames.clear(); #ifdef _OPENMP @@ -80,8 +82,10 @@ WarpXParser::registerVariable (std::string const& name, amrex::Real& var) // We assume this is called inside OMP parallel region #ifdef _OPENMP wp_parser_regvar(m_parser[omp_get_thread_num()], name.c_str(), &var); + m_varnames[omp_get_thread_num()].push_back(name); #else wp_parser_regvar(m_parser, name.c_str(), &var); + m_varnames.push_back(name); #endif } @@ -98,6 +102,7 @@ WarpXParser::registerVariables (std::vector<std::string> const& names) auto& v = m_variables[tid]; for (int j = 0; j < names.size(); ++j) { wp_parser_regvar(p, names[j].c_str(), &(v[j])); + m_varnames[tid].push_back(names[j]); } } @@ -105,6 +110,7 @@ WarpXParser::registerVariables (std::vector<std::string> const& names) for (int j = 0; j < names.size(); ++j) { wp_parser_regvar(m_parser, names[j].c_str(), &(m_variables[j])); + m_varnames.push_back(names[j]); } #endif diff --git a/Source/Parser/WarpXParserWrapper.H b/Source/Parser/WarpXParserWrapper.H index 2c76d97a3..38147aba5 100644 --- a/Source/Parser/WarpXParserWrapper.H +++ b/Source/Parser/WarpXParserWrapper.H @@ -18,24 +18,16 @@ * in a safe way. The ParserWrapper struct is used to avoid memory leak * in the EB parser functions. */ +template <int N> struct ParserWrapper - : public amrex::Gpu::Managed + : public amrex::Gpu::Managed, public GpuParser<N> { - ParserWrapper (WarpXParser const& a_parser) noexcept - : m_parser(a_parser) {} + using GpuParser<N>::GpuParser; - ~ParserWrapper() { - m_parser.clear(); - } + ParserWrapper (ParserWrapper<N> const&) = delete; + void operator= (ParserWrapper<N> const&) = delete; - AMREX_GPU_HOST_DEVICE - amrex::Real - getField (amrex::Real x, amrex::Real y, amrex::Real z, amrex::Real t=0.0) const noexcept - { - return m_parser(x,y,z,t); - } - - GpuParser m_parser; + ~ParserWrapper() { GpuParser<N>::clear(); } }; #endif diff --git a/Source/Parser/wp_parser_c.h b/Source/Parser/wp_parser_c.h index 2cf0e2c00..c9c0d82ac 100644 --- a/Source/Parser/wp_parser_c.h +++ b/Source/Parser/wp_parser_c.h @@ -23,16 +23,10 @@ extern "C" { AMREX_GPU_HOST_DEVICE inline amrex_real -wp_ast_eval (struct wp_node* node) +wp_ast_eval (struct wp_node* node, amrex_real const* x) { amrex_real result; -#ifdef AMREX_DEVICE_COMPILE - extern __shared__ amrex_real extern_xyz[]; - int tid = threadIdx.x + threadIdx.y*blockDim.x + threadIdx.z*(blockDim.x*blockDim.y); - amrex_real* x = extern_xyz + tid*4; // parser assumes 4 independent variables (x,y,z,t) -#endif - switch (node->type) { case WP_NUMBER: @@ -42,7 +36,7 @@ wp_ast_eval (struct wp_node* node) } case WP_SYMBOL: { -#ifdef AMREX_DEVICE_COMPILE +#if defined(__CUDA_ARCH__) || defined(__HIP_DEVICE_COMPILE__) int i =((struct wp_symbol*)node)->ip.i; result = x[i]; #else @@ -52,45 +46,45 @@ wp_ast_eval (struct wp_node* node) } case WP_ADD: { - result = wp_ast_eval(node->l) + wp_ast_eval(node->r); + result = wp_ast_eval(node->l,x) + wp_ast_eval(node->r,x); break; } case WP_SUB: { - result = wp_ast_eval(node->l) - wp_ast_eval(node->r); + result = wp_ast_eval(node->l,x) - wp_ast_eval(node->r,x); break; } case WP_MUL: { - result = wp_ast_eval(node->l) * wp_ast_eval(node->r); + result = wp_ast_eval(node->l,x) * wp_ast_eval(node->r,x); break; } case WP_DIV: { - result = wp_ast_eval(node->l) / wp_ast_eval(node->r); + result = wp_ast_eval(node->l,x) / wp_ast_eval(node->r,x); break; } case WP_NEG: { - result = -wp_ast_eval(node->l); + result = -wp_ast_eval(node->l,x); break; } case WP_F1: { result = wp_call_f1(((struct wp_f1*)node)->ftype, - wp_ast_eval(((struct wp_f1*)node)->l)); + wp_ast_eval(((struct wp_f1*)node)->l,x)); break; } case WP_F2: { result = wp_call_f2(((struct wp_f2*)node)->ftype, - wp_ast_eval(((struct wp_f2*)node)->l), - wp_ast_eval(((struct wp_f2*)node)->r)); + wp_ast_eval(((struct wp_f2*)node)->l,x), + wp_ast_eval(((struct wp_f2*)node)->r,x)); break; } case WP_ADD_VP: { -#ifdef AMREX_DEVICE_COMPILE +#if defined(__CUDA_ARCH__) || defined(__HIP_DEVICE_COMPILE__) int i = node->rip.i; result = node->lvp.v + x[i]; #else @@ -100,7 +94,7 @@ wp_ast_eval (struct wp_node* node) } case WP_ADD_PP: { -#ifdef AMREX_DEVICE_COMPILE +#if defined(__CUDA_ARCH__) || defined(__HIP_DEVICE_COMPILE__) int i = node->lvp.ip.i; int j = node->rip.i; result = x[i] + x[j]; @@ -111,7 +105,7 @@ wp_ast_eval (struct wp_node* node) } case WP_SUB_VP: { -#ifdef AMREX_DEVICE_COMPILE +#if defined(__CUDA_ARCH__) || defined(__HIP_DEVICE_COMPILE__) int i = node->rip.i; result = node->lvp.v - x[i]; #else @@ -121,7 +115,7 @@ wp_ast_eval (struct wp_node* node) } case WP_SUB_PP: { -#ifdef AMREX_DEVICE_COMPILE +#if defined(__CUDA_ARCH__) || defined(__HIP_DEVICE_COMPILE__) int i = node->lvp.ip.i; int j = node->rip.i; result = x[i] - x[j]; @@ -132,7 +126,7 @@ wp_ast_eval (struct wp_node* node) } case WP_MUL_VP: { -#ifdef AMREX_DEVICE_COMPILE +#if defined(__CUDA_ARCH__) || defined(__HIP_DEVICE_COMPILE__) int i = node->rip.i; result = node->lvp.v * x[i]; #else @@ -142,7 +136,7 @@ wp_ast_eval (struct wp_node* node) } case WP_MUL_PP: { -#ifdef AMREX_DEVICE_COMPILE +#if defined(__CUDA_ARCH__) || defined(__HIP_DEVICE_COMPILE__) int i = node->lvp.ip.i; int j = node->rip.i; result = x[i] * x[j]; @@ -153,7 +147,7 @@ wp_ast_eval (struct wp_node* node) } case WP_DIV_VP: { -#ifdef AMREX_DEVICE_COMPILE +#if defined(__CUDA_ARCH__) || defined(__HIP_DEVICE_COMPILE__) int i = node->rip.i; result = node->lvp.v / x[i]; #else @@ -163,7 +157,7 @@ wp_ast_eval (struct wp_node* node) } case WP_DIV_PP: { -#ifdef AMREX_DEVICE_COMPILE +#if defined(__CUDA_ARCH__) || defined(__HIP_DEVICE_COMPILE__) int i = node->lvp.ip.i; int j = node->rip.i; result = x[i] / x[j]; @@ -174,7 +168,7 @@ wp_ast_eval (struct wp_node* node) } case WP_NEG_P: { -#ifdef AMREX_DEVICE_COMPILE +#if defined(__CUDA_ARCH__) || defined(__HIP_DEVICE_COMPILE__) int i = node->rip.i; result = -x[i]; #else diff --git a/Source/Parser/wp_parser_y.c b/Source/Parser/wp_parser_y.c index b71b42638..57293ab87 100644 --- a/Source/Parser/wp_parser_y.c +++ b/Source/Parser/wp_parser_y.c @@ -80,7 +80,7 @@ yyerror (char const *s, ...) { va_list vl; va_start(vl, s); -#ifdef AMREX_DEVICE_COMPILE +#if defined(__CUDA_ARCH__) || defined(__HIP_DEVICE_COMPILE__) printf(s,"\n"); assert(0); #else diff --git a/Source/Particles/Make.package b/Source/Particles/Make.package index 3bd4829a0..7ec1eb73a 100644 --- a/Source/Particles/Make.package +++ b/Source/Particles/Make.package @@ -1,5 +1,7 @@ -F90EXE_sources += interpolate_cic.F90 -F90EXE_sources += push_particles_ES.F90 +ifeq ($(DO_ELECTROSTATIC),TRUE) + F90EXE_sources += interpolate_cic.F90 + F90EXE_sources += push_particles_ES.F90 +endif CEXE_sources += MultiParticleContainer.cpp CEXE_sources += WarpXParticleContainer.cpp diff --git a/Source/Particles/MultiParticleContainer.H b/Source/Particles/MultiParticleContainer.H index 8d951263e..5f809f2cc 100644 --- a/Source/Particles/MultiParticleContainer.H +++ b/Source/Particles/MultiParticleContainer.H @@ -223,13 +223,13 @@ public: amrex::Vector<amrex::Real> m_B_external_particle; amrex::Vector<amrex::Real> m_E_external_particle; // ParserWrapper for B_external on the particle - std::unique_ptr<ParserWrapper> m_Bx_particle_parser; - std::unique_ptr<ParserWrapper> m_By_particle_parser; - std::unique_ptr<ParserWrapper> m_Bz_particle_parser; + std::unique_ptr<ParserWrapper<4> > m_Bx_particle_parser; + std::unique_ptr<ParserWrapper<4> > m_By_particle_parser; + std::unique_ptr<ParserWrapper<4> > m_Bz_particle_parser; // ParserWrapper for E_external on the particle - std::unique_ptr<ParserWrapper> m_Ex_particle_parser; - std::unique_ptr<ParserWrapper> m_Ey_particle_parser; - std::unique_ptr<ParserWrapper> m_Ez_particle_parser; + std::unique_ptr<ParserWrapper<4> > m_Ex_particle_parser; + std::unique_ptr<ParserWrapper<4> > m_Ey_particle_parser; + std::unique_ptr<ParserWrapper<4> > m_Ez_particle_parser; protected: diff --git a/Source/Particles/MultiParticleContainer.cpp b/Source/Particles/MultiParticleContainer.cpp index ac3bac467..6252d1ac4 100644 --- a/Source/Particles/MultiParticleContainer.cpp +++ b/Source/Particles/MultiParticleContainer.cpp @@ -130,12 +130,12 @@ MultiParticleContainer::ReadParameters () str_Bz_ext_particle_function); // Parser for B_external on the particle - m_Bx_particle_parser.reset(new ParserWrapper( - makeParser(str_Bx_ext_particle_function))); - m_By_particle_parser.reset(new ParserWrapper( - makeParser(str_By_ext_particle_function))); - m_Bz_particle_parser.reset(new ParserWrapper( - makeParser(str_Bz_ext_particle_function))); + m_Bx_particle_parser.reset(new ParserWrapper<4>( + makeParser(str_Bx_ext_particle_function,{"x","y","z","t"}))); + m_By_particle_parser.reset(new ParserWrapper<4>( + makeParser(str_By_ext_particle_function,{"x","y","z","t"}))); + m_Bz_particle_parser.reset(new ParserWrapper<4>( + makeParser(str_Bz_ext_particle_function,{"x","y","z","t"}))); } @@ -155,12 +155,12 @@ MultiParticleContainer::ReadParameters () Store_parserString(pp, "Ez_external_particle_function(x,y,z,t)", str_Ez_ext_particle_function); // Parser for E_external on the particle - m_Ex_particle_parser.reset(new ParserWrapper( - makeParser(str_Ex_ext_particle_function))); - m_Ey_particle_parser.reset(new ParserWrapper( - makeParser(str_Ey_ext_particle_function))); - m_Ez_particle_parser.reset(new ParserWrapper( - makeParser(str_Ez_ext_particle_function))); + m_Ex_particle_parser.reset(new ParserWrapper<4>( + makeParser(str_Ex_ext_particle_function,{"x","y","z","t"}))); + m_Ey_particle_parser.reset(new ParserWrapper<4>( + makeParser(str_Ey_ext_particle_function,{"x","y","z","t"}))); + m_Ez_particle_parser.reset(new ParserWrapper<4>( + makeParser(str_Ez_ext_particle_function,{"x","y","z","t"}))); } diff --git a/Source/Particles/PhysicalParticleContainer.cpp b/Source/Particles/PhysicalParticleContainer.cpp index 6572657ff..0277a9fe6 100644 --- a/Source/Particles/PhysicalParticleContainer.cpp +++ b/Source/Particles/PhysicalParticleContainer.cpp @@ -553,7 +553,6 @@ PhysicalParticleContainer::AddPlasma (int lev, RealBox part_realbox) overlap_realbox.lo(1), overlap_realbox.lo(2))}; - std::size_t shared_mem_bytes = plasma_injector->sharedMemoryNeeded(); int lrrfac = rrfac; bool loc_do_field_ionization = do_field_ionization; @@ -738,7 +737,7 @@ PhysicalParticleContainer::AddPlasma (int lev, RealBox part_realbox) p.pos(0) = xb; p.pos(1) = z; #endif - }, shared_mem_bytes); + }); if (cost) { wt = (amrex::second() - wt) / tile_box.d_numPts(); @@ -992,44 +991,36 @@ PhysicalParticleContainer::AssignExternalFieldOnParticles(WarpXParIter& pti, Real* const AMREX_RESTRICT Exp_data = Exp.dataPtr(); Real* const AMREX_RESTRICT Eyp_data = Eyp.dataPtr(); Real* const AMREX_RESTRICT Ezp_data = Ezp.dataPtr(); - ParserWrapper *xfield_partparser = mypc.m_Ex_particle_parser.get(); - ParserWrapper *yfield_partparser = mypc.m_Ey_particle_parser.get(); - ParserWrapper *zfield_partparser = mypc.m_Ez_particle_parser.get(); + ParserWrapper<4> *xfield_partparser = mypc.m_Ex_particle_parser.get(); + ParserWrapper<4> *yfield_partparser = mypc.m_Ey_particle_parser.get(); + ParserWrapper<4> *zfield_partparser = mypc.m_Ez_particle_parser.get(); Real time = warpx.gett_new(lev); amrex::ParallelFor(pti.numParticles(), [=] AMREX_GPU_DEVICE (long i) { ParticleReal x, y, z; GetPosition(i, x, y, z); - Exp_data[i] = xfield_partparser->getField(x, y, z, time); - Eyp_data[i] = yfield_partparser->getField(x, y, z, time); - Ezp_data[i] = zfield_partparser->getField(x, y, z, time); - }, - /* To allocate shared memory for the GPU threads. */ - /* But, for now only 4 doubles (x,y,z,t) are allocated. */ - amrex::Gpu::numThreadsPerBlockParallelFor() * sizeof(double) * 4 - ); + Exp_data[i] = (*xfield_partparser)(x, y, z, time); + Eyp_data[i] = (*yfield_partparser)(x, y, z, time); + Ezp_data[i] = (*zfield_partparser)(x, y, z, time); + }); } if (mypc.m_B_ext_particle_s=="parse_b_ext_particle_function") { const auto GetPosition = GetParticlePosition(pti); Real* const AMREX_RESTRICT Bxp_data = Bxp.dataPtr(); Real* const AMREX_RESTRICT Byp_data = Byp.dataPtr(); Real* const AMREX_RESTRICT Bzp_data = Bzp.dataPtr(); - ParserWrapper *xfield_partparser = mypc.m_Bx_particle_parser.get(); - ParserWrapper *yfield_partparser = mypc.m_By_particle_parser.get(); - ParserWrapper *zfield_partparser = mypc.m_Bz_particle_parser.get(); + ParserWrapper<4> *xfield_partparser = mypc.m_Bx_particle_parser.get(); + ParserWrapper<4> *yfield_partparser = mypc.m_By_particle_parser.get(); + ParserWrapper<4> *zfield_partparser = mypc.m_Bz_particle_parser.get(); Real time = warpx.gett_new(lev); amrex::ParallelFor(pti.numParticles(), [=] AMREX_GPU_DEVICE (long i) { ParticleReal x, y, z; GetPosition(i, x, y, z); - Bxp_data[i] = xfield_partparser->getField(x, y, z, time); - Byp_data[i] = yfield_partparser->getField(x, y, z, time); - Bzp_data[i] = zfield_partparser->getField(x, y, z, time); - }, - /* To allocate shared memory for the GPU threads. */ - /* But, for now only 4 doubles (x,y,z,t) are allocated. */ - amrex::Gpu::numThreadsPerBlockParallelFor() * sizeof(double) * 4 - ); + Bxp_data[i] = (*xfield_partparser)(x, y, z, time); + Byp_data[i] = (*yfield_partparser)(x, y, z, time); + Bzp_data[i] = (*zfield_partparser)(x, y, z, time); + }); } } diff --git a/Source/Utils/Make.package b/Source/Utils/Make.package index f223c18b8..7e814ba89 100644 --- a/Source/Utils/Make.package +++ b/Source/Utils/Make.package @@ -1,4 +1,6 @@ -F90EXE_sources += utils_ES.F90 +ifeq ($(DO_ELECTROSTATIC),TRUE) + F90EXE_sources += utils_ES.F90 +endif CEXE_headers += WarpX_Complex.H CEXE_sources += WarpXMovingWindow.cpp CEXE_sources += WarpXTagging.cpp diff --git a/Source/Utils/WarpXMovingWindow.cpp b/Source/Utils/WarpXMovingWindow.cpp index f6cd6de20..a94ffede9 100644 --- a/Source/Utils/WarpXMovingWindow.cpp +++ b/Source/Utils/WarpXMovingWindow.cpp @@ -111,8 +111,8 @@ WarpX::MoveWindow (bool move_j) // Shift each component of vector fields (E, B, j) for (int dim = 0; dim < 3; ++dim) { // Fine grid - ParserWrapper *Bfield_parser; - ParserWrapper *Efield_parser; + ParserWrapper<3> *Bfield_parser; + ParserWrapper<3> *Efield_parser; bool use_Bparser = false; bool use_Eparser = false; if (B_ext_grid_s == "parse_b_ext_grid_function") { @@ -233,7 +233,7 @@ WarpX::MoveWindow (bool move_j) void WarpX::shiftMF (MultiFab& mf, const Geometry& geom, int num_shift, int dir, IntVect ng_extra, amrex::Real external_field, bool useparser, - ParserWrapper *field_parser) + ParserWrapper<3> *field_parser) { BL_PROFILE("WarpX::shiftMF()"); const BoxArray& ba = mf.boxArray(); @@ -329,10 +329,8 @@ WarpX::shiftMF (MultiFab& mf, const Geometry& geom, int num_shift, int dir, Real fac_z = (1.0 - mf_type[2]) * dx[2]*0.5; Real z = k*dx[2] + real_box.lo(2) + fac_z; #endif - srcfab(i,j,k,n) = field_parser->getField(x,y,z); - } - , amrex::Gpu::numThreadsPerBlockParallelFor() * sizeof(double)*4 - ); + srcfab(i,j,k,n) = (*field_parser)(x,y,z); + }); } } diff --git a/Source/Utils/WarpXUtil.H b/Source/Utils/WarpXUtil.H index 9231fa60a..cfc1b2440 100644 --- a/Source/Utils/WarpXUtil.H +++ b/Source/Utils/WarpXUtil.H @@ -126,6 +126,6 @@ T trilinear_interp(T x0, T x1,T y0, T y1, T z0, T z1, * * \param parse_function String to read to initialize the parser. */ -WarpXParser makeParser (std::string const& parse_function); +WarpXParser makeParser (std::string const& parse_function, std::vector<std::string> const& varnames); #endif //WARPX_UTILS_H_ diff --git a/Source/Utils/WarpXUtil.cpp b/Source/Utils/WarpXUtil.cpp index 983654aed..65aa0edb2 100644 --- a/Source/Utils/WarpXUtil.cpp +++ b/Source/Utils/WarpXUtil.cpp @@ -190,16 +190,13 @@ void Store_parserString(amrex::ParmParse& pp, std::string query_string, } -WarpXParser makeParser (std::string const& parse_function) +WarpXParser makeParser (std::string const& parse_function, std::vector<std::string> const& varnames) { WarpXParser parser(parse_function); - parser.registerVariables({"x","y","z","t"}); + parser.registerVariables(varnames); ParmParse pp("my_constants"); std::set<std::string> symbols = parser.symbols(); - symbols.erase("x"); - symbols.erase("y"); - symbols.erase("z"); - symbols.erase("t"); + for (auto const& v : varnames) symbols.erase(v.c_str()); for (auto it = symbols.begin(); it != symbols.end(); ) { Real v; if (pp.query(it->c_str(), v)) { diff --git a/Source/WarpX.H b/Source/WarpX.H index 5ffd9f15b..04ca309e0 100644 --- a/Source/WarpX.H +++ b/Source/WarpX.H @@ -20,6 +20,7 @@ #include <NCIGodfreyFilter.H> #include "MultiReducedDiags.H" +#include <FiniteDifferenceSolver.H> #ifdef WARPX_USE_PSATD # include <SpectralSolver.H> #endif @@ -91,7 +92,7 @@ public: static void shiftMF (amrex::MultiFab& mf, const amrex::Geometry& geom, int num_shift, int dir, amrex::IntVect ng_extra, amrex::Real external_field=0.0, bool useparser = false, - ParserWrapper *field_parser=nullptr); + ParserWrapper<3> *field_parser=nullptr); static void GotoNextLine (std::istream& is); @@ -116,13 +117,13 @@ public: static std::string str_Ez_ext_grid_function; // ParserWrapper for B_external on the grid - std::unique_ptr<ParserWrapper> Bxfield_parser; - std::unique_ptr<ParserWrapper> Byfield_parser; - std::unique_ptr<ParserWrapper> Bzfield_parser; + std::unique_ptr<ParserWrapper<3> > Bxfield_parser; + std::unique_ptr<ParserWrapper<3> > Byfield_parser; + std::unique_ptr<ParserWrapper<3> > Bzfield_parser; // ParserWrapper for E_external on the grid - std::unique_ptr<ParserWrapper> Exfield_parser; - std::unique_ptr<ParserWrapper> Eyfield_parser; - std::unique_ptr<ParserWrapper> Ezfield_parser; + std::unique_ptr<ParserWrapper<3> > Exfield_parser; + std::unique_ptr<ParserWrapper<3> > Eyfield_parser; + std::unique_ptr<ParserWrapper<3> > Ezfield_parser; // Algorithms static long current_deposition_algo; @@ -410,8 +411,8 @@ public: */ void InitializeExternalFieldsOnGridUsingParser ( amrex::MultiFab *mfx, amrex::MultiFab *mfy, amrex::MultiFab *mfz, - ParserWrapper *xfield_parser, ParserWrapper *yfield_parser, - ParserWrapper *zfield_parser, amrex::IntVect x_nodal_flag, + ParserWrapper<3> *xfield_parser, ParserWrapper<3> *yfield_parser, + ParserWrapper<3> *zfield_parser, amrex::IntVect x_nodal_flag, amrex::IntVect y_nodal_flag, amrex::IntVect z_nodal_flag, const int lev); @@ -748,6 +749,8 @@ private: amrex::Vector<std::unique_ptr<SpectralSolver>> spectral_solver_fp; amrex::Vector<std::unique_ptr<SpectralSolver>> spectral_solver_cp; #endif + amrex::Vector<std::unique_ptr<FiniteDifferenceSolver>> m_fdtd_solver_fp; + amrex::Vector<std::unique_ptr<FiniteDifferenceSolver>> m_fdtd_solver_cp; #ifdef WARPX_USE_PSATD_HYBRID private: diff --git a/Source/WarpX.cpp b/Source/WarpX.cpp index bcfab9a69..025a41941 100644 --- a/Source/WarpX.cpp +++ b/Source/WarpX.cpp @@ -237,10 +237,13 @@ WarpX::WarpX () costs.resize(nlevs_max); + // Allocate field solver objects #ifdef WARPX_USE_PSATD spectral_solver_fp.resize(nlevs_max); spectral_solver_cp.resize(nlevs_max); #endif + m_fdtd_solver_fp.resize(nlevs_max); + m_fdtd_solver_cp.resize(nlevs_max); #ifdef WARPX_USE_PSATD_HYBRID Efield_fp_fft.resize(nlevs_max); Bfield_fp_fft.resize(nlevs_max); @@ -867,7 +870,9 @@ WarpX::AllocLevelMFs (int lev, const BoxArray& ba, const DistributionMapping& dm nox_fft, noy_fft, noz_fft, do_nodal, dx_vect, dt[lev] ) ); } #endif - + std::array<Real,3> const dx = CellSize(lev); + m_fdtd_solver_fp[lev].reset( + new FiniteDifferenceSolver(maxwell_fdtd_solver_id, dx, do_nodal) ); // // The Aux patch (i.e., the full solution) // @@ -939,11 +944,11 @@ WarpX::AllocLevelMFs (int lev, const BoxArray& ba, const DistributionMapping& dm if (fft_hybrid_mpi_decomposition == false){ // Allocate and initialize the spectral solver std::array<Real,3> cdx = CellSize(lev-1); - #if (AMREX_SPACEDIM == 3) +#if (AMREX_SPACEDIM == 3) RealVect cdx_vect(cdx[0], cdx[1], cdx[2]); - #elif (AMREX_SPACEDIM == 2) +#elif (AMREX_SPACEDIM == 2) RealVect cdx_vect(cdx[0], cdx[2]); - #endif +#endif // Get the cell-centered box, with guard cells BoxArray realspace_ba = cba;// Copy box realspace_ba.enclosedCells().grow(ngE);// cell-centered + guard cells @@ -952,6 +957,9 @@ WarpX::AllocLevelMFs (int lev, const BoxArray& ba, const DistributionMapping& dm nox_fft, noy_fft, noz_fft, do_nodal, cdx_vect, dt[lev] ) ); } #endif + std::array<Real,3> cdx = CellSize(lev-1); + m_fdtd_solver_cp[lev].reset( + new FiniteDifferenceSolver( maxwell_fdtd_solver_id, cdx, do_nodal ) ); } // |