diff options
Diffstat (limited to 'Source/FieldSolver')
-rw-r--r-- | Source/FieldSolver/Make.package | 15 | ||||
-rw-r--r-- | Source/FieldSolver/WarpXFFT.cpp | 451 | ||||
-rw-r--r-- | Source/FieldSolver/WarpXPushFieldsEM.cpp | 428 | ||||
-rw-r--r-- | Source/FieldSolver/WarpX_K.H | 97 | ||||
-rw-r--r-- | Source/FieldSolver/WarpX_fft.F90 | 319 | ||||
-rw-r--r-- | Source/FieldSolver/openbc_poisson_solver.F90 | 62 | ||||
-rw-r--r-- | Source/FieldSolver/solve_E_nodal.F90 | 73 |
7 files changed, 1445 insertions, 0 deletions
diff --git a/Source/FieldSolver/Make.package b/Source/FieldSolver/Make.package new file mode 100644 index 000000000..42eb6a6f0 --- /dev/null +++ b/Source/FieldSolver/Make.package @@ -0,0 +1,15 @@ +CEXE_headers += WarpX_K.H +CEXE_sources += WarpXPushFieldsEM.cpp +ifeq ($(USE_PSATD),TRUE) + CEXE_sources += WarpXFFT.cpp + F90EXE_sources += WarpX_fft.F90 +endif +ifeq ($(USE_OPENBC_POISSON),TRUE) + F90EXE_sources += openbc_poisson_solver.F90 +endif +ifeq ($DO_ELECTROSTATIC,TRUE) + F90EXE_sources += solve_E_nodal.F90 +endif + +INCLUDE_LOCATIONS += $(WARPX_HOME)/Source/FieldSolver +VPATH_LOCATIONS += $(WARPX_HOME)/Source/FieldSolver diff --git a/Source/FieldSolver/WarpXFFT.cpp b/Source/FieldSolver/WarpXFFT.cpp new file mode 100644 index 000000000..f09290f29 --- /dev/null +++ b/Source/FieldSolver/WarpXFFT.cpp @@ -0,0 +1,451 @@ + +#include <WarpX.H> +#include <WarpX_f.H> +#include <AMReX_iMultiFab.H> + +using namespace amrex; + +constexpr int WarpX::FFTData::N; + +namespace { +static std::unique_ptr<WarpX::FFTData> nullfftdata; // This for process with nz_fft=0 + +/** \brief Returns an "owner mask" which 1 for all cells, except + * for the duplicated (physical) cells of a nodal grid. + * + * More precisely, for these cells (which are represented on several grids) + * the owner mask is 1 only if these cells are at the lower left end of + * the local grid - or if these cells are at the end of the physical domain + * Therefore, there for these cells, there will be only one grid for + * which the owner mask is non-zero. + */ +static iMultiFab +BuildFFTOwnerMask (const MultiFab& mf, const Geometry& geom) +{ + const BoxArray& ba = mf.boxArray(); + const DistributionMapping& dm = mf.DistributionMap(); + iMultiFab mask(ba, dm, 1, 0); + const int owner = 1; + const int nonowner = 0; + mask.setVal(owner); + + const Box& domain_box = amrex::convert(geom.Domain(), ba.ixType()); + + AMREX_ASSERT(ba.complementIn(domain_box).isEmpty()); + +#ifdef _OPENMP +#pragma omp parallel +#endif + for (MFIter mfi(mask); mfi.isValid(); ++mfi) + { + IArrayBox& fab = mask[mfi]; + const Box& bx = fab.box(); + Box bx2 = bx; + for (int idim = 0; idim < AMREX_SPACEDIM; ++idim) { + // Detect nodal dimensions + if (bx2.type(idim) == IndexType::NODE) { + // Make sure that this grid does not touch the end of + // the physical domain. + if (bx2.bigEnd(idim) < domain_box.bigEnd(idim)) { + bx2.growHi(idim, -1); + } + } + } + const BoxList& bl = amrex::boxDiff(bx, bx2); + // Set owner mask in these cells + for (const auto& b : bl) { + fab.setVal(nonowner, b, 0, 1); + } + } + + return mask; +} + +/** \brief Copy the data from the FFT grid to the regular grid + * + * Because, for nodal grid, some cells are duplicated on several boxes, + * special care has to be taken in order to have consistent values on + * each boxes when copying this data. Here this is done by setting a + * mask, where, for these duplicated cells, the mask is non-zero on only + * one box. + */ +static void +CopyDataFromFFTToValid (MultiFab& mf, const MultiFab& mf_fft, const BoxArray& ba_valid_fft, const Geometry& geom) +{ + auto idx_type = mf_fft.ixType(); + MultiFab mftmp(amrex::convert(ba_valid_fft,idx_type), mf_fft.DistributionMap(), 1, 0); + + const iMultiFab& mask = BuildFFTOwnerMask(mftmp, geom); + + // Local copy: whenever an MPI rank owns both the data from the FFT + // grid and from the regular grid, for overlapping region, copy it locally +#ifdef _OPENMP +#pragma omp parallel +#endif + for (MFIter mfi(mftmp,true); mfi.isValid(); ++mfi) + { + const Box& bx = mfi.tilebox(); + FArrayBox& dstfab = mftmp[mfi]; + + const FArrayBox& srcfab = mf_fft[mfi]; + const Box& srcbox = srcfab.box(); + + if (srcbox.contains(bx)) + { + // Copy the interior region (without guard cells) + dstfab.copy(srcfab, bx, 0, bx, 0, 1); + // Set the value to 0 whenever the mask is 0 + // (i.e. for nodal duplicated cells, there is a single box + // for which the mask is different than 0) + // if mask == 0, set value to zero + dstfab.setValIfNot(0.0, bx, mask[mfi], 0, 1); + } + } + + // Global copy: Get the remaining the data from other procs + // Use ParallelAdd instead of ParallelCopy, so that the value from + // the cell that has non-zero mask is the one which is retained. + mf.setVal(0.0, 0); + mf.ParallelAdd(mftmp); +} + +} + +void +WarpX::AllocLevelDataFFT (int lev) +{ + AMREX_ALWAYS_ASSERT_WITH_MESSAGE(lev == 0, "PSATD doesn't work with mesh refinement yet"); + + static_assert(std::is_standard_layout<FFTData>::value, "FFTData must have standard layout"); + static_assert(sizeof(FFTData) == sizeof(void*)*FFTData::N, "sizeof FFTData is wrong"); + + InitFFTComm(lev); + + BoxArray ba_fp_fft; + DistributionMapping dm_fp_fft; + FFTDomainDecomposition(lev, ba_fp_fft, dm_fp_fft, ba_valid_fp_fft[lev], domain_fp_fft[lev], + geom[lev].Domain()); + + // rho2 has one extra ghost cell, so that it's safe to deposit charge density after + // pushing particle. + + Efield_fp_fft[lev][0].reset(new MultiFab(amrex::convert(ba_fp_fft,Ex_nodal_flag), + dm_fp_fft, 1, 0)); + Efield_fp_fft[lev][1].reset(new MultiFab(amrex::convert(ba_fp_fft,Ey_nodal_flag), + dm_fp_fft, 1, 0)); + Efield_fp_fft[lev][2].reset(new MultiFab(amrex::convert(ba_fp_fft,Ez_nodal_flag), + dm_fp_fft, 1, 0)); + Bfield_fp_fft[lev][0].reset(new MultiFab(amrex::convert(ba_fp_fft,Bx_nodal_flag), + dm_fp_fft, 1, 0)); + Bfield_fp_fft[lev][1].reset(new MultiFab(amrex::convert(ba_fp_fft,By_nodal_flag), + dm_fp_fft, 1, 0)); + Bfield_fp_fft[lev][2].reset(new MultiFab(amrex::convert(ba_fp_fft,Bz_nodal_flag), + dm_fp_fft, 1, 0)); + current_fp_fft[lev][0].reset(new MultiFab(amrex::convert(ba_fp_fft,jx_nodal_flag), + dm_fp_fft, 1, 0)); + current_fp_fft[lev][1].reset(new MultiFab(amrex::convert(ba_fp_fft,jy_nodal_flag), + dm_fp_fft, 1, 0)); + current_fp_fft[lev][2].reset(new MultiFab(amrex::convert(ba_fp_fft,jz_nodal_flag), + dm_fp_fft, 1, 0)); + rho_fp_fft[lev].reset(new MultiFab(amrex::convert(ba_fp_fft,IntVect::TheNodeVector()), + dm_fp_fft, 2, 0)); + + dataptr_fp_fft[lev].reset(new LayoutData<FFTData>(ba_fp_fft, dm_fp_fft)); + + if (lev > 0) + { + BoxArray ba_cp_fft; + DistributionMapping dm_cp_fft; + FFTDomainDecomposition(lev, ba_cp_fft, dm_cp_fft, ba_valid_cp_fft[lev], domain_cp_fft[lev], + amrex::coarsen(geom[lev].Domain(),2)); + + Efield_cp_fft[lev][0].reset(new MultiFab(amrex::convert(ba_cp_fft,Ex_nodal_flag), + dm_cp_fft, 1, 0)); + Efield_cp_fft[lev][1].reset(new MultiFab(amrex::convert(ba_cp_fft,Ey_nodal_flag), + dm_cp_fft, 1, 0)); + Efield_cp_fft[lev][2].reset(new MultiFab(amrex::convert(ba_cp_fft,Ez_nodal_flag), + dm_cp_fft, 1, 0)); + Bfield_cp_fft[lev][0].reset(new MultiFab(amrex::convert(ba_cp_fft,Bx_nodal_flag), + dm_cp_fft, 1, 0)); + Bfield_cp_fft[lev][1].reset(new MultiFab(amrex::convert(ba_cp_fft,By_nodal_flag), + dm_cp_fft, 1, 0)); + Bfield_cp_fft[lev][2].reset(new MultiFab(amrex::convert(ba_cp_fft,Bz_nodal_flag), + dm_cp_fft, 1, 0)); + current_cp_fft[lev][0].reset(new MultiFab(amrex::convert(ba_cp_fft,jx_nodal_flag), + dm_cp_fft, 1, 0)); + current_cp_fft[lev][1].reset(new MultiFab(amrex::convert(ba_cp_fft,jy_nodal_flag), + dm_cp_fft, 1, 0)); + current_cp_fft[lev][2].reset(new MultiFab(amrex::convert(ba_cp_fft,jz_nodal_flag), + dm_cp_fft, 1, 0)); + rho_cp_fft[lev].reset(new MultiFab(amrex::convert(ba_cp_fft,IntVect::TheNodeVector()), + dm_cp_fft, 2, 0)); + + dataptr_cp_fft[lev].reset(new LayoutData<FFTData>(ba_cp_fft, dm_cp_fft)); + } + + InitFFTDataPlan(lev); +} + +/** \brief Create MPI sub-communicators for each FFT group, + * and put them in PICSAR module + * + * These communicators are passed to the parallel FFTW library, in order + * to perform a global FFT within each FFT group. + */ +void +WarpX::InitFFTComm (int lev) +{ + int nprocs = ParallelDescriptor::NProcs(); + ngroups_fft = std::min(ngroups_fft, nprocs); + + // # of processes in the subcommunicator + int np_fft = nprocs / ngroups_fft; + AMREX_ALWAYS_ASSERT_WITH_MESSAGE(np_fft*ngroups_fft == nprocs, + "Number of processes must be divisible by number of FFT groups"); + + int myproc = ParallelDescriptor::MyProc(); + // my color in ngroups_fft subcommunicators. 0 <= color_fft < ngroups_fft + color_fft[lev] = myproc / np_fft; + MPI_Comm_split(ParallelDescriptor::Communicator(), color_fft[lev], myproc, &comm_fft[lev]); + + int fcomm = MPI_Comm_c2f(comm_fft[lev]); + // Set the communicator of the PICSAR module to the one we just created + warpx_fft_mpi_init(fcomm); +} + +/** \brief Perform domain decomposition for the FFTW + * + * Attribute one (unique) box to each proc, in such a way that: + * - The global domain is divided among FFT groups, + * with additional guard cells around each FFT group + * - The domain associated to an FFT group (with its guard cells) + * is further divided in sub-subdomains along z, so as to distribute + * it among the procs within an FFT group + * + * The attribution is done by setting (within this function): + * - ba_fft: the BoxArray representing the final set of sub-domains for the FFT + * (includes/covers the guard cells of the FFT groups) + * - dm_fft: the mapping between these sub-domains and the corresponding proc + * (imposes one unique box for each proc) + * - ba_valid: the BoxArray that contains valid part of the sub-domains of ba_fft + * (i.e. does not include/cover the guard cells of the FFT groups) + * - domain_fft: a Box that represent the domain of the FFT group for the current proc + */ +void +WarpX::FFTDomainDecomposition (int lev, BoxArray& ba_fft, DistributionMapping& dm_fft, + BoxArray& ba_valid, Box& domain_fft, const Box& domain) +{ + + IntVect nguards_fft(AMREX_D_DECL(nox_fft/2,noy_fft/2,noz_fft/2)); + + int nprocs = ParallelDescriptor::NProcs(); + + BoxList bl(domain, ngroups_fft); // This does a multi-D domain decomposition for groups + AMREX_ALWAYS_ASSERT(bl.size() == ngroups_fft); + const Vector<Box>& bldata = bl.data(); + + // This is the domain for the FFT sub-group (including guard cells) + domain_fft = amrex::grow(bldata[color_fft[lev]], nguards_fft); + // Ask FFTW to chop the current FFT sub-group domain in the z-direction + // and give a chunk to each MPI rank in the current sub-group. + int nz_fft, z0_fft; + + warpx_fft_domain_decomp(&nz_fft, &z0_fft, WARPX_TO_FORTRAN_BOX(domain_fft)); + // Each MPI rank adds a box with its chunk of the FFT grid + // (given by the above decomposition) to the list `bx_fft`, + // then list is shared among all MPI ranks via AllGather + Vector<Box> bx_fft; + if (nz_fft > 0) { + Box b = domain_fft; + b.setRange(AMREX_SPACEDIM-1, z0_fft+domain_fft.smallEnd(AMREX_SPACEDIM-1), nz_fft); + bx_fft.push_back(b); + } else { + // Add empty box for the AllGather call + bx_fft.push_back(Box()); + } + amrex::AllGatherBoxes(bx_fft); + AMREX_ASSERT(bx_fft.size() == ParallelDescriptor::NProcs()); + // Build pmap and bx_fft without the empty boxes + Vector<int> pmap; + for (int i = 0; i < bx_fft.size(); ++i) { + if (bx_fft[i].ok()) { + pmap.push_back(i); + } + } + bx_fft.erase(std::remove_if(bx_fft.begin(),bx_fft.end(), + [](Box const& b) { return b.isEmpty(); }), + bx_fft.end()); + AMREX_ASSERT(bx_fft.size() == pmap.size()); + + // Define the AMReX objects for the FFT grid: BoxArray and DistributionMapping + ba_fft.define(BoxList(std::move(bx_fft))); + dm_fft.define(std::move(pmap)); + + // For communication between WarpX normal domain and FFT domain, we need to create a + // special BoxArray ba_valid + const Box foobox(-nguards_fft-2, -nguards_fft-2); + + BoxList bl_valid; // List of boxes: will be filled by the valid part of the subdomains of ba_fft + bl_valid.reserve(ba_fft.size()); + int np_fft = nprocs / ngroups_fft; + for (int i = 0; i < ba_fft.size(); ++i) + { + int igroup = dm_fft[i] / np_fft; // This should be consistent with InitFFTComm + const Box& bx = ba_fft[i] & bldata[igroup]; // Intersection with the domain of + // the FFT group *without* guard cells + if (bx.ok()) + { + bl_valid.push_back(bx); + } + else + { + bl_valid.push_back(foobox); + } + } + + ba_valid.define(std::move(bl_valid)); +} + +/** /brief Set all the flags and metadata of the PICSAR FFT module. + * Allocate the auxiliary arrays of `fft_data` + * + * Note: dataptr_data is a stuct containing 22 pointers to arrays + * 1-11: padded arrays in real space ; 12-22 arrays for the fields in Fourier space + */ +void +WarpX::InitFFTDataPlan (int lev) +{ + auto dx_fp = CellSize(lev); + + if (Efield_fp_fft[lev][0]->local_size() == 1) + //Only one FFT patch on this MPI + { + for (MFIter mfi(*Efield_fp_fft[lev][0]); mfi.isValid(); ++mfi) + { + warpx_fft_dataplan_init(&nox_fft, &noy_fft, &noz_fft, + (*dataptr_fp_fft[lev])[mfi].data, &FFTData::N, + dx_fp.data(), &dt[lev], &fftw_plan_measure, &WarpX::do_nodal ); + } + } + else if (Efield_fp_fft[lev][0]->local_size() == 0) + // No FFT patch on this MPI rank (may happen with FFTW) + // Still need to call the MPI-FFT initialization routines + { + nullfftdata.reset(new FFTData()); + warpx_fft_dataplan_init(&nox_fft, &noy_fft, &noz_fft, + nullfftdata->data, &FFTData::N, + dx_fp.data(), &dt[lev], &fftw_plan_measure, + &WarpX::do_nodal ); + } + else + { + // Multiple FFT patches on this MPI rank + amrex::Abort("WarpX::InitFFTDataPlan: TODO"); + } + + if (lev > 0) + { + amrex::Abort("WarpX::InitFFTDataPlan: TODO"); + } +} + +void +WarpX::FreeFFT (int lev) +{ + nullfftdata.reset(); + + warpx_fft_nullify(); + + if (comm_fft[lev] != MPI_COMM_NULL) { + MPI_Comm_free(&comm_fft[lev]); + } + comm_fft[lev] = MPI_COMM_NULL; +} + +void +WarpX::PushPSATD (amrex::Real a_dt) +{ + for (int lev = 0; lev <= finest_level; ++lev) { + AMREX_ALWAYS_ASSERT_WITH_MESSAGE(dt[lev] == a_dt, "dt must be consistent"); + PushPSATD(lev, a_dt); + } +} + +void +WarpX::PushPSATD (int lev, amrex::Real /* dt */) +{ + BL_PROFILE_VAR_NS("WarpXFFT::CopyDualGrid", blp_copy); + BL_PROFILE_VAR_NS("PICSAR::FftPushEB", blp_push_eb); + + auto period_fp = geom[lev].periodicity(); + + BL_PROFILE_VAR_START(blp_copy); + Efield_fp_fft[lev][0]->ParallelCopy(*Efield_fp[lev][0], 0, 0, 1, 0, 0, period_fp); + Efield_fp_fft[lev][1]->ParallelCopy(*Efield_fp[lev][1], 0, 0, 1, 0, 0, period_fp); + Efield_fp_fft[lev][2]->ParallelCopy(*Efield_fp[lev][2], 0, 0, 1, 0, 0, period_fp); + Bfield_fp_fft[lev][0]->ParallelCopy(*Bfield_fp[lev][0], 0, 0, 1, 0, 0, period_fp); + Bfield_fp_fft[lev][1]->ParallelCopy(*Bfield_fp[lev][1], 0, 0, 1, 0, 0, period_fp); + Bfield_fp_fft[lev][2]->ParallelCopy(*Bfield_fp[lev][2], 0, 0, 1, 0, 0, period_fp); + current_fp_fft[lev][0]->ParallelCopy(*current_fp[lev][0], 0, 0, 1, 0, 0, period_fp); + current_fp_fft[lev][1]->ParallelCopy(*current_fp[lev][1], 0, 0, 1, 0, 0, period_fp); + current_fp_fft[lev][2]->ParallelCopy(*current_fp[lev][2], 0, 0, 1, 0, 0, period_fp); + rho_fp_fft[lev]->ParallelCopy(*rho_fp[lev], 0, 0, 2, 0, 0, period_fp); + BL_PROFILE_VAR_STOP(blp_copy); + + BL_PROFILE_VAR_START(blp_push_eb); + if (Efield_fp_fft[lev][0]->local_size() == 1) + //Only one FFT patch on this MPI + { + for (MFIter mfi(*Efield_fp_fft[lev][0]); mfi.isValid(); ++mfi) + { + warpx_fft_push_eb(WARPX_TO_FORTRAN_ANYD((*Efield_fp_fft[lev][0])[mfi]), + WARPX_TO_FORTRAN_ANYD((*Efield_fp_fft[lev][1])[mfi]), + WARPX_TO_FORTRAN_ANYD((*Efield_fp_fft[lev][2])[mfi]), + WARPX_TO_FORTRAN_ANYD((*Bfield_fp_fft[lev][0])[mfi]), + WARPX_TO_FORTRAN_ANYD((*Bfield_fp_fft[lev][1])[mfi]), + WARPX_TO_FORTRAN_ANYD((*Bfield_fp_fft[lev][2])[mfi]), + WARPX_TO_FORTRAN_ANYD((*current_fp_fft[lev][0])[mfi]), + WARPX_TO_FORTRAN_ANYD((*current_fp_fft[lev][1])[mfi]), + WARPX_TO_FORTRAN_ANYD((*current_fp_fft[lev][2])[mfi]), + WARPX_TO_FORTRAN_N_ANYD((*rho_fp_fft[lev])[mfi],0), + WARPX_TO_FORTRAN_N_ANYD((*rho_fp_fft[lev])[mfi],1)); + } + } + else if (Efield_fp_fft[lev][0]->local_size() == 0) + // No FFT patch on this MPI rank + // Still need to call the MPI-FFT routine. + { + FArrayBox fab(Box(IntVect::TheZeroVector(), IntVect::TheUnitVector())); + warpx_fft_push_eb(WARPX_TO_FORTRAN_ANYD(fab), + WARPX_TO_FORTRAN_ANYD(fab), + WARPX_TO_FORTRAN_ANYD(fab), + WARPX_TO_FORTRAN_ANYD(fab), + WARPX_TO_FORTRAN_ANYD(fab), + WARPX_TO_FORTRAN_ANYD(fab), + WARPX_TO_FORTRAN_ANYD(fab), + WARPX_TO_FORTRAN_ANYD(fab), + WARPX_TO_FORTRAN_ANYD(fab), + WARPX_TO_FORTRAN_ANYD(fab), + WARPX_TO_FORTRAN_ANYD(fab)); + } + else + // Multiple FFT patches on this MPI rank + { + amrex::Abort("WarpX::PushPSATD: TODO"); + } + BL_PROFILE_VAR_STOP(blp_push_eb); + + BL_PROFILE_VAR_START(blp_copy); + CopyDataFromFFTToValid(*Efield_fp[lev][0], *Efield_fp_fft[lev][0], ba_valid_fp_fft[lev], geom[lev]); + CopyDataFromFFTToValid(*Efield_fp[lev][1], *Efield_fp_fft[lev][1], ba_valid_fp_fft[lev], geom[lev]); + CopyDataFromFFTToValid(*Efield_fp[lev][2], *Efield_fp_fft[lev][2], ba_valid_fp_fft[lev], geom[lev]); + CopyDataFromFFTToValid(*Bfield_fp[lev][0], *Bfield_fp_fft[lev][0], ba_valid_fp_fft[lev], geom[lev]); + CopyDataFromFFTToValid(*Bfield_fp[lev][1], *Bfield_fp_fft[lev][1], ba_valid_fp_fft[lev], geom[lev]); + CopyDataFromFFTToValid(*Bfield_fp[lev][2], *Bfield_fp_fft[lev][2], ba_valid_fp_fft[lev], geom[lev]); + BL_PROFILE_VAR_STOP(blp_copy); + + if (lev > 0) + { + amrex::Abort("WarpX::PushPSATD: TODO"); + } +} diff --git a/Source/FieldSolver/WarpXPushFieldsEM.cpp b/Source/FieldSolver/WarpXPushFieldsEM.cpp new file mode 100644 index 000000000..6d588ae86 --- /dev/null +++ b/Source/FieldSolver/WarpXPushFieldsEM.cpp @@ -0,0 +1,428 @@ + +#include <cmath> +#include <limits> + +#include <WarpX.H> +#include <WarpXConst.H> +#include <WarpX_f.H> +#include <WarpX_K.H> +#ifdef WARPX_USE_PY +#include <WarpX_py.H> +#endif + +#ifdef BL_USE_SENSEI_INSITU +#include <AMReX_AmrMeshInSituBridge.H> +#endif + +using namespace amrex; + +void +WarpX::EvolveB (Real dt) +{ + for (int lev = 0; lev <= finest_level; ++lev) { + EvolveB(lev, dt); + } +} + +void +WarpX::EvolveB (int lev, Real dt) +{ + BL_PROFILE("WarpX::EvolveB()"); + EvolveB(lev, PatchType::fine, dt); + if (lev > 0) + { + EvolveB(lev, PatchType::coarse, dt); + } +} + +void +WarpX::EvolveB (int lev, PatchType patch_type, amrex::Real dt) +{ + const int patch_level = (patch_type == PatchType::fine) ? lev : lev-1; + const std::array<Real,3>& dx = WarpX::CellSize(patch_level); + Real dtsdx = dt/dx[0], dtsdy = dt/dx[1], dtsdz = dt/dx[2]; + + MultiFab *Ex, *Ey, *Ez, *Bx, *By, *Bz; + if (patch_type == PatchType::fine) + { + Ex = Efield_fp[lev][0].get(); + Ey = Efield_fp[lev][1].get(); + Ez = Efield_fp[lev][2].get(); + Bx = Bfield_fp[lev][0].get(); + By = Bfield_fp[lev][1].get(); + Bz = Bfield_fp[lev][2].get(); + } + else + { + Ex = Efield_cp[lev][0].get(); + Ey = Efield_cp[lev][1].get(); + Ez = Efield_cp[lev][2].get(); + Bx = Bfield_cp[lev][0].get(); + By = Bfield_cp[lev][1].get(); + Bz = Bfield_cp[lev][2].get(); + } + + MultiFab* cost = costs[lev].get(); + const IntVect& rr = (lev > 0) ? refRatio(lev-1) : IntVect::TheUnitVector(); + + // Loop through the grids, and over the tiles within each grid +#ifdef _OPENMP +#pragma omp parallel if (Gpu::notInLaunchRegion()) +#endif + for ( MFIter mfi(*Bx, TilingIfNotGPU()); mfi.isValid(); ++mfi ) + { + Real wt = amrex::second(); + + const Box& tbx = mfi.tilebox(Bx_nodal_flag); + const Box& tby = mfi.tilebox(By_nodal_flag); + const Box& tbz = mfi.tilebox(Bz_nodal_flag); + + if (do_nodal) { + auto const& Bxfab = Bx->array(mfi); + auto const& Byfab = By->array(mfi); + auto const& Bzfab = Bz->array(mfi); + auto const& Exfab = Ex->array(mfi); + auto const& Eyfab = Ey->array(mfi); + auto const& Ezfab = Ez->array(mfi); + amrex::ParallelFor(tbx, + [=] AMREX_GPU_DEVICE (int j, int k, int l) + { + warpx_push_bx_nodal(j,k,l,Bxfab,Eyfab,Ezfab,dtsdy,dtsdz); + }); + amrex::ParallelFor(tby, + [=] AMREX_GPU_DEVICE (int j, int k, int l) + { + warpx_push_by_nodal(j,k,l,Byfab,Exfab,Ezfab,dtsdx,dtsdz); + }); + amrex::ParallelFor(tbz, + [=] AMREX_GPU_DEVICE (int j, int k, int l) + { + warpx_push_bz_nodal(j,k,l,Bzfab,Exfab,Eyfab,dtsdx,dtsdy); + }); + } else { + // Call picsar routine for each tile + warpx_push_bvec( + tbx.loVect(), tbx.hiVect(), + tby.loVect(), tby.hiVect(), + tbz.loVect(), tbz.hiVect(), + BL_TO_FORTRAN_3D((*Ex)[mfi]), + BL_TO_FORTRAN_3D((*Ey)[mfi]), + BL_TO_FORTRAN_3D((*Ez)[mfi]), + BL_TO_FORTRAN_3D((*Bx)[mfi]), + BL_TO_FORTRAN_3D((*By)[mfi]), + BL_TO_FORTRAN_3D((*Bz)[mfi]), + &dtsdx, &dtsdy, &dtsdz, + &WarpX::maxwell_fdtd_solver_id); + } + + if (cost) { + Box cbx = mfi.tilebox(IntVect{AMREX_D_DECL(0,0,0)}); + if (patch_type == PatchType::coarse) cbx.refine(rr); + wt = (amrex::second() - wt) / cbx.d_numPts(); + auto costfab = cost->array(mfi); + amrex::ParallelFor(cbx, + [=] AMREX_GPU_DEVICE (int i, int j, int k) + { + costfab(i,j,k) += wt; + }); + } + } + + if (do_pml && pml[lev]->ok()) + { + const auto& pml_B = (patch_type == PatchType::fine) ? pml[lev]->GetB_fp() : pml[lev]->GetB_cp(); + const auto& pml_E = (patch_type == PatchType::fine) ? pml[lev]->GetE_fp() : pml[lev]->GetE_cp(); + +#ifdef _OPENMP +#pragma omp parallel if (Gpu::notInLaunchRegion()) +#endif + for ( MFIter mfi(*pml_B[0], TilingIfNotGPU()); mfi.isValid(); ++mfi ) + { + const Box& tbx = mfi.tilebox(Bx_nodal_flag); + const Box& tby = mfi.tilebox(By_nodal_flag); + const Box& tbz = mfi.tilebox(Bz_nodal_flag); + + WRPX_PUSH_PML_BVEC( + tbx.loVect(), tbx.hiVect(), + tby.loVect(), tby.hiVect(), + tbz.loVect(), tbz.hiVect(), + BL_TO_FORTRAN_3D((*pml_E[0])[mfi]), + BL_TO_FORTRAN_3D((*pml_E[1])[mfi]), + BL_TO_FORTRAN_3D((*pml_E[2])[mfi]), + BL_TO_FORTRAN_3D((*pml_B[0])[mfi]), + BL_TO_FORTRAN_3D((*pml_B[1])[mfi]), + BL_TO_FORTRAN_3D((*pml_B[2])[mfi]), + &dtsdx, &dtsdy, &dtsdz, + &WarpX::maxwell_fdtd_solver_id); + } + } +} + +void +WarpX::EvolveE (Real dt) +{ + for (int lev = 0; lev <= finest_level; ++lev) + { + EvolveE(lev, dt); + } +} + +void +WarpX::EvolveE (int lev, Real dt) +{ + BL_PROFILE("WarpX::EvolveE()"); + EvolveE(lev, PatchType::fine, dt); + if (lev > 0) + { + EvolveE(lev, PatchType::coarse, dt); + } +} + +void +WarpX::EvolveE (int lev, PatchType patch_type, amrex::Real dt) +{ + const Real mu_c2_dt = (PhysConst::mu0*PhysConst::c*PhysConst::c) * dt; + const Real c2dt = (PhysConst::c*PhysConst::c) * dt; + + int patch_level = (patch_type == PatchType::fine) ? lev : lev-1; + const std::array<Real,3>& dx = WarpX::CellSize(patch_level); + Real dtsdx_c2 = c2dt/dx[0], dtsdy_c2 = c2dt/dx[1], dtsdz_c2 = c2dt/dx[2]; + + MultiFab *Ex, *Ey, *Ez, *Bx, *By, *Bz, *jx, *jy, *jz, *F; + if (patch_type == PatchType::fine) + { + Ex = Efield_fp[lev][0].get(); + Ey = Efield_fp[lev][1].get(); + Ez = Efield_fp[lev][2].get(); + Bx = Bfield_fp[lev][0].get(); + By = Bfield_fp[lev][1].get(); + Bz = Bfield_fp[lev][2].get(); + jx = current_fp[lev][0].get(); + jy = current_fp[lev][1].get(); + jz = current_fp[lev][2].get(); + F = F_fp[lev].get(); + } + else if (patch_type == PatchType::coarse) + { + Ex = Efield_cp[lev][0].get(); + Ey = Efield_cp[lev][1].get(); + Ez = Efield_cp[lev][2].get(); + Bx = Bfield_cp[lev][0].get(); + By = Bfield_cp[lev][1].get(); + Bz = Bfield_cp[lev][2].get(); + jx = current_cp[lev][0].get(); + jy = current_cp[lev][1].get(); + jz = current_cp[lev][2].get(); + F = F_cp[lev].get(); + } + + MultiFab* cost = costs[lev].get(); + const IntVect& rr = (lev > 0) ? refRatio(lev-1) : IntVect::TheUnitVector(); + + // Loop through the grids, and over the tiles within each grid +#ifdef _OPENMP +#pragma omp parallel if (Gpu::notInLaunchRegion()) +#endif + for ( MFIter mfi(*Ex, TilingIfNotGPU()); mfi.isValid(); ++mfi ) + { + Real wt = amrex::second(); + + const Box& tex = mfi.tilebox(Ex_nodal_flag); + const Box& tey = mfi.tilebox(Ey_nodal_flag); + const Box& tez = mfi.tilebox(Ez_nodal_flag); + + if (do_nodal) { + auto const& Exfab = Ex->array(mfi); + auto const& Eyfab = Ey->array(mfi); + auto const& Ezfab = Ez->array(mfi); + auto const& Bxfab = Bx->array(mfi); + auto const& Byfab = By->array(mfi); + auto const& Bzfab = Bz->array(mfi); + auto const& jxfab = jx->array(mfi); + auto const& jyfab = jy->array(mfi); + auto const& jzfab = jz->array(mfi); + amrex::ParallelFor(tex, + [=] AMREX_GPU_DEVICE (int j, int k, int l) + { + warpx_push_ex_nodal(j,k,l,Exfab,Byfab,Bzfab,jxfab,mu_c2_dt,dtsdy_c2,dtsdz_c2); + }); + amrex::ParallelFor(tey, + [=] AMREX_GPU_DEVICE (int j, int k, int l) + { + warpx_push_ey_nodal(j,k,l,Eyfab,Bxfab,Bzfab,jyfab,mu_c2_dt,dtsdx_c2,dtsdz_c2); + }); + amrex::ParallelFor(tez, + [=] AMREX_GPU_DEVICE (int j, int k, int l) + { + warpx_push_ez_nodal(j,k,l,Ezfab,Bxfab,Byfab,jzfab,mu_c2_dt,dtsdx_c2,dtsdy_c2); + }); + } else { + // Call picsar routine for each tile + warpx_push_evec( + tex.loVect(), tex.hiVect(), + tey.loVect(), tey.hiVect(), + tez.loVect(), tez.hiVect(), + BL_TO_FORTRAN_3D((*Ex)[mfi]), + BL_TO_FORTRAN_3D((*Ey)[mfi]), + BL_TO_FORTRAN_3D((*Ez)[mfi]), + BL_TO_FORTRAN_3D((*Bx)[mfi]), + BL_TO_FORTRAN_3D((*By)[mfi]), + BL_TO_FORTRAN_3D((*Bz)[mfi]), + BL_TO_FORTRAN_3D((*jx)[mfi]), + BL_TO_FORTRAN_3D((*jy)[mfi]), + BL_TO_FORTRAN_3D((*jz)[mfi]), + &mu_c2_dt, + &dtsdx_c2, &dtsdy_c2, &dtsdz_c2); + } + + if (F) + { + warpx_push_evec_f( + tex.loVect(), tex.hiVect(), + tey.loVect(), tey.hiVect(), + tez.loVect(), tez.hiVect(), + BL_TO_FORTRAN_3D((*Ex)[mfi]), + BL_TO_FORTRAN_3D((*Ey)[mfi]), + BL_TO_FORTRAN_3D((*Ez)[mfi]), + BL_TO_FORTRAN_3D((*F)[mfi]), + &dtsdx_c2, &dtsdy_c2, &dtsdz_c2, + &WarpX::maxwell_fdtd_solver_id); + } + + if (cost) { + Box cbx = mfi.tilebox(IntVect{AMREX_D_DECL(0,0,0)}); + if (patch_type == PatchType::coarse) cbx.refine(rr); + wt = (amrex::second() - wt) / cbx.d_numPts(); + auto costfab = cost->array(mfi); + amrex::ParallelFor(cbx, + [=] AMREX_GPU_DEVICE (int i, int j, int k) + { + costfab(i,j,k) += wt; + }); + } + } + + if (do_pml && pml[lev]->ok()) + { + if (F) pml[lev]->ExchangeF(patch_type, F); + + const auto& pml_B = (patch_type == PatchType::fine) ? pml[lev]->GetB_fp() : pml[lev]->GetB_cp(); + const auto& pml_E = (patch_type == PatchType::fine) ? pml[lev]->GetE_fp() : pml[lev]->GetE_cp(); + const auto& pml_F = (patch_type == PatchType::fine) ? pml[lev]->GetF_fp() : pml[lev]->GetF_cp(); +#ifdef _OPENMP +#pragma omp parallel if (Gpu::notInLaunchRegion()) +#endif + for ( MFIter mfi(*pml_E[0], TilingIfNotGPU()); mfi.isValid(); ++mfi ) + { + const Box& tex = mfi.tilebox(Ex_nodal_flag); + const Box& tey = mfi.tilebox(Ey_nodal_flag); + const Box& tez = mfi.tilebox(Ez_nodal_flag); + + WRPX_PUSH_PML_EVEC( + tex.loVect(), tex.hiVect(), + tey.loVect(), tey.hiVect(), + tez.loVect(), tez.hiVect(), + BL_TO_FORTRAN_3D((*pml_E[0])[mfi]), + BL_TO_FORTRAN_3D((*pml_E[1])[mfi]), + BL_TO_FORTRAN_3D((*pml_E[2])[mfi]), + BL_TO_FORTRAN_3D((*pml_B[0])[mfi]), + BL_TO_FORTRAN_3D((*pml_B[1])[mfi]), + BL_TO_FORTRAN_3D((*pml_B[2])[mfi]), + &dtsdx_c2, &dtsdy_c2, &dtsdz_c2); + + if (pml_F) + { + WRPX_PUSH_PML_EVEC_F( + tex.loVect(), tex.hiVect(), + tey.loVect(), tey.hiVect(), + tez.loVect(), tez.hiVect(), + BL_TO_FORTRAN_3D((*pml_E[0])[mfi]), + BL_TO_FORTRAN_3D((*pml_E[1])[mfi]), + BL_TO_FORTRAN_3D((*pml_E[2])[mfi]), + BL_TO_FORTRAN_3D((*pml_F )[mfi]), + &dtsdx_c2, &dtsdy_c2, &dtsdz_c2, + &WarpX::maxwell_fdtd_solver_id); + } + } + } +} + +void +WarpX::EvolveF (Real dt, DtType dt_type) +{ + if (!do_dive_cleaning) return; + + for (int lev = 0; lev <= finest_level; ++lev) + { + EvolveF(lev, dt, dt_type); + } +} + +void +WarpX::EvolveF (int lev, Real dt, DtType dt_type) +{ + if (!do_dive_cleaning) return; + + EvolveF(lev, PatchType::fine, dt, dt_type); + if (lev > 0) EvolveF(lev, PatchType::coarse, dt, dt_type); +} + +void +WarpX::EvolveF (int lev, PatchType patch_type, Real dt, DtType dt_type) +{ + if (!do_dive_cleaning) return; + + BL_PROFILE("WarpX::EvolveF()"); + + static constexpr Real mu_c2 = PhysConst::mu0*PhysConst::c*PhysConst::c; + + int patch_level = (patch_type == PatchType::fine) ? lev : lev-1; + const auto& dx = WarpX::CellSize(patch_level); + const std::array<Real,3> dtsdx {dt/dx[0], dt/dx[1], dt/dx[2]}; + + MultiFab *Ex, *Ey, *Ez, *rho, *F; + if (patch_type == PatchType::fine) + { + Ex = Efield_fp[lev][0].get(); + Ey = Efield_fp[lev][1].get(); + Ez = Efield_fp[lev][2].get(); + rho = rho_fp[lev].get(); + F = F_fp[lev].get(); + } + else + { + Ex = Efield_cp[lev][0].get(); + Ey = Efield_cp[lev][1].get(); + Ez = Efield_cp[lev][2].get(); + rho = rho_cp[lev].get(); + F = F_cp[lev].get(); + } + + const int rhocomp = (dt_type == DtType::FirstHalf) ? 0 : 1; + + MultiFab src(rho->boxArray(), rho->DistributionMap(), 1, 0); + ComputeDivE(src, 0, {Ex,Ey,Ez}, dx); + MultiFab::Saxpy(src, -mu_c2, *rho, rhocomp, 0, 1, 0); + MultiFab::Saxpy(*F, dt, src, 0, 0, 1, 0); + + if (do_pml && pml[lev]->ok()) + { + const auto& pml_F = (patch_type == PatchType::fine) ? pml[lev]->GetF_fp() : pml[lev]->GetF_cp(); + const auto& pml_E = (patch_type == PatchType::fine) ? pml[lev]->GetE_fp() : pml[lev]->GetE_cp(); + +#ifdef _OPENMP +#pragma omp parallel if (Gpu::notInLaunchRegion()) +#endif + for ( MFIter mfi(*pml_F, TilingIfNotGPU()); mfi.isValid(); ++mfi ) + { + const Box& bx = mfi.tilebox(); + WRPX_PUSH_PML_F(bx.loVect(), bx.hiVect(), + BL_TO_FORTRAN_ANYD((*pml_F )[mfi]), + BL_TO_FORTRAN_ANYD((*pml_E[0])[mfi]), + BL_TO_FORTRAN_ANYD((*pml_E[1])[mfi]), + BL_TO_FORTRAN_ANYD((*pml_E[2])[mfi]), + &dtsdx[0], &dtsdx[1], &dtsdx[2]); + } + } +} + diff --git a/Source/FieldSolver/WarpX_K.H b/Source/FieldSolver/WarpX_K.H new file mode 100644 index 000000000..2772b764d --- /dev/null +++ b/Source/FieldSolver/WarpX_K.H @@ -0,0 +1,97 @@ +#ifndef WARPX_K_H_ +#define WARPX_K_H_ + +#include <AMReX_FArrayBox.H> + +using namespace amrex; + +AMREX_GPU_HOST_DEVICE AMREX_INLINE +void warpx_push_bx_nodal (int j, int k, int l, Array4<Real> const& Bx, + Array4<Real const> const& Ey, Array4<Real const> const& Ez, + Real dtsdy, Real dtsdz) +{ +#if (AMREX_SPACEDIM == 3) + Bx(j,k,l) = Bx(j,k,l) - 0.5*dtsdy * (Ez(j,k+1,l ) - Ez(j,k-1,l )) + + 0.5*dtsdz * (Ey(j,k ,l+1) - Ey(j,k ,l-1)); +#else + Bx(j,k,0) = Bx(j,k,0) + 0.5*dtsdz * (Ey(j,k+1,0) - Ey(j,k-1,0)); +#endif +} + +AMREX_GPU_HOST_DEVICE AMREX_INLINE +void warpx_push_by_nodal (int j, int k, int l, Array4<Real> const& By, + Array4<Real const> const& Ex, Array4<Real const> const& Ez, + Real dtsdx, Real dtsdz) +{ +#if (AMREX_SPACEDIM == 3) + By(j,k,l) = By(j,k,l) + 0.5*dtsdx * (Ez(j+1,k,l ) - Ez(j-1,k,l )) + - 0.5*dtsdz * (Ex(j ,k,l+1) - Ex(j ,k,l-1)); +#else + By(j,k,0) = By(j,k,0) + 0.5*dtsdx * (Ez(j+1,k ,0) - Ez(j-1,k ,0)) + - 0.5*dtsdz * (Ex(j ,k+1,0) - Ex(j ,k-1,0)); +#endif +} + +AMREX_GPU_HOST_DEVICE AMREX_INLINE +void warpx_push_bz_nodal (int j, int k, int l, Array4<Real> const& Bz, + Array4<Real const> const& Ex, Array4<Real const> const& Ey, + Real dtsdx, Real dtsdy) +{ +#if (AMREX_SPACEDIM == 3) + Bz(j,k,l) = Bz(j,k,l) - 0.5*dtsdx * (Ey(j+1,k ,l) - Ey(j-1,k ,l)) + + 0.5*dtsdy * (Ex(j ,k+1,l) - Ex(j ,k-1,l)); +#else + Bz(j,k,0) = Bz(j,k,0) - 0.5*dtsdx * (Ey(j+1,k ,0) - Ey(j-1,k ,0)); +#endif +} + +AMREX_GPU_HOST_DEVICE AMREX_INLINE +void warpx_push_ex_nodal (int j, int k, int l, Array4<Real> const& Ex, + Array4<Real const> const& By, Array4<Real const> const& Bz, + Array4<Real const> const& jx, + Real mudt, Real dtsdy, Real dtsdz) +{ +#if (AMREX_SPACEDIM == 3) + Ex(j,k,l) = Ex(j,k,l) + 0.5*dtsdy * (Bz(j,k+1,l ) - Bz(j,k-1,l )) + - 0.5*dtsdz * (By(j,k ,l+1) - By(j,k ,l-1)) + - mudt * jx(j,k,l); +#else + Ex(j,k,0) = Ex(j,k,0) - 0.5*dtsdz * (By(j,k+1,0) - By(j,k-1,0)) + - mudt * jx(j,k,0); +#endif +} + +AMREX_GPU_HOST_DEVICE AMREX_INLINE +void warpx_push_ey_nodal (int j, int k, int l, Array4<Real> const& Ey, + Array4<Real const> const& Bx, Array4<Real const> const& Bz, + Array4<Real const> const& jy, + Real mudt, Real dtsdx, Real dtsdz) +{ +#if (AMREX_SPACEDIM == 3) + Ey(j,k,l) = Ey(j,k,l) - 0.5*dtsdx * (Bz(j+1,k,l ) - Bz(j-1,k,l )) + + 0.5*dtsdz * (Bx(j ,k,l+1) - Bx(j ,k,l-1)) + - mudt * jy(j,k,l); +#else + Ey(j,k,0) = Ey(j,k,0) - 0.5*dtsdx * (Bz(j+1,k ,0) - Bz(j-1,k ,0)) + + 0.5*dtsdz * (Bx(j ,k+1,0) - Bx(j ,k-1,0)) + - mudt * jy(j,k,0); +#endif +} + +AMREX_GPU_HOST_DEVICE AMREX_INLINE +void warpx_push_ez_nodal (int j, int k, int l, Array4<Real> const& Ez, + Array4<Real const> const& Bx, Array4<Real const> const& By, + Array4<Real const> const& jz, + Real mudt, Real dtsdx, Real dtsdy) +{ +#if (AMREX_SPACEDIM == 3) + Ez(j,k,l) = Ez(j,k,l) + 0.5*dtsdx * (By(j+1,k ,l) - By(j-1,k ,l)) + - 0.5*dtsdy * (Bx(j ,k+1,l) - Bx(j ,k-1,l)) + - mudt * jz(j,k,l); +#else + Ez(j,k,0) = Ez(j,k,0) + 0.5*dtsdx * (By(j+1,k,0) - By(j-1,k,0)) + - mudt * jz(j,k,0); +#endif +} + +#endif diff --git a/Source/FieldSolver/WarpX_fft.F90 b/Source/FieldSolver/WarpX_fft.F90 new file mode 100644 index 000000000..35357a63f --- /dev/null +++ b/Source/FieldSolver/WarpX_fft.F90 @@ -0,0 +1,319 @@ + +module warpx_fft_module + use amrex_error_module, only : amrex_error, amrex_abort + use amrex_fort_module, only : amrex_real + use iso_c_binding + implicit none + + include 'fftw3-mpi.f03' + + private + public :: warpx_fft_mpi_init, warpx_fft_domain_decomp, warpx_fft_dataplan_init, warpx_fft_nullify, & + warpx_fft_push_eb + +contains + +!> @brief +!! Set the communicator of the PICSAR module to the one that is passed in argument + subroutine warpx_fft_mpi_init (fcomm) bind(c,name='warpx_fft_mpi_init') + use shared_data, only : comm, rank, nproc + integer, intent(in), value :: fcomm + + integer :: ierr, lnproc, lrank + + comm = fcomm + + call mpi_comm_size(comm, lnproc, ierr) + nproc = lnproc + + call mpi_comm_rank(comm, lrank, ierr) + rank = lrank + +#ifdef _OPENMP + ierr = fftw_init_threads() + if (ierr.eq.0) call amrex_error("fftw_init_threads failed") +#endif + call fftw_mpi_init() +#ifdef _OPENMP + call dfftw_init_threads(ierr) + if (ierr.eq.0) call amrex_error("dfftw_init_threads failed") +#endif + end subroutine warpx_fft_mpi_init + +!> @brief +!! Ask FFTW to do domain decomposition. +! +! This is always a 1d domain decomposition along z ; it is typically +! done on the *FFT sub-groups*, not the all domain + subroutine warpx_fft_domain_decomp (warpx_local_nz, warpx_local_z0, global_lo, global_hi) & + bind(c,name='warpx_fft_domain_decomp') + use picsar_precision, only : idp + use shared_data, only : comm, & + nx_global, ny_global, nz_global, & ! size of global FFT + nx, ny, nz ! size of local subdomains + use mpi_fftw3, only : local_nz, local_z0, fftw_mpi_local_size_3d, alloc_local + + integer, intent(out) :: warpx_local_nz, warpx_local_z0 + integer, dimension(3), intent(in) :: global_lo, global_hi + + nx_global = INT(global_hi(1)-global_lo(1)+1,idp) + ny_global = INT(global_hi(2)-global_lo(2)+1,idp) + nz_global = INT(global_hi(3)-global_lo(3)+1,idp) + + alloc_local = fftw_mpi_local_size_3d( & + INT(nz_global,C_INTPTR_T), & + INT(ny_global,C_INTPTR_T), & + INT(nx_global,C_INTPTR_T)/2+1, & + comm, local_nz, local_z0) + + nx = nx_global + ny = ny_global + nz = local_nz + + warpx_local_nz = local_nz + warpx_local_z0 = local_z0 + end subroutine warpx_fft_domain_decomp + + +!> @brief +!! Set all the flags and metadata of the PICSAR FFT module. +!! Allocate the auxiliary arrays of `fft_data` +!! +!! Note: fft_data is a stuct containing 22 pointers to arrays +!! 1-11: padded arrays in real space ; 12-22 arrays for the fields in Fourier space + subroutine warpx_fft_dataplan_init (nox, noy, noz, fft_data, ndata, dx_wrpx, dt_wrpx, fftw_measure, do_nodal) & + bind(c,name='warpx_fft_dataplan_init') + USE picsar_precision, only: idp + use shared_data, only : c_dim, p3dfft_flag, fftw_plan_measure, & + fftw_with_mpi, fftw_threads_ok, fftw_hybrid, fftw_mpi_transpose, & + nx, ny, nz, & ! size of local subdomains + nkx, nky, nkz, & ! size of local ffts + iz_min_r, iz_max_r, iy_min_r, iy_max_r, ix_min_r, ix_max_r, & ! loop bounds + dx, dy, dz + use fields, only : nxguards, nyguards, nzguards, & ! size of guard regions + ex_r, ey_r, ez_r, bx_r, by_r, bz_r, & + jx_r, jy_r, jz_r, rho_r, rhoold_r, & + exf, eyf, ezf, bxf, byf, bzf, & + jxf, jyf, jzf, rhof, rhooldf, & + l_spectral, l_staggered, norderx, nordery, norderz + use mpi_fftw3, only : alloc_local + use omp_lib, only: omp_get_max_threads + USE gpstd_solver, only: init_gpstd + USE fourier_psaotd, only: init_plans_fourier_mpi + use params, only : dt + + integer, intent(in) :: nox, noy, noz, ndata + integer, intent(in) :: fftw_measure + integer, intent(in) :: do_nodal + type(c_ptr), intent(inout) :: fft_data(ndata) + real(c_double), intent(in) :: dx_wrpx(3), dt_wrpx + + integer(idp) :: nopenmp + integer :: nx_padded + integer, dimension(3) :: shp + integer(kind=c_size_t) :: sz + + ! No need to distinguish physical and guard cells for the global FFT; + ! only nx+2*nxguards counts. Thus we declare 0 guard cells for simplicity + nxguards = 0_idp + nyguards = 0_idp + nzguards = 0_idp + + ! For the calculation of the modified [k] vectors + if (do_nodal == 0) then + l_staggered = .TRUE. + else + l_staggered = .FALSE. + endif + norderx = int(nox, idp) + nordery = int(noy, idp) + norderz = int(noz, idp) + ! Define parameters of FFT plans + c_dim = INT(AMREX_SPACEDIM,idp) ! Dimensionality of the simulation (2d/3d) + fftw_with_mpi = .TRUE. ! Activate MPI FFTW + fftw_hybrid = .FALSE. ! FFT per MPI subgroup (instead of global) + fftw_mpi_transpose = .FALSE. ! Do not transpose the data + fftw_plan_measure = (fftw_measure .ne. 0) + p3dfft_flag = .FALSE. + l_spectral = .TRUE. ! Activate spectral Solver, using FFT +#ifdef _OPENMP + fftw_threads_ok = .TRUE. + nopenmp = OMP_GET_MAX_THREADS() +#else + fftw_threads_ok = .FALSE. + nopenmp = 1 +#endif + + ! Allocate padded arrays for MPI FFTW + nx_padded = 2*(nx/2 + 1) + shp = [nx_padded, int(ny), int(nz)] + sz = 2*alloc_local + fft_data(1) = fftw_alloc_real(sz) + call c_f_pointer(fft_data(1), ex_r, shp) + fft_data(2) = fftw_alloc_real(sz) + call c_f_pointer(fft_data(2), ey_r, shp) + fft_data(3) = fftw_alloc_real(sz) + call c_f_pointer(fft_data(3), ez_r, shp) + fft_data(4) = fftw_alloc_real(sz) + call c_f_pointer(fft_data(4), bx_r, shp) + fft_data(5) = fftw_alloc_real(sz) + call c_f_pointer(fft_data(5), by_r, shp) + fft_data(6) = fftw_alloc_real(sz) + call c_f_pointer(fft_data(6), bz_r, shp) + fft_data(7) = fftw_alloc_real(sz) + call c_f_pointer(fft_data(7), jx_r, shp) + fft_data(8) = fftw_alloc_real(sz) + call c_f_pointer(fft_data(8), jy_r, shp) + fft_data(9) = fftw_alloc_real(sz) + call c_f_pointer(fft_data(9), jz_r, shp) + fft_data(10) = fftw_alloc_real(sz) + call c_f_pointer(fft_data(10), rho_r, shp) + fft_data(11) = fftw_alloc_real(sz) + call c_f_pointer(fft_data(11), rhoold_r, shp) + + ! Set array bounds when copying ex to ex_r in PICSAR + ix_min_r = 1; ix_max_r = nx + iy_min_r = 1; iy_max_r = ny + iz_min_r = 1; iz_max_r = nz + ! Allocate Fourier space fields of the same size + nkx = nx/2 + 1 + nky = ny + nkz = nz + shp = [int(nkx), int(nky), int(nkz)] + sz = alloc_local + fft_data(12) = fftw_alloc_complex(sz) + call c_f_pointer(fft_data(12), exf, shp) + fft_data(13) = fftw_alloc_complex(sz) + call c_f_pointer(fft_data(13), eyf, shp) + fft_data(14) = fftw_alloc_complex(sz) + call c_f_pointer(fft_data(14), ezf, shp) + fft_data(15) = fftw_alloc_complex(sz) + call c_f_pointer(fft_data(15), bxf, shp) + fft_data(16) = fftw_alloc_complex(sz) + call c_f_pointer(fft_data(16), byf, shp) + fft_data(17) = fftw_alloc_complex(sz) + call c_f_pointer(fft_data(17), bzf, shp) + fft_data(18) = fftw_alloc_complex(sz) + call c_f_pointer(fft_data(18), jxf, shp) + fft_data(19) = fftw_alloc_complex(sz) + call c_f_pointer(fft_data(19), jyf, shp) + fft_data(20) = fftw_alloc_complex(sz) + call c_f_pointer(fft_data(20), jzf, shp) + fft_data(21) = fftw_alloc_complex(sz) + call c_f_pointer(fft_data(21), rhof, shp) + fft_data(22) = fftw_alloc_complex(sz) + call c_f_pointer(fft_data(22), rhooldf, shp) +!$acc enter data create (exf,eyf,ezf,bxf,byf,bzf,jxf,jyf,jzf,rhof,rhooldf) + if (ndata < 22) then + call amrex_abort("size of fft_data is too small") + end if + + dx = dx_wrpx(1) + dy = dx_wrpx(2) + dz = dx_wrpx(3) + dt = dt_wrpx + + ! Initialize the matrix blocks for the PSATD solver + CALL init_gpstd() + ! Initialize the plans for fftw with MPI + CALL init_plans_fourier_mpi(nopenmp) + + end subroutine warpx_fft_dataplan_init + + + subroutine warpx_fft_nullify () bind(c,name='warpx_fft_nullify') + use fields, only: ex_r, ey_r, ez_r, bx_r, by_r, bz_r, & + jx_r, jy_r, jz_r, rho_r, rhoold_r, & + exf, eyf, ezf, bxf, byf, bzf, & + jxf, jyf, jzf, rhof, rhooldf + use mpi_fftw3, only : plan_r2c_mpi, plan_c2r_mpi + nullify(ex_r) + nullify(ey_r) + nullify(ez_r) + nullify(bx_r) + nullify(by_r) + nullify(bz_r) + nullify(jx_r) + nullify(jy_r) + nullify(jz_r) + nullify(rho_r) + nullify(rhoold_r) + nullify(exf) + nullify(eyf) + nullify(ezf) + nullify(bxf) + nullify(byf) + nullify(bzf) + nullify(jxf) + nullify(jyf) + nullify(jzf) + nullify(rhof) + nullify(rhooldf) + call fftw_destroy_plan(plan_r2c_mpi) + call fftw_destroy_plan(plan_c2r_mpi) + call fftw_mpi_cleanup() + end subroutine warpx_fft_nullify + + + subroutine warpx_fft_push_eb ( & + ex_wrpx, exlo, exhi, & + ey_wrpx, eylo, eyhi, & + ez_wrpx, ezlo, ezhi, & + bx_wrpx, bxlo, bxhi, & + by_wrpx, bylo, byhi, & + bz_wrpx, bzlo, bzhi, & + jx_wrpx, jxlo, jxhi, & + jy_wrpx, jylo, jyhi, & + jz_wrpx, jzlo, jzhi, & + rhoold_wrpx, r1lo, r1hi, & + rho_wrpx, r2lo, r2hi) & + bind(c,name='warpx_fft_push_eb') + + use fields, only: ex, ey, ez, bx, by, bz, jx, jy, jz + use shared_data, only: rhoold, rho + use constants, only: num + + integer, dimension(3), intent(in) :: exlo, exhi, eylo, eyhi, ezlo, ezhi, bxlo, bxhi, & + bylo, byhi, bzlo, bzhi, jxlo, jxhi, jylo, jyhi, jzlo, jzhi, r1lo, r1hi, r2lo, r2hi + REAL(num), INTENT(INOUT), TARGET :: ex_wrpx(0:exhi(1)-exlo(1),0:exhi(2)-exlo(2),0:exhi(3)-exlo(3)) + REAL(num), INTENT(INOUT), TARGET :: ey_wrpx(0:eyhi(1)-eylo(1),0:eyhi(2)-eylo(2),0:eyhi(3)-eylo(3)) + REAL(num), INTENT(INOUT), TARGET :: ez_wrpx(0:ezhi(1)-ezlo(1),0:ezhi(2)-ezlo(2),0:ezhi(3)-ezlo(3)) + REAL(num), INTENT(INOUT), TARGET :: bx_wrpx(0:bxhi(1)-bxlo(1),0:bxhi(2)-bxlo(2),0:bxhi(3)-bxlo(3)) + REAL(num), INTENT(INOUT), TARGET :: by_wrpx(0:byhi(1)-bylo(1),0:byhi(2)-bylo(2),0:byhi(3)-bylo(3)) + REAL(num), INTENT(INOUT), TARGET :: bz_wrpx(0:bzhi(1)-bzlo(1),0:bzhi(2)-bzlo(2),0:bzhi(3)-bzlo(3)) + REAL(num), INTENT(INOUT), TARGET :: jx_wrpx(0:jxhi(1)-jxlo(1),0:jxhi(2)-jxlo(2),0:jxhi(3)-jxlo(3)) + REAL(num), INTENT(INOUT), TARGET :: jy_wrpx(0:jyhi(1)-jylo(1),0:jyhi(2)-jylo(2),0:jyhi(3)-jylo(3)) + REAL(num), INTENT(INOUT), TARGET :: jz_wrpx(0:jzhi(1)-jzlo(1),0:jzhi(2)-jzlo(2),0:jzhi(3)-jzlo(3)) + REAL(num), INTENT(INOUT), TARGET :: rhoold_wrpx(0:r1hi(1)-r1lo(1),0:r1hi(2)-r1lo(2),0:r1hi(3)-r1lo(3)) + REAL(num), INTENT(INOUT), TARGET :: rho_wrpx(0:r2hi(1)-r2lo(1),0:r2hi(2)-r2lo(2),0:r2hi(3)-r2lo(3)) + + ! Point the fields in the PICSAR modules to the fields provided by WarpX + ex => ex_wrpx + ey => ey_wrpx + ez => ez_wrpx + bx => bx_wrpx + by => by_wrpx + bz => bz_wrpx + jx => jx_wrpx + jy => jy_wrpx + jz => jz_wrpx + rho => rho_wrpx + rhoold => rhoold_wrpx + + ! Call the corresponding PICSAR function + CALL push_psatd_ebfield() + + ex => null() + ey => null() + ez => null() + bx => null() + by => null() + bz => null() + jx => null() + jy => null() + jz => null() + rho => null() + rhoold => null() + end subroutine warpx_fft_push_eb + +end module warpx_fft_module diff --git a/Source/FieldSolver/openbc_poisson_solver.F90 b/Source/FieldSolver/openbc_poisson_solver.F90 new file mode 100644 index 000000000..c18b1db24 --- /dev/null +++ b/Source/FieldSolver/openbc_poisson_solver.F90 @@ -0,0 +1,62 @@ + +module warpx_openbc_module + + implicit none + + integer, parameter :: idecomp = 0 ! 0=xyz, 1=xy, 2=yz, 3=xz, 4=x, 5=y, 6=z. + integer, parameter :: igfflag = 1 ! =0 for ordinary 1/r Green function; + ! =1 for integrated Green function + + integer, save :: gb_lo(3), gb_hi(3) + integer, save :: lc_lo(3), lc_hi(3) + +contains + + subroutine warpx_openbc_decompose(glo, ghi, lo, hi) bind(c,name='warpx_openbc_decompose') + + use mpi + + integer, intent(in) :: glo(3), ghi(3) + integer, intent(out) :: lo(3), hi(3) + integer :: myrank, mprocs, ierr, npx, npy, npz + + call MPI_Comm_size(MPI_COMM_WORLD, mprocs, ierr); + call MPI_COMM_RANK(MPI_COMM_WORLD,myrank,ierr) + + gb_lo = glo + 1 ! +1 because AMReX's domain index starts with 0 + gb_hi = ghi + 2 ! +2 to nodalize it + + call procgriddecomp(mprocs, gb_lo(1),gb_hi(1), & + & gb_lo(2),gb_hi(2), & + & gb_lo(3),gb_hi(3), & + & idecomp, npx, npy, npz) + + call decompose(myrank,mprocs, gb_lo(1),gb_hi(1), & + & gb_lo(2),gb_hi(2), & + & gb_lo(3),gb_hi(3), & + idecomp, lc_lo(1), lc_hi(1), & + & lc_lo(2), lc_hi(2), & + & lc_lo(3), lc_hi(3)) + + lo = lc_lo - 1 ! AMReX's domain index starts with zero. + hi = lc_hi - 1 + + end subroutine warpx_openbc_decompose + + subroutine warpx_openbc_potential(rho, phi, dx) & + bind(C, name="warpx_openbc_potential") + + double precision, intent(in ) :: dx(3) + double precision, intent(in ) :: rho(lc_lo(1):lc_hi(1),lc_lo(2):lc_hi(2),lc_lo(3):lc_hi(3)) + double precision, intent(out) :: phi(lc_lo(1):lc_hi(1),lc_lo(2):lc_hi(2),lc_lo(3):lc_hi(3)) + + integer :: ierr + + call openbcpotential(rho, phi, dx(1), dx(2), dx(3), & + lc_lo(1), lc_hi(1), lc_lo(2), lc_hi(2), lc_lo(3), lc_hi(3), & + gb_lo(1), gb_hi(1), gb_lo(2), gb_hi(2), gb_lo(3), gb_hi(3), & + idecomp, igfflag, ierr) + + end subroutine warpx_openbc_potential + +end module warpx_openbc_module diff --git a/Source/FieldSolver/solve_E_nodal.F90 b/Source/FieldSolver/solve_E_nodal.F90 new file mode 100644 index 000000000..e5ac50b2a --- /dev/null +++ b/Source/FieldSolver/solve_E_nodal.F90 @@ -0,0 +1,73 @@ +module warpx_ES_solve_E_nodal + + use iso_c_binding + use amrex_fort_module, only : amrex_real + + implicit none + +contains + +! This routine computes the node-centered electric field given a node-centered phi. +! The gradient is computed using 2nd-order centered differences. It assumes the +! Boundary conditions have already been set and that you have two rows of ghost cells +! for phi and one row of ghost cells for Ex, Ey, and Ez. +! Note that this routine includes the minus sign in E = - grad phi. +! +! Arguments: +! lo, hi: The corners of the valid box over which the gradient is taken +! Ex, Ey, Ez: The electric field in the x, y, and z directions. +! dx: The cell spacing +! + subroutine warpx_compute_E_nodal_3d (lo, hi, phi, Ex, Ey, Ez, dx) & + bind(c,name='warpx_compute_E_nodal_3d') + integer(c_int), intent(in) :: lo(3), hi(3) + real(amrex_real), intent(in) :: dx(3) + real(amrex_real), intent(in ) :: phi(lo(1)-2:hi(1)+2,lo(2)-2:hi(2)+2,lo(3)-2:hi(3)+2) + real(amrex_real), intent(inout) :: Ex (lo(1)-1:hi(1)+1,lo(2)-1:hi(2)+1,lo(3)-1:hi(3)+1) + real(amrex_real), intent(inout) :: Ey (lo(1)-1:hi(1)+1,lo(2)-1:hi(2)+1,lo(3)-1:hi(3)+1) + real(amrex_real), intent(inout) :: Ez (lo(1)-1:hi(1)+1,lo(2)-1:hi(2)+1,lo(3)-1:hi(3)+1) + + integer :: i, j, k + real(amrex_real) :: fac(3) + + fac = 0.5d0 / dx + + do k = lo(3)-1, hi(3)+1 + do j = lo(2)-1, hi(2)+1 + do i = lo(1)-1, hi(1)+1 + + Ex(i,j,k) = fac(1) * (phi(i-1,j,k) - phi(i+1,j,k)) + Ey(i,j,k) = fac(2) * (phi(i,j-1,k) - phi(i,j+1,k)) + Ez(i,j,k) = fac(3) * (phi(i,j,k-1) - phi(i,j,k+1)) + + end do + end do + end do + + end subroutine warpx_compute_E_nodal_3d + + subroutine warpx_compute_E_nodal_2d (lo, hi, phi, Ex, Ey, dx) & + bind(c,name='warpx_compute_E_nodal_2d') + integer(c_int), intent(in) :: lo(2), hi(2) + real(amrex_real), intent(in) :: dx(2) + real(amrex_real), intent(in ) :: phi(lo(1)-2:hi(1)+2,lo(2)-2:hi(2)+2) + real(amrex_real), intent(inout) :: Ex (lo(1)-1:hi(1)+1,lo(2)-1:hi(2)+1) + real(amrex_real), intent(inout) :: Ey (lo(1)-1:hi(1)+1,lo(2)-1:hi(2)+1) + + integer :: i, j + real(amrex_real) :: fac(2) + + fac = 0.5d0 / dx + + do j = lo(2)-1, hi(2)+1 + do i = lo(1)-1, hi(1)+1 + + Ex(i,j) = fac(1) * (phi(i-1,j) - phi(i+1,j)) + Ey(i,j) = fac(2) * (phi(i,j-1) - phi(i,j+1)) + + end do + end do + + end subroutine warpx_compute_E_nodal_2d + +end module warpx_ES_solve_E_nodal |