diff options
Diffstat (limited to 'Source')
-rw-r--r-- | Source/FieldSolver/Make.package | 5 | ||||
-rw-r--r-- | Source/FieldSolver/PicsarHybridSpectralSolver/Make.package | 6 | ||||
-rw-r--r-- | Source/FieldSolver/PicsarHybridSpectralSolver/PicsarHybridFFTData.H | 32 | ||||
-rw-r--r-- | Source/FieldSolver/PicsarHybridSpectralSolver/PicsarHybridSpectralSolver.cpp (renamed from Source/FieldSolver/WarpXFFT.cpp) | 54 | ||||
-rw-r--r-- | Source/FieldSolver/PicsarHybridSpectralSolver/picsar_hybrid_spectral.F90 (renamed from Source/FieldSolver/WarpX_fft.F90) | 0 | ||||
-rw-r--r-- | Source/FieldSolver/WarpXPushFieldsEM.cpp | 48 | ||||
-rw-r--r-- | Source/Initialization/WarpXInitData.cpp | 2 | ||||
-rw-r--r-- | Source/Make.WarpX | 25 | ||||
-rw-r--r-- | Source/Particles/Deposition/CurrentDeposition.H | 233 | ||||
-rw-r--r-- | Source/Particles/WarpXParticleContainer.cpp | 50 | ||||
-rw-r--r-- | Source/WarpX.H | 64 | ||||
-rw-r--r-- | Source/WarpX.cpp | 19 |
12 files changed, 403 insertions, 135 deletions
diff --git a/Source/FieldSolver/Make.package b/Source/FieldSolver/Make.package index 7eea52ee7..25e21c146 100644 --- a/Source/FieldSolver/Make.package +++ b/Source/FieldSolver/Make.package @@ -2,9 +2,10 @@ CEXE_headers += WarpX_K.H CEXE_headers += WarpX_FDTD.H CEXE_sources += WarpXPushFieldsEM.cpp ifeq ($(USE_PSATD),TRUE) - CEXE_sources += WarpXFFT.cpp - F90EXE_sources += WarpX_fft.F90 include $(WARPX_HOME)/Source/FieldSolver/SpectralSolver/Make.package + ifeq ($(USE_CUDA),FALSE) + include $(WARPX_HOME)/Source/FieldSolver/PicsarHybridSpectralSolver/Make.package + endif endif ifeq ($(USE_OPENBC_POISSON),TRUE) F90EXE_sources += openbc_poisson_solver.F90 diff --git a/Source/FieldSolver/PicsarHybridSpectralSolver/Make.package b/Source/FieldSolver/PicsarHybridSpectralSolver/Make.package new file mode 100644 index 000000000..e185525e6 --- /dev/null +++ b/Source/FieldSolver/PicsarHybridSpectralSolver/Make.package @@ -0,0 +1,6 @@ +CEXE_headers += PicsarHybridFFTData.H +CEXE_sources += PicsarHybridSpectralSolver.cpp +F90EXE_sources += picsar_hybrid_spectral.F90 + +INCLUDE_LOCATIONS += $(WARPX_HOME)/Source/FieldSolver/PicsarHybridSpectralSolver +VPATH_LOCATIONS += $(WARPX_HOME)/Source/FieldSolver/PicsarHybridSpectralSolver
\ No newline at end of file diff --git a/Source/FieldSolver/PicsarHybridSpectralSolver/PicsarHybridFFTData.H b/Source/FieldSolver/PicsarHybridSpectralSolver/PicsarHybridFFTData.H new file mode 100644 index 000000000..4e97ab675 --- /dev/null +++ b/Source/FieldSolver/PicsarHybridSpectralSolver/PicsarHybridFFTData.H @@ -0,0 +1,32 @@ +#ifndef WARPX_PICSAR_HYBRID_FFTDATA_H_ +#define WARPX_PICSAR_HYBRID_FFTDATA_H_ + +// FFTData is a stuct containing a 22 pointers to auxiliary arrays +// 1-11: padded arrays in real space (required by FFTW); 12-22: arrays in spectral space +struct FFTData +{ + static constexpr int N = 22; + void* data[N] = { nullptr }; + + ~FFTData () { + for (int i = 0; i < N; ++i) { // The memory is allocated with fftw_alloc. + fftw_free(data[i]); + data[i] = nullptr; + } + } + + FFTData () = default; + + FFTData (FFTData && rhs) noexcept { + for (int i = 0; i < N; ++i) { + data[i] = rhs.data[i]; + rhs.data[i] = nullptr; + } + } + + FFTData (FFTData const&) = delete; + void operator= (FFTData const&) = delete; + void operator= (FFTData&&) = delete; +}; + +#endif // WARPX_PICSAR_HYBRID_FFTDATA_H_
\ No newline at end of file diff --git a/Source/FieldSolver/WarpXFFT.cpp b/Source/FieldSolver/PicsarHybridSpectralSolver/PicsarHybridSpectralSolver.cpp index f9cd7570a..26c93086a 100644 --- a/Source/FieldSolver/WarpXFFT.cpp +++ b/Source/FieldSolver/PicsarHybridSpectralSolver/PicsarHybridSpectralSolver.cpp @@ -5,10 +5,10 @@ using namespace amrex; -constexpr int WarpX::FFTData::N; +constexpr int FFTData::N; namespace { -static std::unique_ptr<WarpX::FFTData> nullfftdata; // This for process with nz_fft=0 +static std::unique_ptr<FFTData> nullfftdata; // This for process with nz_fft=0 /** \brief Returns an "owner mask" which 1 for all cells, except * for the duplicated (physical) cells of a nodal grid. @@ -122,6 +122,8 @@ WarpX::AllocLevelDataFFT (int lev) static_assert(std::is_standard_layout<FFTData>::value, "FFTData must have standard layout"); static_assert(sizeof(FFTData) == sizeof(void*)*FFTData::N, "sizeof FFTData is wrong"); + + InitFFTComm(lev); BoxArray ba_fp_fft; @@ -366,52 +368,8 @@ WarpX::FreeFFT (int lev) } void -WarpX::PushPSATD (amrex::Real a_dt) -{ - for (int lev = 0; lev <= finest_level; ++lev) { - AMREX_ALWAYS_ASSERT_WITH_MESSAGE(dt[lev] == a_dt, "dt must be consistent"); - if (fft_hybrid_mpi_decomposition){ - PushPSATD_hybridFFT(lev, a_dt); - } else { - PushPSATD_localFFT(lev, a_dt); - } - } -} - -void WarpX::PushPSATD_localFFT (int lev, amrex::Real /* dt */) -{ - auto& solver = *spectral_solver_fp[lev]; - - // Perform forward Fourier transform - solver.ForwardTransform(*Efield_fp[lev][0], SpectralFieldIndex::Ex); - solver.ForwardTransform(*Efield_fp[lev][1], SpectralFieldIndex::Ey); - solver.ForwardTransform(*Efield_fp[lev][2], SpectralFieldIndex::Ez); - solver.ForwardTransform(*Bfield_fp[lev][0], SpectralFieldIndex::Bx); - solver.ForwardTransform(*Bfield_fp[lev][1], SpectralFieldIndex::By); - solver.ForwardTransform(*Bfield_fp[lev][2], SpectralFieldIndex::Bz); - solver.ForwardTransform(*current_fp[lev][0], SpectralFieldIndex::Jx); - solver.ForwardTransform(*current_fp[lev][1], SpectralFieldIndex::Jy); - solver.ForwardTransform(*current_fp[lev][2], SpectralFieldIndex::Jz); - solver.ForwardTransform(*rho_fp[lev], SpectralFieldIndex::rho_old, 0); - solver.ForwardTransform(*rho_fp[lev], SpectralFieldIndex::rho_new, 1); - - // Advance fields in spectral space - solver.pushSpectralFields(); - - // Perform backward Fourier Transform - solver.BackwardTransform(*Efield_fp[lev][0], SpectralFieldIndex::Ex); - solver.BackwardTransform(*Efield_fp[lev][1], SpectralFieldIndex::Ey); - solver.BackwardTransform(*Efield_fp[lev][2], SpectralFieldIndex::Ez); - solver.BackwardTransform(*Bfield_fp[lev][0], SpectralFieldIndex::Bx); - solver.BackwardTransform(*Bfield_fp[lev][1], SpectralFieldIndex::By); - solver.BackwardTransform(*Bfield_fp[lev][2], SpectralFieldIndex::Bz); -} - -void WarpX::PushPSATD_hybridFFT (int lev, amrex::Real /* dt */) { -#ifndef AMREX_USE_CUDA // Running on CPU ; use PICSAR code for the hybrid FFT - BL_PROFILE_VAR_NS("WarpXFFT::CopyDualGrid", blp_copy); BL_PROFILE_VAR_NS("PICSAR::FftPushEB", blp_push_eb); @@ -486,8 +444,4 @@ WarpX::PushPSATD_hybridFFT (int lev, amrex::Real /* dt */) { amrex::Abort("WarpX::PushPSATD: TODO"); } -#else // AMREX_USE_CUDA is defined ; running on GPU - amrex::Abort("The option `psatd.fft_hybrid_mpi_decomposition` does not work on GPU."); -#endif - } diff --git a/Source/FieldSolver/WarpX_fft.F90 b/Source/FieldSolver/PicsarHybridSpectralSolver/picsar_hybrid_spectral.F90 index 35357a63f..35357a63f 100644 --- a/Source/FieldSolver/WarpX_fft.F90 +++ b/Source/FieldSolver/PicsarHybridSpectralSolver/picsar_hybrid_spectral.F90 diff --git a/Source/FieldSolver/WarpXPushFieldsEM.cpp b/Source/FieldSolver/WarpXPushFieldsEM.cpp index c53e13f8f..bea008598 100644 --- a/Source/FieldSolver/WarpXPushFieldsEM.cpp +++ b/Source/FieldSolver/WarpXPushFieldsEM.cpp @@ -16,6 +16,54 @@ using namespace amrex; +#ifdef WARPX_USE_PSATD + +void +WarpX::PushPSATD (amrex::Real a_dt) +{ + for (int lev = 0; lev <= finest_level; ++lev) { + AMREX_ALWAYS_ASSERT_WITH_MESSAGE(dt[lev] == a_dt, "dt must be consistent"); + if (fft_hybrid_mpi_decomposition){ +#ifndef AMREX_USE_CUDA // Only available on CPU + PushPSATD_hybridFFT(lev, a_dt); +#endif + } else { + PushPSATD_localFFT(lev, a_dt); + } + } +} + +void WarpX::PushPSATD_localFFT (int lev, amrex::Real /* dt */) +{ + auto& solver = *spectral_solver_fp[lev]; + + // Perform forward Fourier transform + solver.ForwardTransform(*Efield_fp[lev][0], SpectralFieldIndex::Ex); + solver.ForwardTransform(*Efield_fp[lev][1], SpectralFieldIndex::Ey); + solver.ForwardTransform(*Efield_fp[lev][2], SpectralFieldIndex::Ez); + solver.ForwardTransform(*Bfield_fp[lev][0], SpectralFieldIndex::Bx); + solver.ForwardTransform(*Bfield_fp[lev][1], SpectralFieldIndex::By); + solver.ForwardTransform(*Bfield_fp[lev][2], SpectralFieldIndex::Bz); + solver.ForwardTransform(*current_fp[lev][0], SpectralFieldIndex::Jx); + solver.ForwardTransform(*current_fp[lev][1], SpectralFieldIndex::Jy); + solver.ForwardTransform(*current_fp[lev][2], SpectralFieldIndex::Jz); + solver.ForwardTransform(*rho_fp[lev], SpectralFieldIndex::rho_old, 0); + solver.ForwardTransform(*rho_fp[lev], SpectralFieldIndex::rho_new, 1); + + // Advance fields in spectral space + solver.pushSpectralFields(); + + // Perform backward Fourier Transform + solver.BackwardTransform(*Efield_fp[lev][0], SpectralFieldIndex::Ex); + solver.BackwardTransform(*Efield_fp[lev][1], SpectralFieldIndex::Ey); + solver.BackwardTransform(*Efield_fp[lev][2], SpectralFieldIndex::Ez); + solver.BackwardTransform(*Bfield_fp[lev][0], SpectralFieldIndex::Bx); + solver.BackwardTransform(*Bfield_fp[lev][1], SpectralFieldIndex::By); + solver.BackwardTransform(*Bfield_fp[lev][2], SpectralFieldIndex::Bz); +} + +#endif + void WarpX::EvolveB (Real a_dt) { diff --git a/Source/Initialization/WarpXInitData.cpp b/Source/Initialization/WarpXInitData.cpp index d583b2b0f..2442e0205 100644 --- a/Source/Initialization/WarpXInitData.cpp +++ b/Source/Initialization/WarpXInitData.cpp @@ -317,7 +317,7 @@ WarpX::InitLevelData (int lev, Real time) } } -#ifdef WARPX_USE_PSATD +#ifdef WARPX_USE_PSATD_HYBRID void WarpX::InitLevelDataFFT (int lev, Real time) diff --git a/Source/Make.WarpX b/Source/Make.WarpX index bc837c797..3060ae8f0 100644 --- a/Source/Make.WarpX +++ b/Source/Make.WarpX @@ -111,20 +111,23 @@ endif ifeq ($(USE_PSATD),TRUE) - FFTW_HOME ?= NOT_SET - ifneq ($(FFTW_HOME),NOT_SET) - VPATH_LOCATIONS += $(FFTW_HOME)/include - INCLUDE_LOCATIONS += $(FFTW_HOME)/include - LIBRARY_LOCATIONS += $(FFTW_HOME)/lib - endif USERSuffix := $(USERSuffix).PSATD DEFINES += -DWARPX_USE_PSATD - ifeq ($(USE_CUDA),TRUE) - DEFINES += -DFFTW -DCUDA_FFT=1 # PICSAR uses it - LIBRARIES += -lfftw3_mpi -lfftw3 -lfftw3_threads -lcufft + ifeq ($(USE_CUDA),FALSE) # Running on CPU + # Use FFTW + LIBRARIES += -lfftw3_mpi -lfftw3 -lfftw3_threads + FFTW_HOME ?= NOT_SET + ifneq ($(FFTW_HOME),NOT_SET) + VPATH_LOCATIONS += $(FFTW_HOME)/include + INCLUDE_LOCATIONS += $(FFTW_HOME)/include + LIBRARY_LOCATIONS += $(FFTW_HOME)/lib + endif + # Compile with PICSAR's Hybrid-MPI PSATD + DEFINES += -DWARPX_USE_PSATD_HYBRID + DEFINES += -DFFTW # PICSAR uses it else - DEFINES += -DFFTW # PICSAR uses it - LIBRARIES += -lfftw3_mpi -lfftw3 -lfftw3_threads + # Use cuFFT + LIBRARIES += -lcufft endif endif diff --git a/Source/Particles/Deposition/CurrentDeposition.H b/Source/Particles/Deposition/CurrentDeposition.H index efda97892..97bc53c20 100644 --- a/Source/Particles/Deposition/CurrentDeposition.H +++ b/Source/Particles/Deposition/CurrentDeposition.H @@ -3,12 +3,12 @@ using namespace amrex; -// Compute shape factor and return index of leftmost cell where +// Compute shape factor and return index of leftmost cell where // particle writes. // Specialized templates are defined below for orders 1, 2 and 3. template <int depos_order> AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE -int compute_shape_factor(Real* const sx, Real xint) {return 0;}; +int compute_shape_factor(Real* const sx, Real xint); // Compute shape factor for order 1. template <> @@ -176,4 +176,233 @@ void doDepositionShapeN(const Real * const xp, const Real * const yp, const Real ); } +// Compute shape factor and return index of leftmost cell where +// particle writes. +// Specialized templates are defined below for orders 1, 2 and 3. +template <int depos_order> +AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE +int compute_shifted_shape_factor (Real* const sx, const Real x_old, const int i_new); + +// Compute shape factor for order 1. +template <> +AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE +int compute_shifted_shape_factor <1> (Real* const sx, const Real x_old, const int i_new){ + const int i = (int) x_old; + const int i_shift = i - i_new; + const Real xint = x_old - i; + sx[1+i_shift] = 1.0 - xint; + sx[2+i_shift] = xint; + return i; +} + +// Compute shape factor for order 2. +template <> +AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE +int compute_shifted_shape_factor <2> (Real* const sx, const Real x_old, const int i_new){ + const int i = (int) (x_old+0.5); + const int i_shift = i - (i_new + 1); + const Real xint = x_old - i; + sx[1+i_shift] = 0.5*(0.5-xint)*(0.5-xint); + sx[2+i_shift] = 0.75-xint*xint; + sx[3+i_shift] = 0.5*(0.5+xint)*(0.5+xint); + // index of the leftmost cell where particle deposits + return i-1; +} + +// Compute shape factor for order 3. +template <> +AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE +int compute_shifted_shape_factor <3> (Real* const sx, const Real x_old, const int i_new){ + const int i = (int) x_old; + const int i_shift = i - (i_new + 1); + const Real xint = x_old - i; + sx[1+i_shift] = 1.0/6.0*(1.0-xint)*(1.0-xint)*(1.0-xint); + sx[2+i_shift] = 2.0/3.0-xint*xint*(1-xint/2.0); + sx[3+i_shift] = 2.0/3.0-(1-xint)*(1-xint)*(1.0-0.5*(1-xint)); + sx[4+i_shift] = 1.0/6.0*xint*xint*xint; + // index of the leftmost cell where particle deposits + return i-1; +} + +/* \brief Esirkepov Current Deposition for thread thread_num + * /param xp, yp, zp : Pointer to arrays of particle positions. + * \param wp : Pointer to array of particle weights. + * \param uxp uyp uzp : Pointer to arrays of particle momentum. + * \param Jx_arr : Array4 of current density, either full array or tile. + * \param Jy_arr : Array4 of current density, either full array or tile. + * \param Jz_arr : Array4 of current density, either full array or tile. + * \param np_to_depose : Number of particles for which current is deposited. + * \param dt : Time step for particle level + * \param dx : 3D cell size + * \param xyzmin : Physical lower bounds of domain. + * \param lo : Index lower bounds of domain. + * /param q : species charge. + */ +template <int depos_order> +void doEsirkepovDepositionShapeN (const Real * const xp, const Real * const yp, const Real * const zp, + const Real * const wp, const Real * const uxp, + const Real * const uyp, const Real * const uzp, + const amrex::Array4<amrex::Real>& Jx_arr, + const amrex::Array4<amrex::Real>& Jy_arr, + const amrex::Array4<amrex::Real>& Jz_arr, + const long np_to_depose, + const amrex::Real dt, const std::array<amrex::Real,3>& dx, + const std::array<Real, 3> xyzmin, + const Dim3 lo, + const amrex::Real q) +{ + const Real dxi = 1.0/dx[0]; + const Real dtsdx0 = dt*dxi; + const Real xmin = xyzmin[0]; +#if (AMREX_SPACEDIM == 3) + const Real dyi = 1.0/dx[1]; + const Real dtsdy0 = dt*dyi; + const Real ymin = xyzmin[1]; +#endif + const Real dzi = 1.0/dx[2]; + const Real dtsdz0 = dt*dzi; + const Real zmin = xyzmin[2]; + +#if (AMREX_SPACEDIM == 3) + const Real invdtdx = 1.0/(dt*dx[1]*dx[2]); + const Real invdtdy = 1.0/(dt*dx[0]*dx[2]); + const Real invdtdz = 1.0/(dt*dx[0]*dx[1]); +#elif (AMREX_SPACEDIM == 2) + const Real invdtdx = 1.0/(dt*dx[2]); + const Real invdtdz = 1.0/(dt*dx[0]); + const Real invvol = 1.0/(dx[0]*dx[2]); +#endif + + const Real clightsq = 1.0/PhysConst::c/PhysConst::c; + + // Loop over particles and deposit into Jx_arr, Jy_arr and Jz_arr + ParallelFor( np_to_depose, + [=] AMREX_GPU_DEVICE (long ip) { + + // --- Get particle quantities + const Real gaminv = 1.0/std::sqrt(1.0 + uxp[ip]*uxp[ip]*clightsq + + uyp[ip]*uyp[ip]*clightsq + + uzp[ip]*uzp[ip]*clightsq); + + // wqx, wqy wqz are particle current in each direction + const Real wq = q*wp[ip]; + const Real wqx = wq*invdtdx; +#if (AMREX_SPACEDIM == 3) + const Real wqy = wq*invdtdy; +#endif + const Real wqz = wq*invdtdz; + + // computes current and old position in grid units + const Real x_new = (xp[ip] - xmin)*dxi; + const Real x_old = x_new - dtsdx0*uxp[ip]*gaminv; +#if (AMREX_SPACEDIM == 3) + const Real y_new = (yp[ip] - ymin)*dyi; + const Real y_old = y_new - dtsdy0*uyp[ip]*gaminv; +#endif + const Real z_new = (zp[ip] - zmin)*dzi; + const Real z_old = z_new - dtsdz0*uzp[ip]*gaminv; + + // Shape factor arrays + // Note that there are extra values above and below + // to possibly hold the factor for the old particle + // which can be at a different grid location. + Real AMREX_RESTRICT sx_new[depos_order + 3] = {0.}; + Real AMREX_RESTRICT sx_old[depos_order + 3] = {0.}; +#if (AMREX_SPACEDIM == 3) + Real AMREX_RESTRICT sy_new[depos_order + 3] = {0.}; + Real AMREX_RESTRICT sy_old[depos_order + 3] = {0.}; +#endif + Real AMREX_RESTRICT sz_new[depos_order + 3] = {0.}; + Real AMREX_RESTRICT sz_old[depos_order + 3] = {0.}; + + // --- Compute shape factors + // Compute shape factors for position as they are now and at old positions + // [ijk]_new: leftmost grid point that the particle touches + const int i_new = compute_shape_factor<depos_order>(sx_new+1, x_new); + const int i_old = compute_shifted_shape_factor<depos_order>(sx_old, x_old, i_new); +#if (AMREX_SPACEDIM == 3) + const int j_new = compute_shape_factor<depos_order>(sy_new+1, y_new); + const int j_old = compute_shifted_shape_factor<depos_order>(sy_old, y_old, j_new); +#endif + const int k_new = compute_shape_factor<depos_order>(sz_new+1, z_new); + const int k_old = compute_shifted_shape_factor<depos_order>(sz_old, z_old, k_new); + + // computes min/max positions of current contributions + int dil = 1, diu = 1; + if (i_old < i_new) dil = 0; + if (i_old > i_new) diu = 0; +#if (AMREX_SPACEDIM == 3) + int djl = 1, dju = 1; + if (j_old < j_new) djl = 0; + if (j_old > j_new) dju = 0; +#endif + int dkl = 1, dku = 1; + if (k_old < k_new) dkl = 0; + if (k_old > k_new) dku = 0; + +#if (AMREX_SPACEDIM == 3) + + for (int k=dkl; k<=depos_order+2-dku; k++) { + for (int j=djl; j<=depos_order+2-dju; j++) { + Real sdxi = 0.; + for (int i=dil; i<=depos_order+1-diu; i++) { + sdxi += wqx*(sx_old[i] - sx_new[i])*((sy_new[j] + 0.5*(sy_old[j] - sy_new[j]))*sz_new[k] + + (0.5*sy_new[j] + 1./3.*(sy_old[j] - sy_new[j]))*(sz_old[k] - sz_new[k])); + amrex::Gpu::Atomic::Add( &Jx_arr(lo.x+i_new-1+i, lo.y+j_new-1+j, lo.z+k_new-1+k), sdxi); + } + } + } + for (int k=dkl; k<=depos_order+2-dku; k++) { + for (int i=dil; i<=depos_order+2-diu; i++) { + Real sdyj = 0.; + for (int j=djl; j<=depos_order+1-dju; j++) { + sdyj += wqy*(sy_old[j] - sy_new[j])*((sz_new[k] + 0.5*(sz_old[k] - sz_new[k]))*sx_new[i] + + (0.5*sz_new[k] + 1./3.*(sz_old[k] - sz_new[k]))*(sx_old[i] - sx_new[i])); + amrex::Gpu::Atomic::Add( &Jy_arr(lo.x+i_new-1+i, lo.y+j_new-1+j, lo.z+k_new-1+k), sdyj); + } + } + } + for (int j=djl; j<=depos_order+2-dju; j++) { + for (int i=dil; i<=depos_order+2-diu; i++) { + Real sdzk = 0.; + for (int k=dkl; k<=depos_order+1-dku; k++) { + sdzk += wqz*(sz_old[k] - sz_new[k])*((sx_new[i] + 0.5*(sx_old[i] - sx_new[i]))*sy_new[j] + + (0.5*sx_new[i] + 1./3.*(sx_old[i] - sx_new[i]))*(sy_old[j] - sy_new[j])); + amrex::Gpu::Atomic::Add( &Jz_arr(lo.x+i_new-1+i, lo.y+j_new-1+j, lo.z+k_new-1+k), sdzk); + } + } + } + +#elif (AMREX_SPACEDIM == 2) + + for (int k=dkl; k<=depos_order+2-dku; k++) { + Real sdxi = 0.; + for (int i=dil; i<=depos_order+1-diu; i++) { + sdxi += wqx*(sx_old[i] - sx_new[i])*(sz_new[k] + 0.5*(sz_old[k] - sz_new[k])); + amrex::Gpu::Atomic::Add( &Jx_arr(lo.x+i_new-1+i, lo.y+k_new-1+k, 0), sdxi); + } + } + for (int k=dkl; k<=depos_order+2-dku; k++) { + for (int i=dil; i<=depos_order+2-diu; i++) { + const Real sdyj = wq*uyp[ip]*gaminv*invvol*((sz_new[k] + 0.5*(sz_old[k] - sz_new[k]))*sx_new[i] + + (0.5*sz_new[k] + 1./3.*(sz_old[k] - sz_new[k]))*(sx_old[i] - sx_new[i])); + amrex::Gpu::Atomic::Add( &Jy_arr(lo.x+i_new-1+i, lo.y+k_new-1+k, 0), sdyj); + } + } + for (int i=dil; i<=depos_order+2-diu; i++) { + Real sdzk = 0.; + for (int k=dkl; k<=depos_order+1-dku; k++) { + sdzk += wqz*(sz_old[k] - sz_new[k])*(sx_new[i] + 0.5*(sx_old[i] - sx_new[i])); + amrex::Gpu::Atomic::Add( &Jz_arr(lo.x+i_new-1+i, lo.y+k_new-1+k, 0), sdzk); + } + } + +#endif + } + ); + + + +} + #endif // CURRENTDEPOSITION_H_ diff --git a/Source/Particles/WarpXParticleContainer.cpp b/Source/Particles/WarpXParticleContainer.cpp index 7316dcc95..a20f0035e 100644 --- a/Source/Particles/WarpXParticleContainer.cpp +++ b/Source/Particles/WarpXParticleContainer.cpp @@ -6,6 +6,7 @@ #include <AMReX_AmrParGDB.H> #include <WarpX_f.H> #include <WarpX.H> +#include <WarpXAlgorithmSelection.H> // Import low-level single-particle kernels #include <GetAndSetPosition.H> @@ -510,21 +511,40 @@ WarpXParticleContainer::DepositCurrent(WarpXParIter& pti, // Better for memory? worth trying? const Dim3 lo = lbound(tilebox); - if (WarpX::nox == 1){ - doDepositionShapeN<1>(xp, yp, zp, wp.dataPtr(), uxp.dataPtr(), - uyp.dataPtr(), uzp.dataPtr(), jx_arr, jy_arr, - jz_arr, offset, np_to_depose, dt, dx, - xyzmin, lo, stagger_shift, q); - } else if (WarpX::nox == 2){ - doDepositionShapeN<2>(xp, yp, zp, wp.dataPtr(), uxp.dataPtr(), - uyp.dataPtr(), uzp.dataPtr(), jx_arr, jy_arr, - jz_arr, offset, np_to_depose, dt, dx, - xyzmin, lo, stagger_shift, q); - } else if (WarpX::nox == 3){ - doDepositionShapeN<3>(xp, yp, zp, wp.dataPtr(), uxp.dataPtr(), - uyp.dataPtr(), uzp.dataPtr(), jx_arr, jy_arr, - jz_arr, offset, np_to_depose, dt, dx, - xyzmin, lo, stagger_shift, q); + if (WarpX::current_deposition_algo == CurrentDepositionAlgo::Esirkepov) { + if (WarpX::nox == 1){ + doEsirkepovDepositionShapeN<1>(xp, yp, zp, wp.dataPtr(), uxp.dataPtr(), + uyp.dataPtr(), uzp.dataPtr(), jx_arr, jy_arr, + jz_arr, np_to_depose, dt, dx, + xyzmin, lo, q); + } else if (WarpX::nox == 2){ + doEsirkepovDepositionShapeN<2>(xp, yp, zp, wp.dataPtr(), uxp.dataPtr(), + uyp.dataPtr(), uzp.dataPtr(), jx_arr, jy_arr, + jz_arr, np_to_depose, dt, dx, + xyzmin, lo, q); + } else if (WarpX::nox == 3){ + doEsirkepovDepositionShapeN<3>(xp, yp, zp, wp.dataPtr(), uxp.dataPtr(), + uyp.dataPtr(), uzp.dataPtr(), jx_arr, jy_arr, + jz_arr, np_to_depose, dt, dx, + xyzmin, lo, q); + } + } else { + if (WarpX::nox == 1){ + doDepositionShapeN<1>(xp, yp, zp, wp.dataPtr(), uxp.dataPtr(), + uyp.dataPtr(), uzp.dataPtr(), jx_arr, jy_arr, + jz_arr, offset, np_to_depose, dt, dx, + xyzmin, lo, stagger_shift, q); + } else if (WarpX::nox == 2){ + doDepositionShapeN<2>(xp, yp, zp, wp.dataPtr(), uxp.dataPtr(), + uyp.dataPtr(), uzp.dataPtr(), jx_arr, jy_arr, + jz_arr, offset, np_to_depose, dt, dx, + xyzmin, lo, stagger_shift, q); + } else if (WarpX::nox == 3){ + doDepositionShapeN<3>(xp, yp, zp, wp.dataPtr(), uxp.dataPtr(), + uyp.dataPtr(), uzp.dataPtr(), jx_arr, jy_arr, + jz_arr, offset, np_to_depose, dt, dx, + xyzmin, lo, stagger_shift, q); + } } #ifndef AMREX_USE_GPU diff --git a/Source/WarpX.H b/Source/WarpX.H index ca7596589..35de5c88d 100644 --- a/Source/WarpX.H +++ b/Source/WarpX.H @@ -26,9 +26,11 @@ #include <NCIGodfreyFilter.H> #ifdef WARPX_USE_PSATD -#include <fftw3.h> #include <SpectralSolver.H> #endif +#ifdef WARPX_USE_PSATD_HYBRID +#include <PicsarHybridFFTData.H> +#endif #if defined(BL_USE_SENSEI_INSITU) namespace amrex { @@ -578,7 +580,7 @@ private: amrex::Vector<std::array< std::unique_ptr<amrex::MultiFab>, 3 > > Efield_slice; amrex::Vector<std::array< std::unique_ptr<amrex::MultiFab>, 3 > > Bfield_slice; -#ifdef WARPX_USE_PSATD +#ifdef WARPX_USE_PSATD_HYBRID // Store fields in real space on the dual grid (i.e. the grid for the FFT push of the fields) // This includes data for the FFT guard cells (between FFT groups) amrex::Vector<std::array< std::unique_ptr<amrex::MultiFab>, 3 > > Efield_fp_fft; @@ -590,44 +592,30 @@ private: amrex::Vector<std::array< std::unique_ptr<amrex::MultiFab>, 3 > > Bfield_cp_fft; amrex::Vector<std::array< std::unique_ptr<amrex::MultiFab>, 3 > > current_cp_fft; amrex::Vector< std::unique_ptr<amrex::MultiFab> > rho_cp_fft; +#endif -public: - // FFTData is a stuct containing a 22 pointers to auxiliary arrays - // 1-11: padded arrays in real space (required by FFTW); 12-22: arrays in spectral space - struct FFTData - { - static constexpr int N = 22; - void* data[N] = { nullptr }; - - ~FFTData () { - for (int i = 0; i < N; ++i) { // The memory is allocated with fftw_alloc. - fftw_free(data[i]); - data[i] = nullptr; - } - } - - FFTData () = default; +#ifdef WARPX_USE_PSATD +private: + void EvolvePSATD (int numsteps); + void PushPSATD (amrex::Real dt); + void PushPSATD_localFFT (int lev, amrex::Real dt); - FFTData (FFTData && rhs) noexcept { - for (int i = 0; i < N; ++i) { - data[i] = rhs.data[i]; - rhs.data[i] = nullptr; - } - } + bool fft_hybrid_mpi_decomposition = false; + int ngroups_fft = 4; + int fftw_plan_measure = 1; + int nox_fft = 16; + int noy_fft = 16; + int noz_fft = 16; - FFTData (FFTData const&) = delete; - void operator= (FFTData const&) = delete; - void operator= (FFTData&&) = delete; - }; + amrex::Vector<std::unique_ptr<SpectralSolver>> spectral_solver_fp; + amrex::Vector<std::unique_ptr<SpectralSolver>> spectral_solver_cp; +#endif +#ifdef WARPX_USE_PSATD_HYBRID private: - amrex::Vector<std::unique_ptr<amrex::LayoutData<FFTData> > > dataptr_fp_fft; amrex::Vector<std::unique_ptr<amrex::LayoutData<FFTData> > > dataptr_cp_fft; - amrex::Vector<std::unique_ptr<SpectralSolver>> spectral_solver_fp; - amrex::Vector<std::unique_ptr<SpectralSolver>> spectral_solver_cp; - // Domain decomposition containing the valid part of the dual grids (i.e. without FFT guard cells) amrex::Vector<amrex::BoxArray> ba_valid_fp_fft; amrex::Vector<amrex::BoxArray> ba_valid_cp_fft; @@ -638,13 +626,6 @@ private: amrex::Vector<MPI_Comm> comm_fft; amrex::Vector<int> color_fft; - bool fft_hybrid_mpi_decomposition = false; - int ngroups_fft = 4; - int fftw_plan_measure = 1; - int nox_fft = 16; - int noy_fft = 16; - int noz_fft = 16; - void AllocLevelDataFFT (int lev); void InitLevelDataFFT (int lev, amrex::Real time); @@ -654,12 +635,7 @@ private: const amrex::Box& domain); void InitFFTDataPlan (int lev); void FreeFFT (int lev); - - void EvolvePSATD (int numsteps); - void PushPSATD (amrex::Real dt); - void PushPSATD_localFFT (int lev, amrex::Real dt); void PushPSATD_hybridFFT (int lev, amrex::Real dt); - #endif #if defined(BL_USE_SENSEI_INSITU) diff --git a/Source/WarpX.cpp b/Source/WarpX.cpp index 08948227c..1c0d01ce5 100644 --- a/Source/WarpX.cpp +++ b/Source/WarpX.cpp @@ -202,6 +202,10 @@ WarpX::WarpX () costs.resize(nlevs_max); #ifdef WARPX_USE_PSATD + spectral_solver_fp.resize(nlevs_max); + spectral_solver_cp.resize(nlevs_max); +#endif +#ifdef WARPX_USE_PSATD_HYBRID Efield_fp_fft.resize(nlevs_max); Bfield_fp_fft.resize(nlevs_max); current_fp_fft.resize(nlevs_max); @@ -215,9 +219,6 @@ WarpX::WarpX () dataptr_fp_fft.resize(nlevs_max); dataptr_cp_fft.resize(nlevs_max); - spectral_solver_fp.resize(nlevs_max); - spectral_solver_cp.resize(nlevs_max); - ba_valid_fp_fft.resize(nlevs_max); ba_valid_cp_fft.resize(nlevs_max); @@ -492,10 +493,6 @@ WarpX::ReadParameters () pp.query("use_picsar_deposition", use_picsar_deposition); #endif current_deposition_algo = GetAlgorithmInteger(pp, "current_deposition"); - if (!use_picsar_deposition){ - AMREX_ALWAYS_ASSERT_WITH_MESSAGE( current_deposition_algo >= 2, - "if not use_picsar_deposition, cannot use Esirkepov deposition."); - } charge_deposition_algo = GetAlgorithmInteger(pp, "charge_deposition"); field_gathering_algo = GetAlgorithmInteger(pp, "field_gathering"); particle_pusher_algo = GetAlgorithmInteger(pp, "particle_pusher"); @@ -511,8 +508,6 @@ WarpX::ReadParameters () pp.query("nox", nox_fft); pp.query("noy", noy_fft); pp.query("noz", noz_fft); - // Override value - if (fft_hybrid_mpi_decomposition==false) ngroups_fft=ParallelDescriptor::NProcs(); } #endif @@ -568,8 +563,12 @@ WarpX::MakeNewLevelFromScratch (int lev, Real time, const BoxArray& new_grids, #ifdef WARPX_USE_PSATD if (fft_hybrid_mpi_decomposition){ +#ifdef WARPX_USE_PSATD_HYBRID AllocLevelDataFFT(lev); InitLevelDataFFT(lev, time); +#else + amrex::Abort("The option `psatd.fft_hybrid_mpi_decomposition` does not work on GPU."); +#endif } #endif } @@ -614,7 +613,7 @@ WarpX::ClearLevel (int lev) costs[lev].reset(); -#ifdef WARPX_USE_PSATD +#ifdef WARPX_USE_PSATD_HYBRID for (int i = 0; i < 3; ++i) { Efield_fp_fft[lev][i].reset(); Bfield_fp_fft[lev][i].reset(); |