diff options
Diffstat (limited to 'Source')
-rw-r--r-- | Source/BoundaryConditions/WarpX_PML_kernels.H | 5 | ||||
-rw-r--r-- | Source/Evolve/WarpXEvolveEM.cpp | 12 | ||||
-rw-r--r-- | Source/FortranInterface/WarpX_f.F90 | 59 | ||||
-rw-r--r-- | Source/FortranInterface/WarpX_f.H | 9 | ||||
-rw-r--r-- | Source/Make.WarpX | 18 | ||||
-rw-r--r-- | Source/Parallelization/InterpolateDensityFineToCoarse.H | 120 | ||||
-rw-r--r-- | Source/Parallelization/WarpXComm.cpp | 35 | ||||
-rw-r--r-- | Source/Particles/Gather/FieldGather.H | 7 | ||||
-rw-r--r-- | Source/Particles/PhotonParticleContainer.H | 2 | ||||
-rw-r--r-- | Source/Particles/PhysicalParticleContainer.H | 10 | ||||
-rw-r--r-- | Source/Particles/PhysicalParticleContainer.cpp | 75 | ||||
-rw-r--r-- | Source/Particles/Sorting/Make.package | 1 | ||||
-rw-r--r-- | Source/Particles/Sorting/Partition.cpp | 108 | ||||
-rw-r--r-- | Source/Particles/Sorting/SortingUtils.H | 164 | ||||
-rw-r--r-- | Source/Particles/WarpXParticleContainer.H | 8 | ||||
-rw-r--r-- | Source/Particles/WarpXParticleContainer.cpp | 39 | ||||
-rw-r--r-- | Source/Python/WarpXWrappers.cpp | 73 | ||||
-rw-r--r-- | Source/Python/WarpXWrappers.h | 50 | ||||
-rw-r--r-- | Source/WarpX.H | 8 | ||||
-rw-r--r-- | Source/WarpX.cpp | 9 |
20 files changed, 526 insertions, 286 deletions
diff --git a/Source/BoundaryConditions/WarpX_PML_kernels.H b/Source/BoundaryConditions/WarpX_PML_kernels.H index 8f779a5c2..89fdb4911 100644 --- a/Source/BoundaryConditions/WarpX_PML_kernels.H +++ b/Source/BoundaryConditions/WarpX_PML_kernels.H @@ -85,7 +85,8 @@ void warpx_push_pml_bx_ckc(int i, int j, int k, Array4<Real> const&Bx, - Ez(i+1,j ,k-1,0) - Ez(i+1,j ,k-1,1) - Ez(i+1,j ,k-1,2) + Ez(i-1,j+1,k-1,0) + Ez(i-1,j+1,k-1,1) + Ez(i-1,j+1,k-1,2) - Ez(i-1,j ,k-1,0) - Ez(i-1,j ,k-1,1) - Ez(i-1,j ,k-1,2)); - Bx(i,j,k,1) += alphaz*(Ey(i ,j ,k+1,0) + Ey(i ,j ,k+1,1) + Ez(i ,j ,k+1,2) + + Bx(i,j,k,1) += alphaz*(Ey(i ,j ,k+1,0) + Ey(i ,j ,k+1,1) + Ey(i ,j ,k+1,2) - Ey(i ,j ,k ,0) - Ey(i ,j ,k ,1) - Ey(i ,j ,k ,2)) +betazx*(Ey(i+1,j ,k+1,0) + Ey(i+1,j ,k+1,1) + Ey(i+1,j ,k+1,2) - Ey(i+1,j ,k ,0) - Ey(i+1,j ,k ,1) - Ey(i+1,j ,k ,2) @@ -101,7 +102,7 @@ void warpx_push_pml_bx_ckc(int i, int j, int k, Array4<Real> const&Bx, - Ey(i-1,j+1,k ,0) - Ey(i-1,j+1,k ,1) - Ey(i-1,j+1,k ,2) + Ey(i+1,j-1,k+1,0) + Ey(i+1,j-1,k+1,1) + Ey(i+1,j-1,k+1,2) - Ey(i+1,j-1,k ,0) - Ey(i+1,j-1,k ,1) - Ey(i+1,j-1,k ,2) - + Ey(i-1,j-1,k+1,0) + Ey(i-1,j-1,k+1,1) + Ey(i-1,j-1,k+1,1) + + Ey(i-1,j-1,k+1,0) + Ey(i-1,j-1,k+1,1) + Ey(i-1,j-1,k+1,2) - Ey(i-1,j-1,k ,0) - Ey(i-1,j-1,k ,1) - Ey(i-1,j-1,k ,2)); #else diff --git a/Source/Evolve/WarpXEvolveEM.cpp b/Source/Evolve/WarpXEvolveEM.cpp index a517a91f5..1e4aa4d5c 100644 --- a/Source/Evolve/WarpXEvolveEM.cpp +++ b/Source/Evolve/WarpXEvolveEM.cpp @@ -562,6 +562,18 @@ WarpX::ComputeDt () if (do_electrostatic) { dt[0] = const_dt; } + + for (int lev=0; lev <= max_level; lev++) { + const Real* dx_lev = geom[lev].CellSize(); + Print()<<"Level "<<lev<<": dt = "<<dt[lev] + <<" ; dx = "<<dx_lev[0] +#if (defined WARPX_DIM_XZ) || (defined WARPX_DIM_RZ) + <<" ; dz = "<<dx_lev[1]<<'\n'; +#elif (defined WARPX_DIM_3D) + <<" ; dy = "<<dx_lev[1] + <<" ; dz = "<<dx_lev[2]<<'\n'; +#endif + } } /* \brief computes max_step for wakefield simulation in boosted frame. diff --git a/Source/FortranInterface/WarpX_f.F90 b/Source/FortranInterface/WarpX_f.F90 index 2ddf9409d..762ed2cd8 100644 --- a/Source/FortranInterface/WarpX_f.F90 +++ b/Source/FortranInterface/WarpX_f.F90 @@ -51,65 +51,6 @@ contains end subroutine warpx_compute_E - subroutine warpx_sync_rho_2d (lo, hi, crse, clo, chi, fine, flo, fhi, nc) & - bind(c, name='warpx_sync_rho_2d') - integer, intent(in) :: lo(2), hi(2), flo(2), fhi(2), clo(2), chi(2), nc - real(amrex_real), intent(in ) :: fine(flo(1):fhi(1),flo(2):fhi(2),nc) - real(amrex_real), intent(inout) :: crse(clo(1):chi(1),clo(2):chi(2),nc) - - integer :: i,j,ii,jj,m - - do m = 1, nc - do j = lo(2), hi(2) - jj = j*2 - do i = lo(1), hi(1) - ii = i*2 - crse(i,j,m) = 0.25d0 * & - ( fine(ii,jj,m) + 0.5d0 *(fine(ii-1,jj ,m)+fine(ii+1,jj ,m) & - & + fine(ii ,jj-1,m)+fine(ii ,jj+1,m)) & - & + 0.25d0*(fine(ii-1,jj-1,m)+fine(ii+1,jj-1,m) & - & + fine(ii-1,jj+1,m)+fine(ii+1,jj+1,m)) ) - end do - end do - end do - end subroutine warpx_sync_rho_2d - - subroutine warpx_sync_rho_3d (lo, hi, crse, clo, chi, fine, flo, fhi, nc) & - bind(c, name='warpx_sync_rho_3d') - integer, intent(in) :: lo(3), hi(3), flo(3), fhi(3), clo(3), chi(3), nc - real(amrex_real), intent(in ) :: fine(flo(1):fhi(1),flo(2):fhi(2),flo(3):fhi(3),nc) - real(amrex_real), intent(inout) :: crse(clo(1):chi(1),clo(2):chi(2),clo(3):chi(3),nc) - - integer :: i,j,k,ii,jj,kk,m - - do m = 1, nc - do k = lo(3), hi(3) - kk = k*2 - do j = lo(2), hi(2) - jj = j*2 - do i = lo(1), hi(1) - ii = i*2 - crse(i,j,k,m) = 0.125d0 * & - (fine(ii,jj,kk,m) + 0.5d0 *(fine(ii-1,jj ,kk ,m)+fine(ii+1,jj ,kk ,m) & - & + fine(ii ,jj-1,kk ,m)+fine(ii ,jj+1,kk ,m) & - & + fine(ii ,jj ,kk-1,m)+fine(ii ,jj ,kk+1,m)) & - & + 0.25d0 *(fine(ii-1,jj-1,kk ,m)+fine(ii+1,jj-1,kk ,m) & - & + fine(ii-1,jj+1,kk ,m)+fine(ii+1,jj+1,kk ,m) & - & + fine(ii-1,jj ,kk-1,m)+fine(ii+1,jj ,kk-1,m) & - & + fine(ii-1,jj ,kk+1,m)+fine(ii+1,jj ,kk+1,m) & - & + fine(ii ,jj-1,kk-1,m)+fine(ii ,jj+1,kk-1,m) & - & + fine(ii ,jj-1,kk+1,m)+fine(ii ,jj+1,kk+1,m)) & - & + 0.125d0*(fine(ii-1,jj-1,kk-1,m)+fine(ii-1,jj-1,kk+1,m) & - & + fine(ii-1,jj+1,kk-1,m)+fine(ii-1,jj+1,kk+1,m) & - & + fine(ii+1,jj-1,kk-1,m)+fine(ii+1,jj-1,kk+1,m) & - & + fine(ii+1,jj+1,kk-1,m)+fine(ii+1,jj+1,kk+1,m))) - end do - end do - end do - end do - - end subroutine warpx_sync_rho_3d - subroutine warpx_build_buffer_masks (lo, hi, msk, mlo, mhi, gmsk, glo, ghi, ng) & bind(c, name='warpx_build_buffer_masks') integer, dimension(3), intent(in) :: lo, hi, mlo, mhi, glo, ghi diff --git a/Source/FortranInterface/WarpX_f.H b/Source/FortranInterface/WarpX_f.H index 2d5d346e2..b51a83387 100644 --- a/Source/FortranInterface/WarpX_f.H +++ b/Source/FortranInterface/WarpX_f.H @@ -16,8 +16,6 @@ #if (AMREX_SPACEDIM == 3) -#define WRPX_SYNC_RHO warpx_sync_rho_3d - #define WRPX_SUM_FINE_TO_CRSE_NODAL warpx_sum_fine_to_crse_nodal_3d #define WRPX_ZERO_OUT_BNDRY warpx_zero_out_bndry_3d #define WRPX_BUILD_MASK warpx_build_mask_3d @@ -30,8 +28,6 @@ #elif (AMREX_SPACEDIM == 2) -#define WRPX_SYNC_RHO warpx_sync_rho_2d - #define WRPX_SUM_FINE_TO_CRSE_NODAL warpx_sum_fine_to_crse_nodal_2d #define WRPX_ZERO_OUT_BNDRY warpx_zero_out_bndry_2d #define WRPX_BUILD_MASK warpx_build_mask_2d @@ -229,11 +225,6 @@ extern "C" #endif const amrex::Real* sigbz, int sigbz_lo, int sigbz_hi); - void WRPX_SYNC_RHO (const int* lo, const int* hi, - BL_FORT_FAB_ARG_ANYD(crse), - const BL_FORT_FAB_ARG_ANYD(fine), - const int* ncomp); - #ifdef WARPX_USE_PSATD void warpx_fft_mpi_init (int fcomm); void warpx_fft_domain_decomp (int* warpx_local_nz, int* warpx_local_z0, diff --git a/Source/Make.WarpX b/Source/Make.WarpX index 13d4f4554..6f9ae6d2f 100644 --- a/Source/Make.WarpX +++ b/Source/Make.WarpX @@ -9,7 +9,7 @@ ifeq ($(USE_GPU),TRUE) USE_OMP = FALSE USE_CUDA = TRUE USE_ACC = FALSE - NVCC_HOST_COMP = gcc + NVCC_HOST_COMP = gnu endif ifeq ($(USE_RZ),TRUE) @@ -27,24 +27,24 @@ ifeq ($(USE_ASCENT_INSITU),TRUE) USE_ASCENT = TRUE endif -include $(AMREX_HOME)/Tools/GNUMake/Make.defs - ifndef USE_PYTHON_MAIN USE_PYTHON_MAIN = FALSE endif ifeq ($(USE_PYTHON_MAIN),TRUE) - CXXFLAGS += -fPIC + XTRA_CXXFLAGS += -fPIC ifeq ($(USE_OMP),TRUE) LDFLAGS += -fopenmp endif - CFLAGS += -fPIC - FFLAGS += -fPIC - F90FLAGS += -fPIC + XTRA_CFLAGS += -fPIC + XTRA_FFLAGS += -fPIC + XTRA_F90FLAGS += -fPIC USERSuffix := .Lib DEFINES += -DWARPX_USE_PY endif +include $(AMREX_HOME)/Tools/GNUMake/Make.defs + ifeq ($(DIM),3) DEFINES += -DWARPX_DIM_3D else @@ -209,7 +209,11 @@ libwarpx$(PYDIM).a: $(objForExecs) libwarpx$(PYDIM).so: $(objForExecs) @echo Making dynamic library $@ ... +ifeq ($(USE_CUDA),TRUE) + $(SILENT) $(CXX) $(LINKFLAGS) $(SHARED_OPTION) -Xlinker=$(SHARED_OPTION) -Xlinker=-fPIC -o $@ $^ $(LDFLAGS) $(libraries) +else $(SILENT) $(CXX) $(SHARED_OPTION) -fPIC -o $@ $^ $(LDFLAGS) $(libraries) +endif $(SILENT) $(RM) AMReX_buildInfo.cpp @echo SUCCESS diff --git a/Source/Parallelization/InterpolateDensityFineToCoarse.H b/Source/Parallelization/InterpolateDensityFineToCoarse.H new file mode 100644 index 000000000..947db2aac --- /dev/null +++ b/Source/Parallelization/InterpolateDensityFineToCoarse.H @@ -0,0 +1,120 @@ +/* Copyright 2019 Axel Huebl, Weiqun Zhang + * + * This file is part of WarpX. + * + * License: BSD-3-Clause-LBNL + */ + +#ifndef INTERPOLATE_DENSITY_FINE_TO_COARSE_H +#define INTERPOLATE_DENSITY_FINE_TO_COARSE_H + +#include <AMReX_Array4.H> +#include <AMReX_REAL.H> +#include <AMReX_BLassert.H> +#include <AMReX_Extension.H> +#include <AMReX_GpuQualifiers.H> + +#include <utility> // std::move + + +/** Fill a charge density (rho) coarse patch with averaged values from a fine patch + * + * Fills the values of the charge density on the coarse patch + * by averaging the values of the charge density of the fine patch. + */ +class InterpolateDensityFineToCoarse +{ +public: + /** Construct with fine and coarse patch and their refinement ratio + * + * @param[in] fine read-only fine patch + * @param[in,out] coarse overwritten coarse patch + * @param[in] refinement_ratio ratio between coarse and fine patch granularity + * (currently, only a value of is implemented) + * @param[in] number_of_components the number of components + */ + AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE + InterpolateDensityFineToCoarse( + amrex::Array4<amrex::Real const> const fine, + amrex::Array4<amrex::Real > const coarse, + int const refinement_ratio, + int const number_of_components + ) : m_fine(std::move(fine)), + m_coarse(std::move(coarse)), + m_refinement_ratio(std::move(refinement_ratio)), + m_number_of_components(std::move(number_of_components)) + { + //! @note constants and stencils in operator() implementation assume 2x refinement + BL_ASSERT(refinement_ratio == 2); + } + + AMREX_GPU_DEVICE AMREX_FORCE_INLINE + void + operator()( + int const i, + int const j, + int const k + ) const noexcept + { + using namespace amrex; + + auto const & fine_unsafe = m_fine; // out-of-bounds access not secured with zero-values yet + auto const & coarse = m_coarse; // out-of-bounds access not secured but will also not occur + + // return zero for out-of-bounds accesses during interpolation + // this is efficiently used as a method to add neutral elements beyond guards in the average below + auto const fine = [fine_unsafe] AMREX_GPU_DEVICE (int const j, int const k, int const l, int const m) noexcept + { + return fine_unsafe.contains(j, k, l) ? fine_unsafe(j, k, l, m) : amrex::Real{0.}; + }; + + int const ii = i * m_refinement_ratio; + int const jj = j * m_refinement_ratio; + int const kk = k * m_refinement_ratio; + for( int m = 0; m < m_number_of_components; ++m ) { +#if AMREX_SPACEDIM == 2 + coarse(i,j,kk,m) = 0.25_rt * ( + fine(ii,jj,kk,m) + + 0.5_rt * ( + fine(ii-1,jj ,kk,m) + fine(ii+1,jj ,kk,m) + + fine(ii ,jj-1,kk,m) + fine(ii ,jj+1,kk,m) + ) + + 0.25_rt * ( + fine(ii-1,jj-1,kk,m) + fine(ii+1,jj-1,kk,m) + + fine(ii-1,jj+1,kk,m) + fine(ii+1,jj+1,kk,m) + ) + ); +#elif AMREX_SPACEDIM == 3 + coarse(i,j,k,m) = 0.125_rt * ( + fine(ii,jj,kk,m) + + 0.5_rt * ( + fine(ii-1,jj ,kk ,m) + fine(ii+1,jj ,kk ,m) + + fine(ii ,jj-1,kk ,m) + fine(ii ,jj+1,kk ,m) + + fine(ii ,jj ,kk-1,m) + fine(ii ,jj ,kk+1,m) + ) + + 0.25_rt * ( + fine(ii-1,jj-1,kk ,m) + fine(ii+1,jj-1,kk ,m) + + fine(ii-1,jj+1,kk ,m) + fine(ii+1,jj+1,kk ,m) + + fine(ii-1,jj ,kk-1,m) + fine(ii+1,jj ,kk-1,m) + + fine(ii-1,jj ,kk+1,m) + fine(ii+1,jj ,kk+1,m) + + fine(ii ,jj-1,kk-1,m) + fine(ii ,jj+1,kk-1,m) + + fine(ii ,jj-1,kk+1,m) + fine(ii ,jj+1,kk+1,m) + ) + + 0.125_rt * ( + fine(ii-1,jj-1,kk-1,m) + fine(ii-1,jj-1,kk+1,m) + + fine(ii-1,jj+1,kk-1,m) + fine(ii-1,jj+1,kk+1,m) + + fine(ii+1,jj-1,kk-1,m) + fine(ii+1,jj-1,kk+1,m) + + fine(ii+1,jj+1,kk-1,m) + fine(ii+1,jj+1,kk+1,m) + ) + ); +#endif + } + } +private: + amrex::Array4< amrex::Real const > m_fine; + amrex::Array4< amrex::Real > m_coarse; + int m_refinement_ratio; + int m_number_of_components; +}; + +#endif // INTERPOLATE_DENSITY_FINE_TO_COARSE_H diff --git a/Source/Parallelization/WarpXComm.cpp b/Source/Parallelization/WarpXComm.cpp index 4f870e79c..92f0b4f09 100644 --- a/Source/Parallelization/WarpXComm.cpp +++ b/Source/Parallelization/WarpXComm.cpp @@ -3,10 +3,12 @@ #include <WarpX_f.H> #include <WarpXSumGuardCells.H> #include <Parallelization/InterpolateCurrentFineToCoarse.H> +#include <Parallelization/InterpolateDensityFineToCoarse.H> #include <algorithm> #include <cstdlib> + using namespace amrex; void @@ -354,7 +356,7 @@ WarpX::interpolateCurrentFineToCoarse ( std::array< amrex::MultiFab const *, 3 > std::array< amrex::MultiFab *, 3 > const & coarse, int const refinement_ratio) { - BL_PROFILE("InterpolateCurrentFineToCoarse()"); + BL_PROFILE("interpolateCurrentFineToCoarse()"); BL_ASSERT(refinement_ratio == 2); const IntVect& ng = (fine[0]->nGrowVect() + 1) / refinement_ratio; // add equivalent no. of guards to coarse patch @@ -386,6 +388,8 @@ WarpX::interpolateCurrentFineToCoarse ( std::array< amrex::MultiFab const *, 3 > void WarpX::SyncRho () { + BL_PROFILE("SyncRho()"); + if (!rho_fp[0]) return; const int ncomp = rho_fp[0]->nComp(); @@ -395,7 +399,7 @@ WarpX::SyncRho () { rho_cp[lev]->setVal(0.0); const IntVect& refinement_ratio = refRatio(lev-1); - SyncRho(*rho_fp[lev], *rho_cp[lev], refinement_ratio[0]); + interpolateDensityFineToCoarse(*rho_fp[lev], *rho_cp[lev], refinement_ratio[0]); } // For each level @@ -408,29 +412,26 @@ WarpX::SyncRho () } void -WarpX::SyncRho (const MultiFab& fine, MultiFab& crse, int refinement_ratio) +WarpX::interpolateDensityFineToCoarse (const MultiFab& fine, MultiFab& coarse, int const refinement_ratio) { + BL_PROFILE("interpolateDensityFineToCoarse()"); BL_ASSERT(refinement_ratio == 2); - const IntVect& ng = (fine.nGrowVect()+1)/refinement_ratio; + const IntVect& ng = (fine.nGrowVect() + 1) / refinement_ratio; // add equivalent no. of guards to coarse patch const int nc = fine.nComp(); #ifdef _OPEMP #pragma omp parallel if (Gpu::notInLaunchRegion()) #endif { - FArrayBox ffab; - for (MFIter mfi(crse,true); mfi.isValid(); ++mfi) + // OMP in-box decomposition of coarse into tilebox + for (MFIter mfi(coarse, TilingIfNotGPU()); mfi.isValid(); ++mfi) { - const Box& bx = mfi.growntilebox(ng); - Box fbx = amrex::grow(amrex::refine(bx,refinement_ratio),1); - ffab.resize(fbx, nc); - fbx &= fine[mfi].box(); - ffab.setVal(0.0); - ffab.copy(fine[mfi], fbx, 0, fbx, 0, nc); - WRPX_SYNC_RHO(bx.loVect(), bx.hiVect(), - BL_TO_FORTRAN_ANYD(crse[mfi]), - BL_TO_FORTRAN_ANYD(ffab), - &nc); + const Box& bx = mfi.growntilebox(ng); // only grow to outer directions of tileboxes for filling guards + + amrex::ParallelFor( + bx, + InterpolateDensityFineToCoarse(fine.const_array(mfi), coarse.array(mfi), refinement_ratio, nc) + ); } } } @@ -562,7 +563,7 @@ WarpX::RestrictRhoFromFineToCoarsePatch (int lev) if (rho_fp[lev]) { rho_cp[lev]->setVal(0.0); const IntVect& refinement_ratio = refRatio(lev-1); - SyncRho(*rho_fp[lev], *rho_cp[lev], refinement_ratio[0]); + interpolateDensityFineToCoarse(*rho_fp[lev], *rho_cp[lev], refinement_ratio[0]); } } diff --git a/Source/Particles/Gather/FieldGather.H b/Source/Particles/Gather/FieldGather.H index c5ec6fb5b..bdeab16e8 100644 --- a/Source/Particles/Gather/FieldGather.H +++ b/Source/Particles/Gather/FieldGather.H @@ -90,13 +90,6 @@ void doGatherShapeN(const amrex::ParticleReal * const xp, const int l0 = compute_shape_factor<depos_order - lower_in_v>( sz0, z-stagger_shift); - // Set fields on particle to zero - Exp[ip] = 0; - Eyp[ip] = 0; - Ezp[ip] = 0; - Bxp[ip] = 0; - Byp[ip] = 0; - Bzp[ip] = 0; // Each field is gathered in a separate block of // AMREX_SPACEDIM nested loops because the deposition // order can differ for each component of each field diff --git a/Source/Particles/PhotonParticleContainer.H b/Source/Particles/PhotonParticleContainer.H index 5d93e5631..98e8e0aab 100644 --- a/Source/Particles/PhotonParticleContainer.H +++ b/Source/Particles/PhotonParticleContainer.H @@ -63,7 +63,7 @@ public: int thread_num, int lev, int depos_lev, - amrex::Real dt) {}; + amrex::Real dt) override {}; private: diff --git a/Source/Particles/PhysicalParticleContainer.H b/Source/Particles/PhysicalParticleContainer.H index f535a4c5b..79685c08b 100644 --- a/Source/Particles/PhysicalParticleContainer.H +++ b/Source/Particles/PhysicalParticleContainer.H @@ -136,7 +136,15 @@ public: amrex::Real q_tot, long npart, int do_symmetrize); void CheckAndAddParticle(amrex::Real x, amrex::Real y, amrex::Real z, - std::array<amrex::Real, 3> u, amrex::Real weight); + std::array<amrex::Real, 3> u, + amrex::Real weight, + amrex::Gpu::HostVector<amrex::ParticleReal>& particle_x, + amrex::Gpu::HostVector<amrex::ParticleReal>& particle_y, + amrex::Gpu::HostVector<amrex::ParticleReal>& particle_z, + amrex::Gpu::HostVector<amrex::ParticleReal>& particle_ux, + amrex::Gpu::HostVector<amrex::ParticleReal>& particle_uy, + amrex::Gpu::HostVector<amrex::ParticleReal>& particle_uz, + amrex::Gpu::HostVector<amrex::ParticleReal>& particle_w); virtual void GetParticleSlice(const int direction, const amrex::Real z_old, const amrex::Real z_new, const amrex::Real t_boost, diff --git a/Source/Particles/PhysicalParticleContainer.cpp b/Source/Particles/PhysicalParticleContainer.cpp index 352745265..9e12eb26e 100644 --- a/Source/Particles/PhysicalParticleContainer.cpp +++ b/Source/Particles/PhysicalParticleContainer.cpp @@ -190,6 +190,16 @@ PhysicalParticleContainer::AddGaussianBeam(Real x_m, Real y_m, Real z_m, std::normal_distribution<double> disty(y_m, y_rms); std::normal_distribution<double> distz(z_m, z_rms); + // Allocate temporary vectors on the CPU + Gpu::HostVector<ParticleReal> particle_x; + Gpu::HostVector<ParticleReal> particle_y; + Gpu::HostVector<ParticleReal> particle_z; + Gpu::HostVector<ParticleReal> particle_ux; + Gpu::HostVector<ParticleReal> particle_uy; + Gpu::HostVector<ParticleReal> particle_uz; + Gpu::HostVector<ParticleReal> particle_w; + int np = 0; + if (ParallelDescriptor::IOProcessor()) { // If do_symmetrize, create 4x fewer particles, and // Replicate each particle 4 times (x,y) (-x,y) (x,-y) (-x,-y) @@ -215,44 +225,61 @@ PhysicalParticleContainer::AddGaussianBeam(Real x_m, Real y_m, Real z_m, u.z *= PhysConst::c; if (do_symmetrize){ // Add four particles to the beam: - CheckAndAddParticle( x, y, z, { u.x, u.y, u.z}, weight/4. ); - CheckAndAddParticle( x,-y, z, { u.x,-u.y, u.z}, weight/4. ); - CheckAndAddParticle(-x, y, z, {-u.x, u.y, u.z}, weight/4. ); - CheckAndAddParticle(-x,-y, z, {-u.x,-u.y, u.z}, weight/4. ); + CheckAndAddParticle(x, y, z, { u.x, u.y, u.z}, weight/4., + particle_x, particle_y, particle_z, + particle_ux, particle_uy, particle_uz, + particle_w); + CheckAndAddParticle(x, -y, z, { u.x, -u.y, u.z}, weight/4., + particle_x, particle_y, particle_z, + particle_ux, particle_uy, particle_uz, + particle_w); + CheckAndAddParticle(-x, y, z, { -u.x, u.y, u.z}, weight/4., + particle_x, particle_y, particle_z, + particle_ux, particle_uy, particle_uz, + particle_w); + CheckAndAddParticle(-x, -y, z, { -u.x, -u.y, u.z}, weight/4., + particle_x, particle_y, particle_z, + particle_ux, particle_uy, particle_uz, + particle_w); } else { - CheckAndAddParticle(x, y, z, {u.x,u.y,u.z}, weight); + CheckAndAddParticle(x, y, z, { u.x, u.y, u.z}, weight, + particle_x, particle_y, particle_z, + particle_ux, particle_uy, particle_uz, + particle_w); } } } } - Redistribute(); + // Add the temporary CPU vectors to the particle structure + np = particle_z.size(); + AddNParticles(0,np, + particle_x.dataPtr(), particle_y.dataPtr(), particle_z.dataPtr(), + particle_ux.dataPtr(), particle_uy.dataPtr(), particle_uz.dataPtr(), + 1, particle_w.dataPtr(),1); } void PhysicalParticleContainer::CheckAndAddParticle(Real x, Real y, Real z, std::array<Real, 3> u, - Real weight) + Real weight, + Gpu::HostVector<ParticleReal>& particle_x, + Gpu::HostVector<ParticleReal>& particle_y, + Gpu::HostVector<ParticleReal>& particle_z, + Gpu::HostVector<ParticleReal>& particle_ux, + Gpu::HostVector<ParticleReal>& particle_uy, + Gpu::HostVector<ParticleReal>& particle_uz, + Gpu::HostVector<ParticleReal>& particle_w) { - std::array<ParticleReal,PIdx::nattribs> attribs; - attribs.fill(0.0); - - // update attribs with input arguments if (WarpX::gamma_boost > 1.) { MapParticletoBoostedFrame(x, y, z, u); } - attribs[PIdx::ux] = u[0]; - attribs[PIdx::uy] = u[1]; - attribs[PIdx::uz] = u[2]; - attribs[PIdx::w ] = weight; - - if ( (NumRuntimeRealComps()>0) || (NumRuntimeIntComps()>0) ) - { - // need to create old values - auto& particle_tile = DefineAndReturnParticleTile(0, 0, 0); - } - - // add particle - AddOneParticle(0, 0, 0, x, y, z, attribs); + particle_x.push_back(x); + particle_y.push_back(y); + particle_z.push_back(z); + particle_ux.push_back(u[0]); + particle_uy.push_back(u[1]); + particle_uz.push_back(u[2]); + particle_w.push_back(weight); } void diff --git a/Source/Particles/Sorting/Make.package b/Source/Particles/Sorting/Make.package index 750d2607e..f9c708e5b 100644 --- a/Source/Particles/Sorting/Make.package +++ b/Source/Particles/Sorting/Make.package @@ -1,3 +1,4 @@ +CEXE_headers += SortingUtils.H CEXE_sources += Partition.cpp INCLUDE_LOCATIONS += $(WARPX_HOME)/Source/Particles/Sorting VPATH_LOCATIONS += $(WARPX_HOME)/Source/Particles/Sorting diff --git a/Source/Particles/Sorting/Partition.cpp b/Source/Particles/Sorting/Partition.cpp index 683dbbd04..d1455249a 100644 --- a/Source/Particles/Sorting/Partition.cpp +++ b/Source/Particles/Sorting/Partition.cpp @@ -1,5 +1,6 @@ -#include <WarpX.H> +#include <SortingUtils.H> #include <PhysicalParticleContainer.H> +#include <WarpX.H> #include <AMReX_Particles.H> using namespace amrex; @@ -41,56 +42,64 @@ PhysicalParticleContainer::PartitionParticlesInBuffers( { BL_PROFILE("PPC::Evolve::partition"); - std::vector<bool> inexflag; + // Initialize temporary arrays + Gpu::DeviceVector<int> inexflag; inexflag.resize(np); + Gpu::DeviceVector<long> pid; + pid.resize(np); - auto& aos = pti.GetArrayOfStructs(); + // First, partition particles into the larger buffer - // We need to partition the large buffer first + // - Select the larger buffer iMultiFab const* bmasks = (WarpX::n_field_gather_buffer >= WarpX::n_current_deposition_buffer) ? gather_masks : current_masks; - int i = 0; - const auto& msk = (*bmasks)[pti]; - for (const auto& p : aos) { - const IntVect& iv = Index(p, lev); - inexflag[i++] = msk(iv); - } - - Vector<long> pid; - pid.resize(np); - std::iota(pid.begin(), pid.end(), 0L); - auto sep = std::stable_partition(pid.begin(), pid.end(), - [&inexflag](long id) { return inexflag[id]; }); + // - For each particle, find whether it is in the larger buffer, + // by looking up the mask. Store the answer in `inexflag`. + amrex::ParallelFor( np, fillBufferFlag(pti, bmasks, inexflag, Geom(lev), 0) ); + // - Find the indices that reorder particles so that the last particles + // are in the larger buffer + fillWithConsecutiveIntegers( pid ); + auto const sep = stablePartition( pid.begin(), pid.end(), inexflag ); + // At the end of this step, `pid` contains the indices that should be used to + // reorder the particles, and `sep` is the position in the array that + // separates the particles that deposit/gather on the fine patch (first part) + // and the particles that deposit/gather in the buffers (last part) + long const n_fine = iteratorDistance(pid.begin(), sep); + // Number of particles on fine patch, i.e. outside of the larger buffer + + // Second, among particles that are in the larger buffer, partition + // particles into the smaller buffer if (WarpX::n_current_deposition_buffer == WarpX::n_field_gather_buffer) { - nfine_current = nfine_gather = std::distance(pid.begin(), sep); - } else if (sep != pid.end()) { + // No need to do anything if the buffers have the same size + nfine_current = nfine_gather = iteratorDistance(pid.begin(), sep); + } else if (sep == pid.end()) { + // No need to do anything if there are no particles in the larger buffer + nfine_current = nfine_gather = np; + } else { int n_buf; if (bmasks == gather_masks) { - nfine_gather = std::distance(pid.begin(), sep); + nfine_gather = n_fine; bmasks = current_masks; n_buf = WarpX::n_current_deposition_buffer; } else { - nfine_current = std::distance(pid.begin(), sep); + nfine_current = n_fine; bmasks = gather_masks; n_buf = WarpX::n_field_gather_buffer; } if (n_buf > 0) { - const auto& msk2 = (*bmasks)[pti]; - for (auto it = sep; it != pid.end(); ++it) { - const long id = *it; - const IntVect& iv = Index(aos[id], lev); - inexflag[id] = msk2(iv); - } + // - For each particle in the large buffer, find whether it is in + // the smaller buffer, by looking up the mask. Store the answer in `inexflag`. + amrex::ParallelFor( np - n_fine, + fillBufferFlag(pti, bmasks, inexflag, Geom(lev), n_fine) ); + auto const sep2 = stablePartition( sep, pid.end(), inexflag ); - auto sep2 = std::stable_partition(sep, pid.end(), - [&inexflag](long id) {return inexflag[id];}); if (bmasks == gather_masks) { - nfine_gather = std::distance(pid.begin(), sep2); + nfine_gather = iteratorDistance(pid.begin(), sep2); } else { - nfine_current = std::distance(pid.begin(), sep2); + nfine_current = iteratorDistance(pid.begin(), sep2); } } } @@ -103,39 +112,38 @@ PhysicalParticleContainer::PartitionParticlesInBuffers( nfine_gather = 0; } + // Reorder the actual particle array, using the `pid` indices if (nfine_current != np || nfine_gather != np) { - + // Temporary array for particle AoS ParticleVector particle_tmp; particle_tmp.resize(np); - for (long ip = 0; ip < np; ++ip) { - particle_tmp[ip] = aos[pid[ip]]; - } + // Copy particle AoS + auto& aos = pti.GetArrayOfStructs(); + amrex::ParallelFor( np, + copyAndReorder<ParticleType>( aos(), particle_tmp, pid ) ); std::swap(aos(), particle_tmp); + // Temporary array for particle individual attributes RealVector tmp; tmp.resize(np); - for (long ip = 0; ip < np; ++ip) { - tmp[ip] = wp[pid[ip]]; - } + // Copy individual attributes + amrex::ParallelFor( np, copyAndReorder<Real>( wp, tmp, pid ) ); std::swap(wp, tmp); - - for (long ip = 0; ip < np; ++ip) { - tmp[ip] = uxp[pid[ip]]; - } + amrex::ParallelFor( np, copyAndReorder<Real>( uxp, tmp, pid ) ); std::swap(uxp, tmp); - - for (long ip = 0; ip < np; ++ip) { - tmp[ip] = uyp[pid[ip]]; - } + amrex::ParallelFor( np, copyAndReorder<Real>( uyp, tmp, pid ) ); std::swap(uyp, tmp); - - for (long ip = 0; ip < np; ++ip) { - tmp[ip] = uzp[pid[ip]]; - } + amrex::ParallelFor( np, copyAndReorder<Real>( uzp, tmp, pid ) ); std::swap(uzp, tmp); - } + // Make sure that the temporary arrays are not destroyed before + // the GPU kernels finish running + Gpu::streamSynchronize(); + } + // Make sure that the temporary arrays are not destroyed before + // the GPU kernels finish running + Gpu::streamSynchronize(); } diff --git a/Source/Particles/Sorting/SortingUtils.H b/Source/Particles/Sorting/SortingUtils.H new file mode 100644 index 000000000..852b8a73f --- /dev/null +++ b/Source/Particles/Sorting/SortingUtils.H @@ -0,0 +1,164 @@ +#ifndef WARPX_PARTICLES_SORTING_SORTINGUTILS_H_ +#define WARPX_PARTICLES_SORTING_SORTINGUTILS_H_ + +#include <WarpXParticleContainer.H> +#include <AMReX_CudaContainers.H> +#include <AMReX_Gpu.H> +#ifdef AMREX_USE_GPU + #include <thrust/partition.h> + #include <thrust/distance.h> +#endif + +/* \brief Fill the elements of the input vector with consecutive integer, + * starting from 0 + * + * \param[inout] v Vector of integers, to be filled by this routine + */ +void fillWithConsecutiveIntegers( amrex::Gpu::DeviceVector<long>& v ) +{ +#ifdef AMREX_USE_GPU + // On GPU: Use thrust + thrust::sequence( v.begin(), v.end() ); +#else + // On CPU: Use std library + std::iota( v.begin(), v.end(), 0L ); +#endif +} + +/* \brief Find the indices that would reorder the elements of `predicate` + * so that the elements with non-zero value precede the other elements + * + * \param[in, out] index_begin Point to the beginning of the vector which is + * to be filled with these indices + * \param[in, out] index_begin Point to the end of the vector which is + * to be filled with these indices + * \param[in] Vector that indicates the elements that need to be reordered first + */ +template< typename ForwardIterator > +ForwardIterator stablePartition(ForwardIterator const index_begin, + ForwardIterator const index_end, + amrex::Gpu::DeviceVector<int> const& predicate) +{ +#ifdef AMREX_USE_GPU + // On GPU: Use thrust + int const* AMREX_RESTRICT predicate_ptr = predicate.dataPtr(); + ForwardIterator const sep = thrust::stable_partition( + thrust::cuda::par(amrex::Cuda::The_ThrustCachedAllocator()), + index_begin, index_end, + [predicate_ptr] AMREX_GPU_DEVICE (long i) { return predicate_ptr[i]; } + ); +#else + // On CPU: Use std library + ForwardIterator const sep = std::stable_partition( + index_begin, index_end, + [&predicate](long i) { return predicate[i]; } + ); +#endif + return sep; +} + +/* \brief Return the number of elements between `first` and `last` + * + * \param[in] fist Points to a position in a vector + * \param[in] last Points to another position in a vector + * \return The number of elements between `first` and `last` + */ +template< typename ForwardIterator > +int iteratorDistance(ForwardIterator const first, + ForwardIterator const last) +{ +#ifdef AMREX_USE_GPU + // On GPU: Use thrust + return thrust::distance( first, last ); +#else + return std::distance( first, last ); +#endif +} + +/* \brief Functor that fills the elements of the particle array `inexflag` + * with the value of the spatial array `bmasks`, at the corresponding particle position. + * + * This is done only for the elements from `start_index` to the end of `inexflag` + * + * \param[in] pti Contains information on the particle positions + * \param[in] bmasks Spatial array, that contains a flag indicating + * whether each cell is part of the gathering/deposition buffers + * \param[out] inexflag Vector to be filled with the value of `bmasks` + * \param[in] geom Geometry object, necessary to locate particles within the array `bmasks` + * \param[in] start_index Index that which elements start to be modified + */ +class fillBufferFlag +{ + public: + fillBufferFlag( WarpXParIter const& pti, amrex::iMultiFab const* bmasks, + amrex::Gpu::DeviceVector<int>& inexflag, + amrex::Geometry const& geom, long const start_index=0 ) : + m_start_index(start_index) { + + // Extract simple structure that can be used directly on the GPU + m_particles = &(pti.GetArrayOfStructs()[0]); + m_buffer_mask = (*bmasks)[pti].array(); + m_inexflag_ptr = inexflag.dataPtr(); + m_domain = geom.Domain(); + for (int idim=0; idim<AMREX_SPACEDIM; idim++) { + m_prob_lo[idim] = geom.ProbLo(idim); + m_inv_cell_size[idim] = geom.InvCellSize(idim); + } + }; + + + AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE + void operator()( const long i ) const { + // Select a particle + auto const& p = m_particles[i+m_start_index]; + // Find the index of the cell where this particle is located + amrex::IntVect const iv = amrex::getParticleCell( p, + m_prob_lo, m_inv_cell_size, m_domain ); + // Find the value of the buffer flag in this cell and + // store it at the corresponding particle position in the array `inexflag` + m_inexflag_ptr[i+m_start_index] = m_buffer_mask(iv); + }; + + private: + amrex::GpuArray<amrex::Real,AMREX_SPACEDIM> m_prob_lo; + amrex::GpuArray<amrex::Real,AMREX_SPACEDIM> m_inv_cell_size; + amrex::Box m_domain; + int* m_inexflag_ptr; + WarpXParticleContainer::ParticleType const* m_particles; + amrex::Array4<int const> m_buffer_mask; + long const m_start_index; +}; + +/* \brief Functor that copies the elements of `src` into `dst`, + * while reordering them according to `indices` + * + * \param[in] src Source vector + * \param[out] dst Destination vector + * \param[in] indices Array of indices that indicate how to reorder elements + */ +template <typename T> +class copyAndReorder +{ + public: + copyAndReorder( + amrex::Gpu::ManagedDeviceVector<T> const& src, + amrex::Gpu::ManagedDeviceVector<T>& dst, + amrex::Gpu::DeviceVector<long> const& indices ) { + // Extract simple structure that can be used directly on the GPU + m_src_ptr = src.dataPtr(); + m_dst_ptr = dst.dataPtr(); + m_indices_ptr = indices.dataPtr(); + }; + + AMREX_GPU_DEVICE AMREX_FORCE_INLINE + void operator()( const long ip ) const { + m_dst_ptr[ip] = m_src_ptr[ m_indices_ptr[ip] ]; + }; + + private: + T const* m_src_ptr; + T* m_dst_ptr; + long const* m_indices_ptr; +}; + +#endif // WARPX_PARTICLES_SORTING_SORTINGUTILS_H_ diff --git a/Source/Particles/WarpXParticleContainer.H b/Source/Particles/WarpXParticleContainer.H index b717d89e6..4b36b12f6 100644 --- a/Source/Particles/WarpXParticleContainer.H +++ b/Source/Particles/WarpXParticleContainer.H @@ -236,14 +236,6 @@ public: const amrex::ParticleReal* vx, const amrex::ParticleReal* vy, const amrex::ParticleReal* vz, int nattr, const amrex::ParticleReal* attr, int uniqueparticles, int id=-1); - void AddOneParticle (int lev, int grid, int tile, - amrex::ParticleReal x, amrex::ParticleReal y, amrex::ParticleReal z, - std::array<amrex::ParticleReal,PIdx::nattribs>& attribs); - - void AddOneParticle (ParticleTileType& particle_tile, - amrex::ParticleReal x, amrex::ParticleReal y, amrex::ParticleReal z, - std::array<amrex::ParticleReal,PIdx::nattribs>& attribs); - virtual void ReadHeader (std::istream& is); virtual void WriteHeader (std::ostream& os) const; diff --git a/Source/Particles/WarpXParticleContainer.cpp b/Source/Particles/WarpXParticleContainer.cpp index 65a82f233..7fb57500d 100644 --- a/Source/Particles/WarpXParticleContainer.cpp +++ b/Source/Particles/WarpXParticleContainer.cpp @@ -135,45 +135,6 @@ WarpXParticleContainer::AllocData () } void -WarpXParticleContainer::AddOneParticle (int lev, int grid, int tile, - ParticleReal x, ParticleReal y, ParticleReal z, - std::array<ParticleReal,PIdx::nattribs>& attribs) -{ - auto& particle_tile = DefineAndReturnParticleTile(lev, grid, tile); - AddOneParticle(particle_tile, x, y, z, attribs); -} - -void -WarpXParticleContainer::AddOneParticle (ParticleTileType& particle_tile, - ParticleReal x, ParticleReal y, ParticleReal z, - std::array<ParticleReal,PIdx::nattribs>& attribs) -{ - ParticleType p; - p.id() = ParticleType::NextID(); - p.cpu() = ParallelDescriptor::MyProc(); -#if (AMREX_SPACEDIM == 3) - p.pos(0) = x; - p.pos(1) = y; - p.pos(2) = z; -#elif (AMREX_SPACEDIM == 2) -#ifdef WARPX_DIM_RZ - attribs[PIdx::theta] = std::atan2(y, x); - x = std::sqrt(x*x + y*y); -#endif - p.pos(0) = x; - p.pos(1) = z; -#endif - - particle_tile.push_back(p); - particle_tile.push_back_real(attribs); - - for (int i = PIdx::nattribs; i < NumRealComps(); ++i) - { - particle_tile.push_back_real(i, 0.0); - } -} - -void WarpXParticleContainer::AddNParticles (int lev, int n, const ParticleReal* x, const ParticleReal* y, const ParticleReal* z, const ParticleReal* vx, const ParticleReal* vy, const ParticleReal* vz, diff --git a/Source/Python/WarpXWrappers.cpp b/Source/Python/WarpXWrappers.cpp index c6d7dfdb8..be9ec9519 100644 --- a/Source/Python/WarpXWrappers.cpp +++ b/Source/Python/WarpXWrappers.cpp @@ -1,7 +1,4 @@ -#include <AMReX.H> -#include <AMReX_BLProfiler.H> - #include <WarpXWrappers.h> #include <WarpXParticleContainer.H> #include <WarpX.H> @@ -10,7 +7,7 @@ namespace { - double** getMultiFabPointers(const amrex::MultiFab& mf, int *num_boxes, int *ncomps, int *ngrow, int **shapes) + amrex::Real** getMultiFabPointers(const amrex::MultiFab& mf, int *num_boxes, int *ncomps, int *ngrow, int **shapes) { *ncomps = mf.nComp(); *ngrow = mf.nGrow(); @@ -18,14 +15,14 @@ namespace int shapesize = AMREX_SPACEDIM; if (mf.nComp() > 1) shapesize += 1; *shapes = (int*) malloc(shapesize * (*num_boxes) * sizeof(int)); - double** data = (double**) malloc((*num_boxes) * sizeof(double*)); + amrex::Real** data = (amrex::Real**) malloc((*num_boxes) * sizeof(amrex::Real*)); #ifdef _OPENMP #pragma omp parallel #endif for ( amrex::MFIter mfi(mf, false); mfi.isValid(); ++mfi ) { int i = mfi.LocalIndex(); - data[i] = (double*) mf[mfi].dataPtr(); + data[i] = (amrex::Real*) mf[mfi].dataPtr(); for (int j = 0; j < AMREX_SPACEDIM; ++j) { (*shapes)[shapesize*i+j] = mf[mfi].box().length(j); } @@ -53,6 +50,16 @@ namespace extern "C" { + int warpx_Real_size() + { + return (int)sizeof(amrex::Real); + } + + int warpx_ParticleReal_size() + { + return (int)sizeof(amrex::ParticleReal); + } + int warpx_nSpecies() { auto & mypc = WarpX::GetInstance().GetPartContainer(); @@ -165,9 +172,9 @@ extern "C" } void warpx_addNParticles(int speciesnumber, int lenx, - double* x, double* y, double* z, - double* vx, double* vy, double* vz, - int nattr, double* attr, int uniqueparticles) + amrex::ParticleReal* x, amrex::ParticleReal* y, amrex::ParticleReal* z, + amrex::ParticleReal* vx, amrex::ParticleReal* vy, amrex::ParticleReal* vz, + int nattr, amrex::ParticleReal* attr, int uniqueparticles) { auto & mypc = WarpX::GetInstance().GetPartContainer(); auto & myspc = mypc.GetParticleContainer(speciesnumber); @@ -180,14 +187,14 @@ extern "C" ConvertLabParamsToBoost(); } - double warpx_getProbLo(int dir) + amrex::Real warpx_getProbLo(int dir) { WarpX& warpx = WarpX::GetInstance(); const amrex::Geometry& geom = warpx.Geom(0); return geom.ProbLo(dir); } - double warpx_getProbHi(int dir) + amrex::Real warpx_getProbHi(int dir) { WarpX& warpx = WarpX::GetInstance(); const amrex::Geometry& geom = warpx.Geom(0); @@ -200,7 +207,7 @@ extern "C" return myspc.TotalNumberOfParticles(); } - double** warpx_getEfield(int lev, int direction, + amrex::Real** warpx_getEfield(int lev, int direction, int *return_size, int *ncomps, int *ngrow, int **shapes) { auto & mf = WarpX::GetInstance().getEfield(lev, direction); return getMultiFabPointers(mf, return_size, ncomps, ngrow, shapes); @@ -212,7 +219,7 @@ extern "C" return getMultiFabLoVects(mf, return_size, ngrow); } - double** warpx_getEfieldCP(int lev, int direction, + amrex::Real** warpx_getEfieldCP(int lev, int direction, int *return_size, int *ncomps, int *ngrow, int **shapes) { auto & mf = WarpX::GetInstance().getEfield_cp(lev, direction); return getMultiFabPointers(mf, return_size, ncomps, ngrow, shapes); @@ -224,7 +231,7 @@ extern "C" return getMultiFabLoVects(mf, return_size, ngrow); } - double** warpx_getEfieldFP(int lev, int direction, + amrex::Real** warpx_getEfieldFP(int lev, int direction, int *return_size, int *ncomps, int *ngrow, int **shapes) { auto & mf = WarpX::GetInstance().getEfield_fp(lev, direction); return getMultiFabPointers(mf, return_size, ncomps, ngrow, shapes); @@ -236,7 +243,7 @@ extern "C" return getMultiFabLoVects(mf, return_size, ngrow); } - double** warpx_getBfield(int lev, int direction, + amrex::Real** warpx_getBfield(int lev, int direction, int *return_size, int *ncomps, int *ngrow, int **shapes) { auto & mf = WarpX::GetInstance().getBfield(lev, direction); return getMultiFabPointers(mf, return_size, ncomps, ngrow, shapes); @@ -248,7 +255,7 @@ extern "C" return getMultiFabLoVects(mf, return_size, ngrow); } - double** warpx_getBfieldCP(int lev, int direction, + amrex::Real** warpx_getBfieldCP(int lev, int direction, int *return_size, int *ncomps, int *ngrow, int **shapes) { auto & mf = WarpX::GetInstance().getBfield_cp(lev, direction); return getMultiFabPointers(mf, return_size, ncomps, ngrow, shapes); @@ -260,7 +267,7 @@ extern "C" return getMultiFabLoVects(mf, return_size, ngrow); } - double** warpx_getBfieldFP(int lev, int direction, + amrex::Real** warpx_getBfieldFP(int lev, int direction, int *return_size, int *ncomps, int *ngrow, int **shapes) { auto & mf = WarpX::GetInstance().getBfield_fp(lev, direction); return getMultiFabPointers(mf, return_size, ncomps, ngrow, shapes); @@ -272,7 +279,7 @@ extern "C" return getMultiFabLoVects(mf, return_size, ngrow); } - double** warpx_getCurrentDensity(int lev, int direction, + amrex::Real** warpx_getCurrentDensity(int lev, int direction, int *return_size, int *ncomps, int *ngrow, int **shapes) { auto & mf = WarpX::GetInstance().getcurrent(lev, direction); return getMultiFabPointers(mf, return_size, ncomps, ngrow, shapes); @@ -284,7 +291,7 @@ extern "C" return getMultiFabLoVects(mf, return_size, ngrow); } - double** warpx_getCurrentDensityCP(int lev, int direction, + amrex::Real** warpx_getCurrentDensityCP(int lev, int direction, int *return_size, int *ncomps, int *ngrow, int **shapes) { auto & mf = WarpX::GetInstance().getcurrent_cp(lev, direction); return getMultiFabPointers(mf, return_size, ncomps, ngrow, shapes); @@ -296,7 +303,7 @@ extern "C" return getMultiFabLoVects(mf, return_size, ngrow); } - double** warpx_getCurrentDensityFP(int lev, int direction, + amrex::Real** warpx_getCurrentDensityFP(int lev, int direction, int *return_size, int *ncomps, int *ngrow, int **shapes) { auto & mf = WarpX::GetInstance().getcurrent_fp(lev, direction); return getMultiFabPointers(mf, return_size, ncomps, ngrow, shapes); @@ -308,7 +315,7 @@ extern "C" return getMultiFabLoVects(mf, return_size, ngrow); } - double** warpx_getParticleStructs(int speciesnumber, int lev, + amrex::ParticleReal** warpx_getParticleStructs(int speciesnumber, int lev, int* num_tiles, int** particles_per_tile) { auto & mypc = WarpX::GetInstance().GetPartContainer(); auto & myspc = mypc.GetParticleContainer(speciesnumber); @@ -320,17 +327,17 @@ extern "C" *num_tiles = i; *particles_per_tile = (int*) malloc(*num_tiles*sizeof(int)); - double** data = (double**) malloc(*num_tiles*sizeof(typename WarpXParticleContainer::ParticleType*)); + amrex::ParticleReal** data = (amrex::ParticleReal**) malloc(*num_tiles*sizeof(typename WarpXParticleContainer::ParticleType*)); i = 0; for (WarpXParIter pti(myspc, lev); pti.isValid(); ++pti, ++i) { auto& aos = pti.GetArrayOfStructs(); - data[i] = (double*) aos.data(); + data[i] = (amrex::ParticleReal*) aos.data(); (*particles_per_tile)[i] = pti.numParticles(); } return data; } - double** warpx_getParticleArrays(int speciesnumber, int comp, int lev, + amrex::ParticleReal** warpx_getParticleArrays(int speciesnumber, int comp, int lev, int* num_tiles, int** particles_per_tile) { auto & mypc = WarpX::GetInstance().GetPartContainer(); auto & myspc = mypc.GetParticleContainer(speciesnumber); @@ -342,11 +349,11 @@ extern "C" *num_tiles = i; *particles_per_tile = (int*) malloc(*num_tiles*sizeof(int)); - double** data = (double**) malloc(*num_tiles*sizeof(double*)); + amrex::ParticleReal** data = (amrex::ParticleReal**) malloc(*num_tiles*sizeof(amrex::ParticleReal*)); i = 0; for (WarpXParIter pti(myspc, lev); pti.isValid(); ++pti, ++i) { auto& soa = pti.GetStructOfArrays(); - data[i] = (double*) soa.GetRealData(comp).dataPtr(); + data[i] = (amrex::ParticleReal*) soa.GetRealData(comp).dataPtr(); (*particles_per_tile)[i] = pti.numParticles(); } return data; @@ -361,11 +368,11 @@ extern "C" warpx.MoveWindow (true); } - void warpx_EvolveE (double dt) { + void warpx_EvolveE (amrex::Real dt) { WarpX& warpx = WarpX::GetInstance(); warpx.EvolveE (dt); } - void warpx_EvolveB (double dt) { + void warpx_EvolveB (amrex::Real dt) { WarpX& warpx = WarpX::GetInstance(); warpx.EvolveB (dt); } @@ -385,7 +392,7 @@ extern "C" WarpX& warpx = WarpX::GetInstance(); warpx.UpdateAuxilaryData (); } - void warpx_PushParticlesandDepose (double cur_time) { + void warpx_PushParticlesandDepose (amrex::Real cur_time) { WarpX& warpx = WarpX::GetInstance(); warpx.PushParticlesandDepose (cur_time); } @@ -398,15 +405,15 @@ extern "C" WarpX& warpx = WarpX::GetInstance(); warpx.setistep (lev, ii); } - double warpx_gett_new (int lev) { + amrex::Real warpx_gett_new (int lev) { WarpX& warpx = WarpX::GetInstance(); return warpx.gett_new (lev); } - void warpx_sett_new (int lev, double time) { + void warpx_sett_new (int lev, amrex::Real time) { WarpX& warpx = WarpX::GetInstance(); warpx.sett_new (lev, time); } - double warpx_getdt (int lev) { + amrex::Real warpx_getdt (int lev) { WarpX& warpx = WarpX::GetInstance(); return warpx.getdt (lev); } @@ -415,7 +422,7 @@ extern "C" WarpX& warpx = WarpX::GetInstance(); return warpx.maxStep (); } - double warpx_stopTime () { + amrex::Real warpx_stopTime () { WarpX& warpx = WarpX::GetInstance(); return warpx.stopTime (); } diff --git a/Source/Python/WarpXWrappers.h b/Source/Python/WarpXWrappers.h index 233a0b930..a272b9250 100644 --- a/Source/Python/WarpXWrappers.h +++ b/Source/Python/WarpXWrappers.h @@ -1,6 +1,9 @@ #ifndef WARPX_WRAPPERS_H_ #define WARPX_WRAPPERS_H_ +#include <AMReX.H> +#include <AMReX_BLProfiler.H> + #ifdef BL_USE_MPI #include <mpi.h> #endif @@ -9,6 +12,9 @@ extern "C" { #endif + int warpx_Real_size(); + int warpx_ParticleReal_size(); + int warpx_nSpecies(); bool warpx_use_fdtd_nci_corr(); @@ -49,61 +55,61 @@ extern "C" { void warpx_evolve (int numsteps); // -1 means the inputs parameter will be used. void warpx_addNParticles(int speciesnumber, int lenx, - double* x, double* y, double* z, - double* vx, double* vy, double* vz, - int nattr, double* attr, int uniqueparticles); + amrex::ParticleReal* x, amrex::ParticleReal* y, amrex::ParticleReal* z, + amrex::ParticleReal* vx, amrex::ParticleReal* vy, amrex::ParticleReal* vz, + int nattr, amrex::ParticleReal* attr, int uniqueparticles); void warpx_ConvertLabParamsToBoost(); - double warpx_getProbLo(int dir); + amrex::Real warpx_getProbLo(int dir); - double warpx_getProbHi(int dir); + amrex::Real warpx_getProbHi(int dir); long warpx_getNumParticles(int speciesnumber); - double** warpx_getEfield(int lev, int direction, - int *return_size, int* ncomps, int* ngrow, int **shapes); + amrex::Real** warpx_getEfield(int lev, int direction, + int *return_size, int* ncomps, int* ngrow, int **shapes); int* warpx_getEfieldLoVects(int lev, int direction, int *return_size, int* ngrow); - double** warpx_getBfield(int lev, int direction, - int *return_size, int* ncomps, int* ngrow, int **shapes); + amrex::Real** warpx_getBfield(int lev, int direction, + int *return_size, int* ncomps, int* ngrow, int **shapes); int* warpx_getBfieldLoVects(int lev, int direction, int *return_size, int* ngrow); - double** warpx_getCurrentDensity(int lev, int direction, - int *return_size, int* ncomps, int* ngrow, int **shapes); + amrex::Real** warpx_getCurrentDensity(int lev, int direction, + int *return_size, int* ncomps, int* ngrow, int **shapes); int* warpx_getCurrentDensityLoVects(int lev, int direction, int *return_size, int* ngrow); - double** warpx_getParticleStructs(int speciesnumber, int lev, - int* num_tiles, int** particles_per_tile); + amrex::ParticleReal** warpx_getParticleStructs(int speciesnumber, int lev, + int* num_tiles, int** particles_per_tile); - double** warpx_getParticleArrays(int speciesnumber, int comp, int lev, - int* num_tiles, int** particles_per_tile); + amrex::ParticleReal** warpx_getParticleArrays(int speciesnumber, int comp, int lev, + int* num_tiles, int** particles_per_tile); void warpx_ComputeDt (); void warpx_MoveWindow (); - void warpx_EvolveE (double dt); - void warpx_EvolveB (double dt); + void warpx_EvolveE (amrex::Real dt); + void warpx_EvolveB (amrex::Real dt); void warpx_FillBoundaryE (); void warpx_FillBoundaryB (); void warpx_SyncCurrent (); void warpx_UpdateAuxilaryData (); - void warpx_PushParticlesandDepose (double cur_time); + void warpx_PushParticlesandDepose (amrex::Real cur_time); int warpx_getistep (int lev); void warpx_setistep (int lev, int ii); - double warpx_gett_new (int lev); - void warpx_sett_new (int lev, double time); - double warpx_getdt (int lev); + amrex::Real warpx_gett_new (int lev); + void warpx_sett_new (int lev, amrex::Real time); + amrex::Real warpx_getdt (int lev); int warpx_maxStep (); - double warpx_stopTime (); + amrex::Real warpx_stopTime (); int warpx_checkInt (); int warpx_plotInt (); diff --git a/Source/WarpX.H b/Source/WarpX.H index c59802427..0da1cf350 100644 --- a/Source/WarpX.H +++ b/Source/WarpX.H @@ -436,10 +436,12 @@ private: * averaging the values of the charge density of the fine patch (on the same level). * * \param[in] fine fine patches to interpolate from - * \param[out] crse coarse patches to interpolate to - * \param[in] ref_ratio integer ratio between the two + * \param[out] coarse coarse patches to interpolate to + * \param[in] refinement_ratio integer ratio between the two */ - void SyncRho (const amrex::MultiFab& fine, amrex::MultiFab& crse, int ref_ratio); + void interpolateDensityFineToCoarse (const amrex::MultiFab& fine, + amrex::MultiFab& coarse, + int const refinement_ratio); void ExchangeWithPmlB (int lev); void ExchangeWithPmlE (int lev); diff --git a/Source/WarpX.cpp b/Source/WarpX.cpp index 91174ee0b..5a51d7d13 100644 --- a/Source/WarpX.cpp +++ b/Source/WarpX.cpp @@ -137,13 +137,13 @@ WarpX::WarpX () // No valid BoxArray and DistributionMapping have been defined. // But the arrays for them have been resized. - int nlevs_max = maxLevel() + 1; + const int nlevs_max = maxLevel() + 1; istep.resize(nlevs_max, 0); nsubsteps.resize(nlevs_max, 1); #if 0 // no subcycling yet - for (int lev = 1; lev <= maxLevel(); ++lev) { + for (int lev = 1; lev < nlevs_max; ++lev) { nsubsteps[lev] = MaxRefRatio(lev-1); } #endif @@ -239,7 +239,7 @@ WarpX::WarpX () WarpX::~WarpX () { - int nlevs_max = maxLevel() +1; + const int nlevs_max = maxLevel() +1; for (int lev = 0; lev < nlevs_max; ++lev) { ClearLevel(lev); } @@ -452,7 +452,8 @@ WarpX::ReadParameters () } // Check that the coarsening_ratio can divide the blocking factor - for (int lev=0; lev<maxLevel(); lev++){ + const int nlevs_max = maxLevel(); + for (int lev=0; lev<nlevs_max; lev++){ for (int comp=0; comp<AMREX_SPACEDIM; comp++){ if ( blockingFactor(lev)[comp] % plot_coarsening_ratio != 0 ){ amrex::Abort("plot_coarsening_ratio should be an integer " |