diff options
author | 2018-12-11 10:37:21 -0800 | |
---|---|---|
committer | 2018-12-11 10:37:21 -0800 | |
commit | b53d4a7ea5c3963e16727aafdbda01771af04c0d (patch) | |
tree | 33ddccf2ceb4ae6670baa4ed31b9b97cb7035473 /Source/PhysicalParticleContainer.cpp | |
parent | 4e4a84aba36b2d2168cf41077f596549180f0dd9 (diff) | |
download | WarpX-b53d4a7ea5c3963e16727aafdbda01771af04c0d.tar.gz WarpX-b53d4a7ea5c3963e16727aafdbda01771af04c0d.tar.zst WarpX-b53d4a7ea5c3963e16727aafdbda01771af04c0d.zip |
fix conflicts for merge revert
Diffstat (limited to 'Source/PhysicalParticleContainer.cpp')
-rw-r--r-- | Source/PhysicalParticleContainer.cpp | 729 |
1 files changed, 293 insertions, 436 deletions
diff --git a/Source/PhysicalParticleContainer.cpp b/Source/PhysicalParticleContainer.cpp index c4fdd4464..df0ee3b3c 100644 --- a/Source/PhysicalParticleContainer.cpp +++ b/Source/PhysicalParticleContainer.cpp @@ -10,62 +10,6 @@ using namespace amrex; -long PhysicalParticleContainer:: -NumParticlesToAdd(const Box& overlap_box, const RealBox& overlap_realbox, - const RealBox& tile_realbox, const RealBox& particle_real_box) -{ - const int lev = 0; - const Geometry& geom = Geom(lev); - int num_ppc = plasma_injector->num_particles_per_cell; - const Real* dx = geom.CellSize(); - - long np = 0; - const auto& overlap_corner = overlap_realbox.lo(); - for (IntVect iv = overlap_box.smallEnd(); iv <= overlap_box.bigEnd(); overlap_box.next(iv)) - { - int fac; - if (injected) { -#if ( AMREX_SPACEDIM == 3 ) - Real x = overlap_corner[0] + (iv[0] + 0.5)*dx[0]; - Real y = overlap_corner[1] + (iv[1] + 0.5)*dx[1]; - Real z = overlap_corner[2] + (iv[2] + 0.5)*dx[2]; -#elif ( AMREX_SPACEDIM == 2 ) - Real x = overlap_corner[0] + (iv[0] + 0.5)*dx[0]; - Real y = 0; - Real z = overlap_corner[1] + (iv[1] + 0.5)*dx[1]; -#endif - fac = GetRefineFac(x, y, z); - } else { - fac = 1.0; - } - - int ref_num_ppc = num_ppc * AMREX_D_TERM(fac, *fac, *fac); - for (int i_part=0; i_part<ref_num_ppc;i_part++) { - std::array<Real, 3> r; - plasma_injector->getPositionUnitBox(r, i_part, fac); -#if ( AMREX_SPACEDIM == 3 ) - Real x = overlap_corner[0] + (iv[0] + r[0])*dx[0]; - Real y = overlap_corner[1] + (iv[1] + r[1])*dx[1]; - Real z = overlap_corner[2] + (iv[2] + r[2])*dx[2]; -#elif ( AMREX_SPACEDIM == 2 ) - Real x = overlap_corner[0] + (iv[0] + r[0])*dx[0]; - Real y = 0; - Real z = overlap_corner[1] + (iv[1] + r[1])*dx[1]; -#endif - // If the new particle is not inside the tile box, - // go to the next generated particle. -#if ( AMREX_SPACEDIM == 3 ) - if(!tile_realbox.contains( RealVect{x, y, z} )) continue; -#elif ( AMREX_SPACEDIM == 2 ) - if(!tile_realbox.contains( RealVect{x, z} )) continue; -#endif - ++np; - } - } - - return np; -} - PhysicalParticleContainer::PhysicalParticleContainer (AmrCore* amr_core, int ispecies, const std::string& name) : WarpXParticleContainer(amr_core, ispecies), @@ -78,7 +22,7 @@ PhysicalParticleContainer::PhysicalParticleContainer (AmrCore* amr_core, int isp ParmParse pp(species_name); pp.query("boost_adjust_transverse_positions", boost_adjust_transverse_positions); - pp.query("do_backward_propagation", do_backward_propagation); + pp.query("do_backward_propagation", do_backward_propagation); } void PhysicalParticleContainer::InitData() @@ -230,19 +174,9 @@ PhysicalParticleContainer::AddParticles (int lev) * but its boundaries need not be aligned with the actual cells of the simulation) */ void -PhysicalParticleContainer::AddPlasma (int lev, RealBox part_realbox) +PhysicalParticleContainer::AddPlasma(int lev, RealBox part_realbox) { -#ifdef AMREX_USE_GPU - AddPlasmaGPU(lev, part_realbox); -#else - AddPlasmaCPU(lev, part_realbox); -#endif -} - -void -PhysicalParticleContainer::AddPlasmaCPU (int lev, RealBox part_realbox) -{ - BL_PROFILE("PhysicalParticleContainer::AddPlasmaCPU"); + BL_PROFILE("PhysicalParticleContainer::AddPlasma"); // If no part_realbox is provided, initialize particles in the whole domain const Geometry& geom = Geom(lev); @@ -431,264 +365,18 @@ PhysicalParticleContainer::AddPlasmaCPU (int lev, RealBox part_realbox) attribs[PIdx::uzold] = u[2]; #endif - AddOneParticle(lev, grid_id, tile_id, x, y, z, attribs); + AddOneParticle(lev, grid_id, tile_id, x, y, z, attribs); } } if (cost) { - wt = (amrex::second() - wt) / tile_box.d_numPts(); - FArrayBox* costfab = cost->fabPtr(mfi); - AMREX_LAUNCH_HOST_DEVICE_LAMBDA ( tile_box, work_box, - { - costfab->plus(wt, work_box); - }); + wt = (amrex::second() - wt) / tile_box.d_numPts(); + (*cost)[mfi].plus(wt, tile_box); } } } } -#ifdef AMREX_USE_GPU -void -PhysicalParticleContainer::AddPlasmaGPU (int lev, RealBox part_realbox) -{ - BL_PROFILE("PhysicalParticleContainer::AddPlasmaGPU"); - - // If no part_realbox is provided, initialize particles in the whole domain - const Geometry& geom = Geom(lev); - if (!part_realbox.ok()) part_realbox = geom.ProbDomain(); - - int num_ppc = plasma_injector->num_particles_per_cell; - - const Real* dx = geom.CellSize(); - - Real scale_fac; -#if AMREX_SPACEDIM==3 - scale_fac = dx[0]*dx[1]*dx[2]/num_ppc; -#elif AMREX_SPACEDIM==2 - scale_fac = dx[0]*dx[1]/num_ppc; -#endif - -#ifdef _OPENMP - // First touch all tiles in the map in serial - for (MFIter mfi = MakeMFIter(lev); mfi.isValid(); ++mfi) { - const int grid_id = mfi.index(); - const int tile_id = mfi.LocalTileIndex(); - GetParticles(lev)[std::make_pair(grid_id, tile_id)]; - } -#endif - - MultiFab* cost = WarpX::getCosts(lev); - - if ( (not m_refined_injection_mask) and WarpX::do_moving_window) - { - Box mask_box = geom.Domain(); - mask_box.setSmall(WarpX::moving_window_dir, 0); - mask_box.setBig(WarpX::moving_window_dir, 0); - m_refined_injection_mask.reset( new IArrayBox(mask_box)); - m_refined_injection_mask->setVal(-1); - } - - MFItInfo info; - if (do_tiling) { - info.EnableTiling(tile_size); - } - info.SetDynamic(true); - -#ifdef _OPENMP -#pragma omp parallel if (not WarpX::serialize_ics) -#endif - { - std::array<Real,PIdx::nattribs> attribs; - attribs.fill(0.0); - - // Loop through the tiles - for (MFIter mfi = MakeMFIter(lev, info); mfi.isValid(); ++mfi) { - - Real wt = amrex::second(); - - const Box& tile_box = mfi.tilebox(); - const RealBox tile_realbox = WarpX::getRealBox(tile_box, lev); - - // Find the cells of part_box that overlap with tile_realbox - // If there is no overlap, just go to the next tile in the loop - RealBox overlap_realbox; - Box overlap_box; - Real ncells_adjust; - bool no_overlap = 0; - - for (int dir=0; dir<AMREX_SPACEDIM; dir++) { - if ( tile_realbox.lo(dir) <= part_realbox.hi(dir) ) { - ncells_adjust = std::floor( (tile_realbox.lo(dir) - part_realbox.lo(dir))/dx[dir] ); - overlap_realbox.setLo( dir, part_realbox.lo(dir) + std::max(ncells_adjust, 0.) * dx[dir]); - } else { - no_overlap = 1; break; - } - if ( tile_realbox.hi(dir) >= part_realbox.lo(dir) ) { - ncells_adjust = std::floor( (part_realbox.hi(dir) - tile_realbox.hi(dir))/dx[dir] ); - overlap_realbox.setHi( dir, part_realbox.hi(dir) - std::max(ncells_adjust, 0.) * dx[dir]); - } else { - no_overlap = 1; break; - } - // Count the number of cells in this direction in overlap_realbox - overlap_box.setSmall( dir, 0 ); - overlap_box.setBig( dir, - int( round((overlap_realbox.hi(dir)-overlap_realbox.lo(dir))/dx[dir] )) - 1); - } - if (no_overlap == 1) { - continue; // Go to the next tile - } - - const int grid_id = mfi.index(); - const int tile_id = mfi.LocalTileIndex(); - - Cuda::HostVector<ParticleType> host_particles; - std::array<Cuda::HostVector<Real>, PIdx::nattribs> host_attribs; - - // Loop through the cells of overlap_box and inject - // the corresponding particles - const auto& overlap_corner = overlap_realbox.lo(); - for (IntVect iv = overlap_box.smallEnd(); iv <= overlap_box.bigEnd(); overlap_box.next(iv)) - { - int fac; - if (injected) { -#if ( AMREX_SPACEDIM == 3 ) - Real x = overlap_corner[0] + (iv[0] + 0.5)*dx[0]; - Real y = overlap_corner[1] + (iv[1] + 0.5)*dx[1]; - Real z = overlap_corner[2] + (iv[2] + 0.5)*dx[2]; -#elif ( AMREX_SPACEDIM == 2 ) - Real x = overlap_corner[0] + (iv[0] + 0.5)*dx[0]; - Real y = 0; - Real z = overlap_corner[1] + (iv[1] + 0.5)*dx[1]; -#endif - fac = GetRefineFac(x, y, z); - } else { - fac = 1.0; - } - - int ref_num_ppc = num_ppc * AMREX_D_TERM(fac, *fac, *fac); - for (int i_part=0; i_part<ref_num_ppc;i_part++) { - std::array<Real, 3> r; - plasma_injector->getPositionUnitBox(r, i_part, fac); -#if ( AMREX_SPACEDIM == 3 ) - Real x = overlap_corner[0] + (iv[0] + r[0])*dx[0]; - Real y = overlap_corner[1] + (iv[1] + r[1])*dx[1]; - Real z = overlap_corner[2] + (iv[2] + r[2])*dx[2]; -#elif ( AMREX_SPACEDIM == 2 ) - Real x = overlap_corner[0] + (iv[0] + r[0])*dx[0]; - Real y = 0; - Real z = overlap_corner[1] + (iv[1] + r[1])*dx[1]; -#endif - // If the new particle is not inside the tile box, - // go to the next generated particle. -#if ( AMREX_SPACEDIM == 3 ) - if(!tile_realbox.contains( RealVect{x, y, z} )) continue; -#elif ( AMREX_SPACEDIM == 2 ) - if(!tile_realbox.contains( RealVect{x, z} )) continue; -#endif - - Real dens; - std::array<Real, 3> u; - if (WarpX::gamma_boost == 1.){ - // Lab-frame simulation - // If the particle is not within the species's - // xmin, xmax, ymin, ymax, zmin, zmax, go to - // the next generated particle. - if (!plasma_injector->insideBounds(x, y, z)) continue; - plasma_injector->getMomentum(u, x, y, z); - dens = plasma_injector->getDensity(x, y, z); - } else { - // Boosted-frame simulation - Real c = PhysConst::c; - Real gamma_boost = WarpX::gamma_boost; - Real beta_boost = WarpX::beta_boost; - // Since the user provides the density distribution - // at t_lab=0 and in the lab-frame coordinates, - // we need to find the lab-frame position of this - // particle at t_lab=0, from its boosted-frame coordinates - // Assuming ballistic motion, this is given by: - // z0_lab = gamma*( z_boost*(1-beta*betaz_lab) - ct_boost*(betaz_lab-beta) ) - // where betaz_lab is the speed of the particle in the lab frame - // - // In order for this equation to be solvable, betaz_lab - // is explicitly assumed to have no dependency on z0_lab - plasma_injector->getMomentum(u, x, y, 0.); // No z0_lab dependency - // At this point u is the lab-frame momentum - // => Apply the above formula for z0_lab - Real gamma_lab = std::sqrt( 1 + (u[0]*u[0] + u[1]*u[1] + u[2]*u[2])/(c*c) ); - Real betaz_lab = u[2]/gamma_lab/c; - Real t = WarpX::GetInstance().gett_new(lev); - Real z0_lab = gamma_boost * ( z*(1-beta_boost*betaz_lab) - c*t*(betaz_lab-beta_boost) ); - // If the particle is not within the lab-frame zmin, zmax, etc. - // go to the next generated particle. - if (!plasma_injector->insideBounds(x, y, z0_lab)) continue; - // call `getDensity` with lab-frame parameters - dens = plasma_injector->getDensity(x, y, z0_lab); - // At this point u and dens are the lab-frame quantities - // => Perform Lorentz transform - dens = gamma_boost * dens * ( 1 - beta_boost*betaz_lab ); - u[2] = gamma_boost * ( u[2] -beta_boost*c*gamma_lab ); - } - attribs[PIdx::w ] = dens * scale_fac / (AMREX_D_TERM(fac, *fac, *fac)); - attribs[PIdx::ux] = u[0]; - attribs[PIdx::uy] = u[1]; - attribs[PIdx::uz] = u[2]; - -#ifdef WARPX_STORE_OLD_PARTICLE_ATTRIBS - attribs[PIdx::xold] = x; - attribs[PIdx::yold] = y; - attribs[PIdx::zold] = z; - - attribs[PIdx::uxold] = u[0]; - attribs[PIdx::uyold] = u[1]; - attribs[PIdx::uzold] = u[2]; -#endif - - ParticleType p; - p.id() = ParticleType::NextID(); - p.cpu() = ParallelDescriptor::MyProc(); -#if (AMREX_SPACEDIM == 3) - p.pos(0) = x; - p.pos(1) = y; - p.pos(2) = z; -#elif (AMREX_SPACEDIM == 2) - p.pos(0) = x; - p.pos(1) = z; -#endif - - host_particles.push_back(p); - for (int kk = 0; kk < PIdx::nattribs; ++kk) - host_attribs[kk].push_back(attribs[kk]); - } - } - - auto& particle_tile = GetParticles(lev)[std::make_pair(grid_id,tile_id)]; - auto old_size = particle_tile.GetArrayOfStructs().size(); - auto new_size = old_size + host_particles.size(); - particle_tile.resize(new_size); - - thrust::copy(host_particles.begin(), - host_particles.end(), - particle_tile.GetArrayOfStructs().begin() + old_size); - - for (int kk = 0; kk < PIdx::nattribs; ++kk) { - thrust::copy(host_attribs[kk].begin(), - host_attribs[kk].end(), - particle_tile.GetStructOfArrays().GetRealData(kk).begin() + old_size); - } - - if (cost) { - wt = (amrex::second() - wt) / tile_box.d_numPts(); - FArrayBox* costfab = cost->fabPtr(mfi); - AMREX_LAUNCH_HOST_DEVICE_LAMBDA ( tile_box, work_box, - { - costfab->plus(wt, work_box); - }); - } - } - } -} -#endif - #ifdef WARPX_DO_ELECTROSTATIC void PhysicalParticleContainer:: @@ -737,10 +425,10 @@ FieldGatherES (const amrex::Vector<std::array<std::unique_ptr<amrex::MultiFab>, const FArrayBox& ezfab = (*E[lev][2])[pti]; #endif - WRPX_INTERPOLATE_CIC(particles.dataPtr(), nstride, np, - Exp.dataPtr(), Eyp.dataPtr(), + WRPX_INTERPOLATE_CIC(particles.data(), nstride, np, + Exp.data(), Eyp.data(), #if AMREX_SPACEDIM == 3 - Ezp.dataPtr(), + Ezp.data(), #endif exfab.dataPtr(), eyfab.dataPtr(), #if AMREX_SPACEDIM == 3 @@ -807,10 +495,10 @@ FieldGatherES (const amrex::Vector<std::array<std::unique_ptr<amrex::MultiFab>, #endif if (lev == 0) { - WRPX_INTERPOLATE_CIC(particles.dataPtr(), nstride, np, - Exp.dataPtr(), Eyp.dataPtr(), + WRPX_INTERPOLATE_CIC(particles.data(), nstride, np, + Exp.data(), Eyp.data(), #if AMREX_SPACEDIM == 3 - Ezp.dataPtr(), + Ezp.data(), #endif exfab.dataPtr(), eyfab.dataPtr(), #if AMREX_SPACEDIM == 3 @@ -827,10 +515,10 @@ FieldGatherES (const amrex::Vector<std::array<std::unique_ptr<amrex::MultiFab>, const Box& coarse_box = coarsened_fine_BA[pti]; const Real* coarse_dx = Geom(0).CellSize(); - WRPX_INTERPOLATE_CIC_TWO_LEVELS(particles.dataPtr(), nstride, np, - Exp.dataPtr(), Eyp.dataPtr(), + WRPX_INTERPOLATE_CIC_TWO_LEVELS(particles.data(), nstride, np, + Exp.data(), Eyp.data(), #if AMREX_SPACEDIM == 3 - Ezp.dataPtr(), + Ezp.data(), #endif exfab.dataPtr(), eyfab.dataPtr(), #if AMREX_SPACEDIM == 3 @@ -885,14 +573,14 @@ PhysicalParticleContainer::EvolveES (const Vector<std::array<std::unique_ptr<Mul // // Particle Push // - WRPX_PUSH_LEAPFROG(particles.dataPtr(), nstride, np, - uxp.dataPtr(), uyp.dataPtr(), + WRPX_PUSH_LEAPFROG(particles.data(), nstride, np, + uxp.data(), uyp.data(), #if AMREX_SPACEDIM == 3 - uzp.dataPtr(), + uzp.data(), #endif - Exp.dataPtr(), Eyp.dataPtr(), + Exp.data(), Eyp.data(), #if AMREX_SPACEDIM == 3 - Ezp.dataPtr(), + Ezp.data(), #endif &this->charge, &this->mass, &dt, prob_domain.lo(), prob_domain.hi()); @@ -919,7 +607,7 @@ PhysicalParticleContainer::FieldGather (int lev, #pragma omp parallel #endif { - Cuda::DeviceVector<Real> xp, yp, zp; + Vector<Real> xp, yp, zp; for (WarpXParIter pti(*this, lev); pti.isValid(); ++pti) { @@ -965,14 +653,12 @@ PhysicalParticleContainer::FieldGather (int lev, // Field Gather // const int ll4symtry = false; + const int l_lower_order_in_v = warpx_l_lower_order_in_v(); long lvect_fieldgathe = 64; warpx_geteb_energy_conserving( - &np, - xp.dataPtr(), - yp.dataPtr(), - zp.dataPtr(), - Exp.dataPtr(),Eyp.dataPtr(),Ezp.dataPtr(), - Bxp.dataPtr(),Byp.dataPtr(),Bzp.dataPtr(), + &np, xp.data(), yp.data(), zp.data(), + Exp.data(),Eyp.data(),Ezp.data(), + Bxp.data(),Byp.data(),Bzp.data(), ixyzmin, &xyzmin[0], &xyzmin[1], &xyzmin[2], &dx[0], &dx[1], &dx[2], @@ -983,17 +669,13 @@ PhysicalParticleContainer::FieldGather (int lev, BL_TO_FORTRAN_ANYD(bxfab), BL_TO_FORTRAN_ANYD(byfab), BL_TO_FORTRAN_ANYD(bzfab), - &ll4symtry, &WarpX::l_lower_order_in_v, + &ll4symtry, &l_lower_order_in_v, &lvect_fieldgathe, &WarpX::field_gathering_algo); if (cost) { const Box& tbx = pti.tilebox(); wt = (amrex::second() - wt) / tbx.d_numPts(); - FArrayBox* costfab = cost->fabPtr(pti); - AMREX_LAUNCH_HOST_DEVICE_LAMBDA ( tbx, work_box, - { - costfab->plus(wt, work_box); - }); + (*cost)[pti].plus(wt, tbx); } } } @@ -1014,15 +696,20 @@ PhysicalParticleContainer::Evolve (int lev, BL_PROFILE_VAR_NS("PPC::Evolve::Copy", blp_copy); BL_PROFILE_VAR_NS("PICSAR::FieldGather", blp_pxr_fg); BL_PROFILE_VAR_NS("PICSAR::ParticlePush", blp_pxr_pp); + BL_PROFILE_VAR_NS("PICSAR::CurrentDeposition", blp_pxr_cd); + BL_PROFILE_VAR_NS("PPC::Evolve::Accumulate", blp_accumulate); BL_PROFILE_VAR_NS("PPC::Evolve::partition", blp_partition); - + const std::array<Real,3>& dx = WarpX::CellSize(lev); const std::array<Real,3>& cdx = WarpX::CellSize(std::max(lev-1,0)); const auto& mypc = WarpX::GetInstance().GetPartContainer(); const int nstencilz_fdtd_nci_corr = mypc.nstencilz_fdtd_nci_corr; - BL_ASSERT(OnSameGrids(lev,jx)); + // WarpX assumes the same number of guard cells for Jx, Jy, Jz + long ngJ = jx.nGrow(); + + BL_ASSERT(OnSameGrids(lev,Ex)); MultiFab* cost = WarpX::getCosts(lev); @@ -1032,26 +719,17 @@ PhysicalParticleContainer::Evolve (int lev, bool has_buffer = cEx || cjx; #ifdef _OPENMP -#pragma omp parallel +#pragma omp parallel #endif { -#ifdef _OPENMP - int thread_num = omp_get_thread_num(); -#else - int thread_num = 0; -#endif - - if (local_rho[thread_num] == nullptr) local_rho[thread_num].reset( new amrex::FArrayBox()); - if (local_jx[thread_num] == nullptr) local_jx[thread_num].reset( new amrex::FArrayBox()); - if (local_jy[thread_num] == nullptr) local_jy[thread_num].reset( new amrex::FArrayBox()); - if (local_jz[thread_num] == nullptr) local_jz[thread_num].reset( new amrex::FArrayBox()); - + Vector<Real> xp, yp, zp, giv; + FArrayBox local_rho, local_jx, local_jy, local_jz; FArrayBox filtered_Ex, filtered_Ey, filtered_Ez; FArrayBox filtered_Bx, filtered_By, filtered_Bz; std::vector<bool> inexflag; Vector<long> pid; - RealVector tmp; - ParticleVector particle_tmp; + Vector<Real> tmp; + Vector<ParticleType> particle_tmp; for (WarpXParIter pti(*this, lev); pti.isValid(); ++pti) { @@ -1082,7 +760,7 @@ PhysicalParticleContainer::Evolve (int lev, FArrayBox const* byfab = &(By[pti]); FArrayBox const* bzfab = &(Bz[pti]); - if (WarpX::use_fdtd_nci_corr) + if (warpx_use_fdtd_nci_corr()) { #if (AMREX_SPACEDIM == 2) const Box& tbox = amrex::grow(pti.tilebox(),{static_cast<int>(WarpX::nox), @@ -1145,6 +823,10 @@ PhysicalParticleContainer::Evolve (int lev, #endif } + FArrayBox& jxfab = jx[pti]; + FArrayBox& jyfab = jy[pti]; + FArrayBox& jzfab = jz[pti]; + Exp.assign(np,0.0); Eyp.assign(np,0.0); Ezp.assign(np,0.0); @@ -1152,7 +834,7 @@ PhysicalParticleContainer::Evolve (int lev, Byp.assign(np,WarpX::B_external[1]); Bzp.assign(np,WarpX::B_external[2]); - m_giv[thread_num].resize(np); + giv.resize(np); long nfine_current = np; long nfine_gather = np; @@ -1251,33 +933,121 @@ PhysicalParticleContainer::Evolve (int lev, // copy data from particle container to temp arrays // BL_PROFILE_VAR_START(blp_copy); - pti.GetPosition(m_xp[thread_num], m_yp[thread_num], m_zp[thread_num]); + pti.GetPosition(xp, yp, zp); BL_PROFILE_VAR_STOP(blp_copy); - if (rho) DepositCharge(pti, wp, rho, crho, 0, np_current, np, thread_num, lev); - + const std::array<Real,3>& xyzmin_tile = WarpX::LowerCorner(pti.tilebox(), lev); + const std::array<Real,3>& xyzmin_grid = WarpX::LowerCorner(box, lev); + const int* ixyzmin_grid = box.loVect(); + + long lvect = 8; + + auto depositCharge = [&] (MultiFab* rhomf, MultiFab* crhomf, int icomp) + { + long ngRho = rhomf->nGrow(); + Real* data_ptr; + Box tile_box = convert(pti.tilebox(), IntVect::TheUnitVector()); + const int *rholen; + + if (np_current > 0) + { + FArrayBox& rhofab = (*rhomf)[pti]; + const std::array<Real, 3>& xyzmin = xyzmin_tile; + tile_box.grow(ngRho); + local_rho.resize(tile_box); + local_rho = 0.0; + data_ptr = local_rho.dataPtr(); + rholen = local_rho.length(); + +#if (AMREX_SPACEDIM == 3) + const long nx = rholen[0]-1-2*ngRho; + const long ny = rholen[1]-1-2*ngRho; + const long nz = rholen[2]-1-2*ngRho; +#else + const long nx = rholen[0]-1-2*ngRho; + const long ny = 0; + const long nz = rholen[1]-1-2*ngRho; +#endif + warpx_charge_deposition(data_ptr, &np_current, + xp.data(), yp.data(), zp.data(), wp.data(), + &this->charge, + &xyzmin[0], &xyzmin[1], &xyzmin[2], + &dx[0], &dx[1], &dx[2], &nx, &ny, &nz, + &ngRho, &ngRho, &ngRho, + &WarpX::nox,&WarpX::noy,&WarpX::noz, + &lvect, &WarpX::charge_deposition_algo); + + const int ncomp = 1; + amrex_atomic_accumulate_fab(BL_TO_FORTRAN_3D(local_rho), + BL_TO_FORTRAN_N_3D(rhofab,icomp), ncomp); + } + + if (np_current < np) + { + const IntVect& ref_ratio = WarpX::RefRatio(lev-1); + const Box& ctilebox = amrex::coarsen(pti.tilebox(), ref_ratio); + const std::array<Real,3>& cxyzmin_tile = WarpX::LowerCorner(ctilebox, lev-1); + + tile_box = amrex::convert(ctilebox, IntVect::TheUnitVector()); + tile_box.grow(ngRho); + + local_rho.resize(tile_box); + + local_rho = 0.0; + + data_ptr = local_rho.dataPtr(); + rholen = local_rho.length(); + +#if (AMREX_SPACEDIM == 3) + const long nx = rholen[0]-1-2*ngRho; + const long ny = rholen[1]-1-2*ngRho; + const long nz = rholen[2]-1-2*ngRho; +#else + const long nx = rholen[0]-1-2*ngRho; + const long ny = 0; + const long nz = rholen[1]-1-2*ngRho; +#endif + + long ncrse = np - nfine_current; + warpx_charge_deposition(data_ptr, &ncrse, + xp.data() + nfine_current, + yp.data() + nfine_current, + zp.data() + nfine_current, + wp.data() + nfine_current, + &this->charge, + &cxyzmin_tile[0], &cxyzmin_tile[1], &cxyzmin_tile[2], + &cdx[0], &cdx[1], &cdx[2], &nx, &ny, &nz, + &ngRho, &ngRho, &ngRho, + &WarpX::nox,&WarpX::noy,&WarpX::noz, + &lvect, &WarpX::charge_deposition_algo); + + FArrayBox& crhofab = (*crhomf)[pti]; + + const int ncomp = 1; + amrex_atomic_accumulate_fab(BL_TO_FORTRAN_3D(local_rho), + BL_TO_FORTRAN_N_3D(crhofab,icomp), ncomp); + } + }; + + if (rho) depositCharge(rho, crho, 0); + if (! do_not_push) { // // Field Gather of Aux Data (i.e., the full solution) // const int ll4symtry = false; + const int l_lower_order_in_v = warpx_l_lower_order_in_v(); long lvect_fieldgathe = 64; - const std::array<Real,3>& xyzmin_grid = WarpX::LowerCorner(box, lev); - const int* ixyzmin_grid = box.loVect(); - const long np_gather = (cEx) ? nfine_gather : np; BL_PROFILE_VAR_START(blp_pxr_fg); warpx_geteb_energy_conserving( - &np_gather, - m_xp[thread_num].dataPtr(), - m_yp[thread_num].dataPtr(), - m_zp[thread_num].dataPtr(), - Exp.dataPtr(),Eyp.dataPtr(),Ezp.dataPtr(), - Bxp.dataPtr(),Byp.dataPtr(),Bzp.dataPtr(), + &np_gather, xp.data(), yp.data(), zp.data(), + Exp.data(),Eyp.data(),Ezp.data(), + Bxp.data(),Byp.data(),Bzp.data(), ixyzmin_grid, &xyzmin_grid[0], &xyzmin_grid[1], &xyzmin_grid[2], &dx[0], &dx[1], &dx[2], @@ -1288,7 +1058,7 @@ PhysicalParticleContainer::Evolve (int lev, BL_TO_FORTRAN_ANYD(*bxfab), BL_TO_FORTRAN_ANYD(*byfab), BL_TO_FORTRAN_ANYD(*bzfab), - &ll4symtry, &WarpX::l_lower_order_in_v, + &ll4symtry, &l_lower_order_in_v, &lvect_fieldgathe, &WarpX::field_gathering_algo); if (np_gather < np) @@ -1305,7 +1075,7 @@ PhysicalParticleContainer::Evolve (int lev, const FArrayBox* cbyfab = &(*cBy)[pti]; const FArrayBox* cbzfab = &(*cBz)[pti]; - if (WarpX::use_fdtd_nci_corr) + if (warpx_use_fdtd_nci_corr()) { #if (AMREX_SPACEDIM == 2) const Box& tbox = amrex::grow(cbox,{static_cast<int>(WarpX::nox), @@ -1369,12 +1139,9 @@ PhysicalParticleContainer::Evolve (int lev, long ncrse = np - nfine_gather; warpx_geteb_energy_conserving( - &ncrse, - m_xp[thread_num].dataPtr()+nfine_gather, - m_yp[thread_num].dataPtr()+nfine_gather, - m_zp[thread_num].dataPtr()+nfine_gather, - Exp.dataPtr()+nfine_gather, Eyp.dataPtr()+nfine_gather, Ezp.dataPtr()+nfine_gather, - Bxp.dataPtr()+nfine_gather, Byp.dataPtr()+nfine_gather, Bzp.dataPtr()+nfine_gather, + &ncrse, xp.data()+nfine_gather, yp.data()+nfine_gather, zp.data()+nfine_gather, + Exp.data()+nfine_gather, Eyp.data()+nfine_gather, Ezp.data()+nfine_gather, + Bxp.data()+nfine_gather, Byp.data()+nfine_gather, Bzp.data()+nfine_gather, cixyzmin_grid, &cxyzmin_grid[0], &cxyzmin_grid[1], &cxyzmin_grid[2], &cdx[0], &cdx[1], &cdx[2], @@ -1385,7 +1152,7 @@ PhysicalParticleContainer::Evolve (int lev, BL_TO_FORTRAN_ANYD(*cbxfab), BL_TO_FORTRAN_ANYD(*cbyfab), BL_TO_FORTRAN_ANYD(*cbzfab), - &ll4symtry, &WarpX::l_lower_order_in_v, + &ll4symtry, &l_lower_order_in_v, &lvect_fieldgathe, &WarpX::field_gathering_algo); } @@ -1395,34 +1162,141 @@ PhysicalParticleContainer::Evolve (int lev, // Particle Push // BL_PROFILE_VAR_START(blp_pxr_pp); - PushPX(pti, m_xp[thread_num], m_yp[thread_num], m_zp[thread_num], - m_giv[thread_num], dt); + PushPX(pti, xp, yp, zp, giv, dt); BL_PROFILE_VAR_STOP(blp_pxr_pp); // - // Current Deposition + // Current Deposition onto fine patch // - DepositCurrent(pti, wp, uxp, uyp, uzp, jx, jy, jz, - cjx, cjy, cjz, np_current, np, thread_num, lev, dt); - + + BL_PROFILE_VAR_START(blp_pxr_cd); + Real *jx_ptr, *jy_ptr, *jz_ptr; + const int *jxntot, *jyntot, *jzntot; + Box tbx = convert(pti.tilebox(), WarpX::jx_nodal_flag); + Box tby = convert(pti.tilebox(), WarpX::jy_nodal_flag); + Box tbz = convert(pti.tilebox(), WarpX::jz_nodal_flag); + Box gtbx, gtby, gtbz; + + const std::array<Real, 3>& xyzmin = xyzmin_tile; + + if (np_current > 0) + { + tbx.grow(ngJ); + tby.grow(ngJ); + tbz.grow(ngJ); + + local_jx.resize(tbx); + local_jy.resize(tby); + local_jz.resize(tbz); + + local_jx = 0.0; + local_jy = 0.0; + local_jz = 0.0; + + jx_ptr = local_jx.dataPtr(); + jy_ptr = local_jy.dataPtr(); + jz_ptr = local_jz.dataPtr(); + + jxntot = local_jx.length(); + jyntot = local_jy.length(); + jzntot = local_jz.length(); + + warpx_current_deposition( + jx_ptr, &ngJ, jxntot, + jy_ptr, &ngJ, jyntot, + jz_ptr, &ngJ, jzntot, + &np_current, xp.data(), yp.data(), zp.data(), + uxp.data(), uyp.data(), uzp.data(), + giv.data(), wp.data(), &this->charge, + &xyzmin[0], &xyzmin[1], &xyzmin[2], + &dt, &dx[0], &dx[1], &dx[2], + &WarpX::nox,&WarpX::noy,&WarpX::noz, + &lvect,&WarpX::current_deposition_algo); + + BL_PROFILE_VAR_STOP(blp_pxr_cd); + + BL_PROFILE_VAR_START(blp_accumulate); + const int ncomp = 1; + amrex_atomic_accumulate_fab(BL_TO_FORTRAN_3D(local_jx), + BL_TO_FORTRAN_3D(jxfab), ncomp); + + amrex_atomic_accumulate_fab(BL_TO_FORTRAN_3D(local_jy), + BL_TO_FORTRAN_3D(jyfab), ncomp); + + amrex_atomic_accumulate_fab(BL_TO_FORTRAN_3D(local_jz), + BL_TO_FORTRAN_3D(jzfab), ncomp); + BL_PROFILE_VAR_STOP(blp_accumulate); + } + + if (np_current < np) + { + const IntVect& ref_ratio = WarpX::RefRatio(lev-1); + const Box& ctilebox = amrex::coarsen(pti.tilebox(),ref_ratio); + const std::array<Real,3>& cxyzmin_tile = WarpX::LowerCorner(ctilebox, lev-1); + + tbx = amrex::convert(ctilebox, WarpX::jx_nodal_flag); + tby = amrex::convert(ctilebox, WarpX::jy_nodal_flag); + tbz = amrex::convert(ctilebox, WarpX::jz_nodal_flag); + tbx.grow(ngJ); + tby.grow(ngJ); + tbz.grow(ngJ); + + local_jx.resize(tbx); + local_jy.resize(tby); + local_jz.resize(tbz); + + local_jx = 0.0; + local_jy = 0.0; + local_jz = 0.0; + + jx_ptr = local_jx.dataPtr(); + jy_ptr = local_jy.dataPtr(); + jz_ptr = local_jz.dataPtr(); + + jxntot = local_jx.length(); + jyntot = local_jy.length(); + jzntot = local_jz.length(); + + long ncrse = np - nfine_current; + warpx_current_deposition( + jx_ptr, &ngJ, jxntot, + jy_ptr, &ngJ, jyntot, + jz_ptr, &ngJ, jzntot, + &ncrse, xp.data()+nfine_current, yp.data()+nfine_current, zp.data()+nfine_current, + uxp.data()+nfine_current, uyp.data()+nfine_current, uzp.data()+nfine_current, + giv.data()+nfine_current, wp.data()+nfine_current, &this->charge, + &cxyzmin_tile[0], &cxyzmin_tile[1], &cxyzmin_tile[2], + &dt, &cdx[0], &cdx[1], &cdx[2], + &WarpX::nox,&WarpX::noy,&WarpX::noz, + &lvect,&WarpX::current_deposition_algo); + + FArrayBox& cjxfab = (*cjx)[pti]; + FArrayBox& cjyfab = (*cjy)[pti]; + FArrayBox& cjzfab = (*cjz)[pti]; + + const int ncomp = 1; + amrex_atomic_accumulate_fab(BL_TO_FORTRAN_3D(local_jx), + BL_TO_FORTRAN_3D(cjxfab), ncomp); + amrex_atomic_accumulate_fab(BL_TO_FORTRAN_3D(local_jy), + BL_TO_FORTRAN_3D(cjyfab), ncomp); + amrex_atomic_accumulate_fab(BL_TO_FORTRAN_3D(local_jz), + BL_TO_FORTRAN_3D(cjzfab), ncomp); + } + // // copy particle data back // BL_PROFILE_VAR_START(blp_copy); - pti.SetPosition(m_xp[thread_num], m_yp[thread_num], m_zp[thread_num]); + pti.SetPosition(xp, yp, zp); BL_PROFILE_VAR_STOP(blp_copy); } - - if (rho) DepositCharge(pti, wp, rho, crho, 1, np_current, np, thread_num, lev); + + if (rho) depositCharge(rho, crho, 1); if (cost) { const Box& tbx = pti.tilebox(); wt = (amrex::second() - wt) / tbx.d_numPts(); - FArrayBox* costfab = cost->fabPtr(pti); - AMREX_LAUNCH_HOST_DEVICE_LAMBDA ( tbx, work_box, - { - costfab->plus(wt, work_box); - }); + (*cost)[pti].plus(wt, tbx); } } } @@ -1430,10 +1304,8 @@ PhysicalParticleContainer::Evolve (int lev, void PhysicalParticleContainer::PushPX(WarpXParIter& pti, - Cuda::DeviceVector<Real>& xp, - Cuda::DeviceVector<Real>& yp, - Cuda::DeviceVector<Real>& zp, - Cuda::DeviceVector<Real>& giv, + Vector<Real>& xp, Vector<Real>& yp, Vector<Real>& zp, + Vector<Real>& giv, Real dt) { @@ -1458,19 +1330,15 @@ PhysicalParticleContainer::PushPX(WarpXParIter& pti, auto& uypold = attribs[PIdx::uyold]; auto& uzpold = attribs[PIdx::uzold]; - warpx_copy_attribs(&np, xp.dataPtr(), yp.dataPtr(), zp.dataPtr(), - uxp.dataPtr(), uyp.dataPtr(), uzp.dataPtr(), - xpold.dataPtr(), ypold.dataPtr(), zpold.dataPtr(), - uxpold.dataPtr(), uypold.dataPtr(), uzpold.dataPtr()); + warpx_copy_attribs(&np, xp.data(), yp.data(), zp.data(), + uxp.data(), uyp.data(), uzp.data(), + xpold.data(), ypold.data(), zpold.data(), + uxpold.data(), uypold.data(), uzpold.data()); #endif - warpx_particle_pusher(&np, - xp.dataPtr(), - yp.dataPtr(), - zp.dataPtr(), - uxp.dataPtr(), uyp.dataPtr(), uzp.dataPtr(), - giv.dataPtr(), + warpx_particle_pusher(&np, xp.data(), yp.data(), zp.data(), + uxp.data(), uyp.data(), uzp.data(), giv.data(), Exp.dataPtr(), Eyp.dataPtr(), Ezp.dataPtr(), Bxp.dataPtr(), Byp.dataPtr(), Bzp.dataPtr(), &this->charge, &this->mass, &dt, @@ -1483,8 +1351,6 @@ PhysicalParticleContainer::PushP (int lev, Real dt, const MultiFab& Ex, const MultiFab& Ey, const MultiFab& Ez, const MultiFab& Bx, const MultiFab& By, const MultiFab& Bz) { - BL_PROFILE("PhysicalParticleContainer::PushP"); - if (do_not_push) return; const std::array<Real,3>& dx = WarpX::CellSize(lev); @@ -1493,11 +1359,8 @@ PhysicalParticleContainer::PushP (int lev, Real dt, #pragma omp parallel #endif { -#ifdef _OPENMP - int thread_num = omp_get_thread_num(); -#else - int thread_num = 0; -#endif + Vector<Real> xp, yp, zp, giv; + for (WarpXParIter pti(*this, lev); pti.isValid(); ++pti) { const Box& box = pti.validbox(); @@ -1531,25 +1394,23 @@ PhysicalParticleContainer::PushP (int lev, Real dt, Byp.assign(np,WarpX::B_external[1]); Bzp.assign(np,WarpX::B_external[2]); - m_giv[thread_num].resize(np); + giv.resize(np); // // copy data from particle container to temp arrays // - pti.GetPosition(m_xp[thread_num], m_yp[thread_num], m_zp[thread_num]); + pti.GetPosition(xp, yp, zp); const std::array<Real,3>& xyzmin_grid = WarpX::LowerCorner(box, lev); const int* ixyzmin_grid = box.loVect(); const int ll4symtry = false; + const int l_lower_order_in_v = true; long lvect_fieldgathe = 64; warpx_geteb_energy_conserving( - &np, - m_xp[thread_num].dataPtr(), - m_yp[thread_num].dataPtr(), - m_zp[thread_num].dataPtr(), - Exp.dataPtr(),Eyp.dataPtr(),Ezp.dataPtr(), - Bxp.dataPtr(),Byp.dataPtr(),Bzp.dataPtr(), + &np, xp.data(), yp.data(), zp.data(), + Exp.data(),Eyp.data(),Ezp.data(), + Bxp.data(),Byp.data(),Bzp.data(), ixyzmin_grid, &xyzmin_grid[0], &xyzmin_grid[1], &xyzmin_grid[2], &dx[0], &dx[1], &dx[2], @@ -1560,15 +1421,11 @@ PhysicalParticleContainer::PushP (int lev, Real dt, BL_TO_FORTRAN_ANYD(bxfab), BL_TO_FORTRAN_ANYD(byfab), BL_TO_FORTRAN_ANYD(bzfab), - &ll4symtry, &WarpX::l_lower_order_in_v, + &ll4symtry, &l_lower_order_in_v, &lvect_fieldgathe, &WarpX::field_gathering_algo); - warpx_particle_pusher_momenta(&np, - m_xp[thread_num].dataPtr(), - m_yp[thread_num].dataPtr(), - m_zp[thread_num].dataPtr(), - uxp.dataPtr(), uyp.dataPtr(), uzp.dataPtr(), - m_giv[thread_num].dataPtr(), + warpx_particle_pusher_momenta(&np, xp.data(), yp.data(), zp.data(), + uxp.data(), uyp.data(), uzp.data(), giv.data(), Exp.dataPtr(), Eyp.dataPtr(), Ezp.dataPtr(), Bxp.dataPtr(), Byp.dataPtr(), Bzp.dataPtr(), &this->charge, &this->mass, &dt, @@ -1625,7 +1482,7 @@ void PhysicalParticleContainer::GetParticleSlice(const int direction, const Real #pragma omp parallel #endif { - RealVector xp_new, yp_new, zp_new; + Vector<Real> xp_new, yp_new, zp_new; for (WarpXParIter pti(*this, lev); pti.isValid(); ++pti) { @@ -1758,7 +1615,7 @@ int PhysicalParticleContainer::GetRefineFac(const Real x, const Real y, const Re stretched_boxes.push_back(bx); } - BoxArray stretched_ba(stretched_boxes.dataPtr(), stretched_boxes.size()); + BoxArray stretched_ba(stretched_boxes.data(), stretched_boxes.size()); const int num_ghost = 0; if ( stretched_ba.intersects(Box(iv, iv), num_ghost) ) |