diff options
Diffstat (limited to 'Source')
-rw-r--r-- | Source/Particles/PhysicalParticleContainer.cpp | 63 | ||||
-rw-r--r-- | Source/Particles/RigidInjectedParticleContainer.cpp | 20 | ||||
-rw-r--r-- | Source/Particles/WarpXParticleContainer.H | 12 |
3 files changed, 54 insertions, 41 deletions
diff --git a/Source/Particles/PhysicalParticleContainer.cpp b/Source/Particles/PhysicalParticleContainer.cpp index 1b4c243ac..8b7646997 100644 --- a/Source/Particles/PhysicalParticleContainer.cpp +++ b/Source/Particles/PhysicalParticleContainer.cpp @@ -641,8 +641,8 @@ PhysicalParticleContainer::AddPlasma (int lev, RealBox part_realbox) overlap_realbox.lo(2))}; // count the number of particles that each cell in overlap_box could add - Gpu::DeviceVector<int> counts(overlap_box.numPts()+1, 0); - Gpu::DeviceVector<int> offset(overlap_box.numPts()+1, 0); + Gpu::DeviceVector<int> counts(overlap_box.numPts(), 0); + Gpu::DeviceVector<int> offset(overlap_box.numPts()); auto pcounts = counts.data(); int lrrfac = rrfac; int lrefine_injection = refine_injection; @@ -674,16 +674,10 @@ PhysicalParticleContainer::AddPlasma (int lev, RealBox part_realbox) amrex::ignore_unused(k); #endif }); - Gpu::exclusive_scan(counts.begin(), counts.end(), offset.begin()); // Max number of new particles. All of them are created, // and invalid ones are then discarded - int max_new_particles; -#ifdef AMREX_USE_GPU - Gpu::dtoh_memcpy(&max_new_particles, offset.dataPtr()+overlap_box.numPts(), sizeof(int)); -#else - std::memcpy(&max_new_particles, offset.dataPtr()+overlap_box.numPts(), sizeof(int)); -#endif + int max_new_particles = Scan::ExclusiveSum(counts.size(), counts.data(), offset.data()); // Update NextID to include particles created in this function Long pid; @@ -913,13 +907,13 @@ PhysicalParticleContainer::AddPlasma (int lev, RealBox part_realbox) } }); + amrex::Gpu::synchronize(); + if (cost && WarpX::load_balance_costs_update_algo == LoadBalanceCostsUpdateAlgo::Timers) { - amrex::Gpu::synchronize(); wt = amrex::second() - wt; amrex::HostDevice::Atomic::Add( &(*cost)[mfi.index()], wt); } - amrex::Gpu::synchronize(); } // The function that calls this is responsible for redistributing particles. @@ -1149,9 +1143,10 @@ PhysicalParticleContainer::Evolve (int lev, } } + amrex::Gpu::synchronize(); + if (cost && WarpX::load_balance_costs_update_algo == LoadBalanceCostsUpdateAlgo::Timers) { - amrex::Gpu::synchronize(); wt = amrex::second() - wt; amrex::HostDevice::Atomic::Add( &(*cost)[pti.index()], wt); } @@ -1255,7 +1250,7 @@ PhysicalParticleContainer::SplitParticles (int lev) long np_split; if(split_type==0) { - np_split = pow(2, AMREX_SPACEDIM); + np_split = (AMREX_SPACEDIM == 3) ? 8 : 4; } else { np_split = 2*AMREX_SPACEDIM; } @@ -1599,8 +1594,8 @@ PhysicalParticleContainer::GetParticleSlice ( // from going out of scope after each iteration, while the kernels // may still need access to them. // Note that the destructor for WarpXParIter is synchronized. - amrex::Gpu::ManagedDeviceVector<int> FlagForPartCopy; - amrex::Gpu::ManagedDeviceVector<int> IndexForPartCopy; + amrex::Gpu::DeviceVector<int> FlagForPartCopy; + amrex::Gpu::DeviceVector<int> IndexForPartCopy; for (WarpXParIter pti(*this, lev); pti.isValid(); ++pti) { const Box& box = pti.validbox(); @@ -1658,9 +1653,7 @@ PhysicalParticleContainer::GetParticleSlice ( // exclusive scan to obtain location indices using flag values // These location indices are used to copy data from // src to dst when the copy-flag is set to 1. - amrex::Gpu::exclusive_scan(Flag,Flag+np,IndexLocation); - - const int total_partdiag_size = IndexLocation[np-1] + Flag[np-1]; + const int total_partdiag_size = amrex::Scan::ExclusiveSum(np,Flag,IndexLocation); // allocate array size for diagnostic particle array diagnostic_particles[lev][index].resize(total_partdiag_size); @@ -1740,6 +1733,7 @@ PhysicalParticleContainer::GetParticleSlice ( diag_uzp[loc] = uzp; } }); + Gpu::synchronize(); // because of FlagForPartCopy & IndexForPartCopy } } } @@ -1936,10 +1930,10 @@ PhysicalParticleContainer::InitIonizationModule () // Get atomic number and ionization energies from file int ion_element_id = ion_map_ids[physical_element]; ion_atomic_number = ion_atomic_numbers[ion_element_id]; - ionization_energies.resize(ion_atomic_number); + Vector<Real> h_ionization_energies(ion_atomic_number); int offset = ion_energy_offsets[ion_element_id]; for(int i=0; i<ion_atomic_number; i++){ - ionization_energies[i] = table_ionization_energies[i+offset]; + h_ionization_energies[i] = table_ionization_energies[i+offset]; } // Compute ADK prefactors (See Chen, JCP 236 (2013), equation (2)) // For now, we assume l=0 and m=0. @@ -1949,22 +1943,35 @@ PhysicalParticleContainer::InitIonizationModule () Real Ea = PhysConst::m_e * PhysConst::c*PhysConst::c /PhysConst::q_e * std::pow(PhysConst::alpha,4)/PhysConst::r_e; Real UH = table_ionization_energies[0]; - Real l_eff = std::sqrt(UH/ionization_energies[0]) - 1.; + Real l_eff = std::sqrt(UH/h_ionization_energies[0]) - 1.; const Real dt = WarpX::GetInstance().getdt(0); + ionization_energies.resize(ion_atomic_number); adk_power.resize(ion_atomic_number); adk_prefactor.resize(ion_atomic_number); adk_exp_prefactor.resize(ion_atomic_number); - for (int i=0; i<ion_atomic_number; ++i){ - Real n_eff = (i+1) * std::sqrt(UH/ionization_energies[i]); + + Gpu::copyAsync(Gpu::hostToDevice, + h_ionization_energies.begin(), h_ionization_energies.end(), + ionization_energies.begin()); + + Real const* AMREX_RESTRICT p_ionization_energies = ionization_energies.data(); + Real * AMREX_RESTRICT p_adk_power = adk_power.data(); + Real * AMREX_RESTRICT p_adk_prefactor = adk_prefactor.data(); + Real * AMREX_RESTRICT p_adk_exp_prefactor = adk_exp_prefactor.data(); + amrex::ParallelFor(ion_atomic_number, [=] AMREX_GPU_DEVICE (int i) noexcept + { + Real n_eff = (i+1) * std::sqrt(UH/p_ionization_energies[i]); Real C2 = std::pow(2,2*n_eff)/(n_eff*tgamma(n_eff+l_eff+1)*tgamma(n_eff-l_eff)); - adk_power[i] = -(2*n_eff - 1); - Real Uion = ionization_energies[i]; - adk_prefactor[i] = dt * wa * C2 * ( Uion/(2*UH) ) + p_adk_power[i] = -(2*n_eff - 1); + Real Uion = p_ionization_energies[i]; + p_adk_prefactor[i] = dt * wa * C2 * ( Uion/(2*UH) ) * std::pow(2*std::pow((Uion/UH),3./2)*Ea,2*n_eff - 1); - adk_exp_prefactor[i] = -2./3 * std::pow( Uion/UH,3./2) * Ea; - } + p_adk_exp_prefactor[i] = -2./3 * std::pow( Uion/UH,3./2) * Ea; + }); + + Gpu::synchronize(); } IonizationFilterFunc diff --git a/Source/Particles/RigidInjectedParticleContainer.cpp b/Source/Particles/RigidInjectedParticleContainer.cpp index 8b8237ceb..338a23675 100644 --- a/Source/Particles/RigidInjectedParticleContainer.cpp +++ b/Source/Particles/RigidInjectedParticleContainer.cpp @@ -246,7 +246,7 @@ RigidInjectedParticleContainer::PushPX (WarpXParIter& pti, auto& uzp = attribs[PIdx::uz]; // Save the position and momenta, making copies - Gpu::ManagedDeviceVector<ParticleReal> xp_save, yp_save, zp_save; + Gpu::DeviceVector<ParticleReal> xp_save, yp_save, zp_save; RealVector uxp_save, uyp_save, uzp_save; const auto GetPosition = GetParticlePosition(pti); @@ -453,9 +453,12 @@ RigidInjectedParticleContainer::PushP (int lev, Real dt, } // Save the position and momenta, making copies - auto uxp_save = uxp; - auto uyp_save = uyp; - auto uzp_save = uzp; + amrex::Gpu::DeviceVector<ParticleReal> uxp_save(np); + amrex::Gpu::DeviceVector<ParticleReal> uyp_save(np); + amrex::Gpu::DeviceVector<ParticleReal> uzp_save(np); + ParticleReal* const AMREX_RESTRICT ux_save = uxp_save.dataPtr(); + ParticleReal* const AMREX_RESTRICT uy_save = uyp_save.dataPtr(); + ParticleReal* const AMREX_RESTRICT uz_save = uzp_save.dataPtr(); // Loop over the particles and update their momentum const amrex::Real q = this->charge; @@ -466,6 +469,10 @@ RigidInjectedParticleContainer::PushP (int lev, Real dt, amrex::ParallelFor( np, [=] AMREX_GPU_DEVICE (long ip) { + ux_save[ip] = uxpp[ip]; + uy_save[ip] = uypp[ip]; + uz_save[ip] = uzpp[ip]; + amrex::ParticleReal xp, yp, zp; getPosition(ip, xp, yp, zp); @@ -513,9 +520,6 @@ RigidInjectedParticleContainer::PushP (int lev, Real dt, // Undo the push for particles not injected yet. // It is assumed that PushP will only be called on the first and last steps // and that no particles will cross zinject_plane. - const ParticleReal* const AMREX_RESTRICT ux_save = uxp_save.dataPtr(); - const ParticleReal* const AMREX_RESTRICT uy_save = uyp_save.dataPtr(); - const ParticleReal* const AMREX_RESTRICT uz_save = uzp_save.dataPtr(); const ParticleReal zz = zinject_plane_levels[lev]; amrex::ParallelFor( pti.numParticles(), [=] AMREX_GPU_DEVICE (long i) { @@ -527,6 +531,8 @@ RigidInjectedParticleContainer::PushP (int lev, Real dt, uzpp[i] = uz_save[i]; } }); + + amrex::Gpu::synchronize(); } } } diff --git a/Source/Particles/WarpXParticleContainer.H b/Source/Particles/WarpXParticleContainer.H index afc83c041..5032bb455 100644 --- a/Source/Particles/WarpXParticleContainer.H +++ b/Source/Particles/WarpXParticleContainer.H @@ -368,10 +368,10 @@ protected: std::string ionization_product_name; int ion_atomic_number; int ionization_initial_level = 0; - amrex::Gpu::ManagedVector<amrex::Real> ionization_energies; - amrex::Gpu::ManagedVector<amrex::Real> adk_power; - amrex::Gpu::ManagedVector<amrex::Real> adk_prefactor; - amrex::Gpu::ManagedVector<amrex::Real> adk_exp_prefactor; + amrex::Gpu::DeviceVector<amrex::Real> ionization_energies; + amrex::Gpu::DeviceVector<amrex::Real> adk_power; + amrex::Gpu::DeviceVector<amrex::Real> adk_prefactor; + amrex::Gpu::DeviceVector<amrex::Real> adk_exp_prefactor; std::string physical_element; int do_resampling = 0; @@ -402,9 +402,9 @@ protected: amrex::Vector<amrex::FArrayBox> local_jz; public: - using DataContainer = amrex::Gpu::ManagedDeviceVector<amrex::ParticleReal>; using PairIndex = std::pair<int, int>; - using TmpParticleTile = std::array<DataContainer, TmpIdx::nattribs>; + using TmpParticleTile = std::array<amrex::Gpu::DeviceVector<amrex::ParticleReal>, + TmpIdx::nattribs>; using TmpParticles = amrex::Vector<std::map<PairIndex, TmpParticleTile> >; protected: |