From 65d1c056be23365a045bbcaebf4413378c4c977d Mon Sep 17 00:00:00 2001 From: WeiqunZhang Date: Tue, 8 Sep 2020 12:36:52 -0700 Subject: Remove ManagedVector from Particles/ (#1273) --- Source/Particles/PhysicalParticleContainer.cpp | 63 ++++++++++++++------------ 1 file changed, 35 insertions(+), 28 deletions(-) (limited to 'Source/Particles/PhysicalParticleContainer.cpp') diff --git a/Source/Particles/PhysicalParticleContainer.cpp b/Source/Particles/PhysicalParticleContainer.cpp index 1b4c243ac..8b7646997 100644 --- a/Source/Particles/PhysicalParticleContainer.cpp +++ b/Source/Particles/PhysicalParticleContainer.cpp @@ -641,8 +641,8 @@ PhysicalParticleContainer::AddPlasma (int lev, RealBox part_realbox) overlap_realbox.lo(2))}; // count the number of particles that each cell in overlap_box could add - Gpu::DeviceVector counts(overlap_box.numPts()+1, 0); - Gpu::DeviceVector offset(overlap_box.numPts()+1, 0); + Gpu::DeviceVector counts(overlap_box.numPts(), 0); + Gpu::DeviceVector offset(overlap_box.numPts()); auto pcounts = counts.data(); int lrrfac = rrfac; int lrefine_injection = refine_injection; @@ -674,16 +674,10 @@ PhysicalParticleContainer::AddPlasma (int lev, RealBox part_realbox) amrex::ignore_unused(k); #endif }); - Gpu::exclusive_scan(counts.begin(), counts.end(), offset.begin()); // Max number of new particles. All of them are created, // and invalid ones are then discarded - int max_new_particles; -#ifdef AMREX_USE_GPU - Gpu::dtoh_memcpy(&max_new_particles, offset.dataPtr()+overlap_box.numPts(), sizeof(int)); -#else - std::memcpy(&max_new_particles, offset.dataPtr()+overlap_box.numPts(), sizeof(int)); -#endif + int max_new_particles = Scan::ExclusiveSum(counts.size(), counts.data(), offset.data()); // Update NextID to include particles created in this function Long pid; @@ -913,13 +907,13 @@ PhysicalParticleContainer::AddPlasma (int lev, RealBox part_realbox) } }); + amrex::Gpu::synchronize(); + if (cost && WarpX::load_balance_costs_update_algo == LoadBalanceCostsUpdateAlgo::Timers) { - amrex::Gpu::synchronize(); wt = amrex::second() - wt; amrex::HostDevice::Atomic::Add( &(*cost)[mfi.index()], wt); } - amrex::Gpu::synchronize(); } // The function that calls this is responsible for redistributing particles. @@ -1149,9 +1143,10 @@ PhysicalParticleContainer::Evolve (int lev, } } + amrex::Gpu::synchronize(); + if (cost && WarpX::load_balance_costs_update_algo == LoadBalanceCostsUpdateAlgo::Timers) { - amrex::Gpu::synchronize(); wt = amrex::second() - wt; amrex::HostDevice::Atomic::Add( &(*cost)[pti.index()], wt); } @@ -1255,7 +1250,7 @@ PhysicalParticleContainer::SplitParticles (int lev) long np_split; if(split_type==0) { - np_split = pow(2, AMREX_SPACEDIM); + np_split = (AMREX_SPACEDIM == 3) ? 8 : 4; } else { np_split = 2*AMREX_SPACEDIM; } @@ -1599,8 +1594,8 @@ PhysicalParticleContainer::GetParticleSlice ( // from going out of scope after each iteration, while the kernels // may still need access to them. // Note that the destructor for WarpXParIter is synchronized. - amrex::Gpu::ManagedDeviceVector FlagForPartCopy; - amrex::Gpu::ManagedDeviceVector IndexForPartCopy; + amrex::Gpu::DeviceVector FlagForPartCopy; + amrex::Gpu::DeviceVector IndexForPartCopy; for (WarpXParIter pti(*this, lev); pti.isValid(); ++pti) { const Box& box = pti.validbox(); @@ -1658,9 +1653,7 @@ PhysicalParticleContainer::GetParticleSlice ( // exclusive scan to obtain location indices using flag values // These location indices are used to copy data from // src to dst when the copy-flag is set to 1. - amrex::Gpu::exclusive_scan(Flag,Flag+np,IndexLocation); - - const int total_partdiag_size = IndexLocation[np-1] + Flag[np-1]; + const int total_partdiag_size = amrex::Scan::ExclusiveSum(np,Flag,IndexLocation); // allocate array size for diagnostic particle array diagnostic_particles[lev][index].resize(total_partdiag_size); @@ -1740,6 +1733,7 @@ PhysicalParticleContainer::GetParticleSlice ( diag_uzp[loc] = uzp; } }); + Gpu::synchronize(); // because of FlagForPartCopy & IndexForPartCopy } } } @@ -1936,10 +1930,10 @@ PhysicalParticleContainer::InitIonizationModule () // Get atomic number and ionization energies from file int ion_element_id = ion_map_ids[physical_element]; ion_atomic_number = ion_atomic_numbers[ion_element_id]; - ionization_energies.resize(ion_atomic_number); + Vector h_ionization_energies(ion_atomic_number); int offset = ion_energy_offsets[ion_element_id]; for(int i=0; i