diff options
author | 2022-12-19 17:59:04 -0800 | |
---|---|---|
committer | 2022-12-19 17:59:04 -0800 | |
commit | fe3e6eb4474be8c00bc3f42fdd22ea0a27329570 (patch) | |
tree | 08b854d26951a8a58c55f9dc8cad6ce49b119ff8 | |
parent | a080c13de54b94ad24f606f94075a6ac4176ea06 (diff) | |
download | WarpX-fe3e6eb4474be8c00bc3f42fdd22ea0a27329570.tar.gz WarpX-fe3e6eb4474be8c00bc3f42fdd22ea0a27329570.tar.zst WarpX-fe3e6eb4474be8c00bc3f42fdd22ea0a27329570.zip |
managed memory for user defined attribs in addplasma (#3576)
-rw-r--r-- | Source/Particles/PhysicalParticleContainer.cpp | 91 |
1 files changed, 66 insertions, 25 deletions
diff --git a/Source/Particles/PhysicalParticleContainer.cpp b/Source/Particles/PhysicalParticleContainer.cpp index 18cf17162..017be509b 100644 --- a/Source/Particles/PhysicalParticleContainer.cpp +++ b/Source/Particles/PhysicalParticleContainer.cpp @@ -1059,23 +1059,43 @@ PhysicalParticleContainer::AddPlasma (int lev, RealBox part_realbox) // user-defined integer and real attributes const int n_user_int_attribs = m_user_int_attribs.size(); const int n_user_real_attribs = m_user_real_attribs.size(); - amrex::Gpu::DeviceVector<int*> pa_user_int(n_user_int_attribs); - amrex::Gpu::DeviceVector<ParticleReal*> pa_user_real(n_user_real_attribs); - amrex::Gpu::DeviceVector< amrex::ParserExecutor<7> > user_int_attrib_parserexec(n_user_int_attribs); - amrex::Gpu::DeviceVector< amrex::ParserExecutor<7> > user_real_attrib_parserexec(n_user_real_attribs); + amrex::Gpu::PinnedVector<int*> pa_user_int_pinned(n_user_int_attribs); + amrex::Gpu::PinnedVector<ParticleReal*> pa_user_real_pinned(n_user_real_attribs); + amrex::Gpu::PinnedVector< amrex::ParserExecutor<7> > user_int_attrib_parserexec_pinned(n_user_int_attribs); + amrex::Gpu::PinnedVector< amrex::ParserExecutor<7> > user_real_attrib_parserexec_pinned(n_user_real_attribs); for (int ia = 0; ia < n_user_int_attribs; ++ia) { - pa_user_int[ia] = soa.GetIntData(particle_icomps[m_user_int_attribs[ia]]).data() + old_size; - user_int_attrib_parserexec[ia] = m_user_int_attrib_parser[ia]->compile<7>(); + pa_user_int_pinned[ia] = soa.GetIntData(particle_icomps[m_user_int_attribs[ia]]).data() + old_size; + user_int_attrib_parserexec_pinned[ia] = m_user_int_attrib_parser[ia]->compile<7>(); } for (int ia = 0; ia < n_user_real_attribs; ++ia) { - pa_user_real[ia] = soa.GetRealData(particle_comps[m_user_real_attribs[ia]]).data() + old_size; - user_real_attrib_parserexec[ia] = m_user_real_attrib_parser[ia]->compile<7>(); + pa_user_real_pinned[ia] = soa.GetRealData(particle_comps[m_user_real_attribs[ia]]).data() + old_size; + user_real_attrib_parserexec_pinned[ia] = m_user_real_attrib_parser[ia]->compile<7>(); } - int** pa_user_int_data = pa_user_int.dataPtr(); - ParticleReal** pa_user_real_data = pa_user_real.dataPtr(); - amrex::ParserExecutor<7> const* user_int_parserexec_data = user_int_attrib_parserexec.dataPtr(); - amrex::ParserExecutor<7> const* user_real_parserexec_data = user_real_attrib_parserexec.dataPtr(); - +#ifdef AMREX_USE_GPU + // To avoid using managed memory, we first define pinned memory vector, initialize on cpu, + // and them memcpy to device from host + amrex::Gpu::DeviceVector<int*> d_pa_user_int(n_user_int_attribs); + amrex::Gpu::DeviceVector<ParticleReal*> d_pa_user_real(n_user_real_attribs); + amrex::Gpu::DeviceVector< amrex::ParserExecutor<7> > d_user_int_attrib_parserexec(n_user_int_attribs); + amrex::Gpu::DeviceVector< amrex::ParserExecutor<7> > d_user_real_attrib_parserexec(n_user_real_attribs); + amrex::Gpu::copyAsync(Gpu::hostToDevice, pa_user_int_pinned.begin(), + pa_user_int_pinned.end(), d_pa_user_int.begin()); + amrex::Gpu::copyAsync(Gpu::hostToDevice, pa_user_real_pinned.begin(), + pa_user_real_pinned.end(), d_pa_user_real.begin()); + amrex::Gpu::copyAsync(Gpu::hostToDevice, user_int_attrib_parserexec_pinned.begin(), + user_int_attrib_parserexec_pinned.end(), d_user_int_attrib_parserexec.begin()); + amrex::Gpu::copyAsync(Gpu::hostToDevice, user_real_attrib_parserexec_pinned.begin(), + user_real_attrib_parserexec_pinned.end(), d_user_real_attrib_parserexec.begin()); + int** pa_user_int_data = d_pa_user_int.dataPtr(); + ParticleReal** pa_user_real_data = d_pa_user_real.dataPtr(); + amrex::ParserExecutor<7> const* user_int_parserexec_data = d_user_int_attrib_parserexec.dataPtr(); + amrex::ParserExecutor<7> const* user_real_parserexec_data = d_user_real_attrib_parserexec.dataPtr(); +#else + int** pa_user_int_data = pa_user_int_pinned.dataPtr(); + ParticleReal** pa_user_real_data = pa_user_real_pinned.dataPtr(); + amrex::ParserExecutor<7> const* user_int_parserexec_data = user_int_attrib_parserexec_pinned.dataPtr(); + amrex::ParserExecutor<7> const* user_real_parserexec_data = user_real_attrib_parserexec_pinned.dataPtr(); +#endif int* pi = nullptr; if (do_field_ionization) { @@ -1591,22 +1611,43 @@ PhysicalParticleContainer::AddPlasmaFlux (amrex::Real dt) // user-defined integer and real attributes const int n_user_int_attribs = m_user_int_attribs.size(); const int n_user_real_attribs = m_user_real_attribs.size(); - amrex::Gpu::DeviceVector<int*> pa_user_int(n_user_int_attribs); - amrex::Gpu::DeviceVector<ParticleReal*> pa_user_real(n_user_real_attribs); - amrex::Gpu::DeviceVector< amrex::ParserExecutor<7> > user_int_attrib_parserexec(n_user_int_attribs); - amrex::Gpu::DeviceVector< amrex::ParserExecutor<7> > user_real_attrib_parserexec(n_user_real_attribs); + amrex::Gpu::PinnedVector<int*> pa_user_int_pinned(n_user_int_attribs); + amrex::Gpu::PinnedVector<ParticleReal*> pa_user_real_pinned(n_user_real_attribs); + amrex::Gpu::PinnedVector< amrex::ParserExecutor<7> > user_int_attrib_parserexec_pinned(n_user_int_attribs); + amrex::Gpu::PinnedVector< amrex::ParserExecutor<7> > user_real_attrib_parserexec_pinned(n_user_real_attribs); for (int ia = 0; ia < n_user_int_attribs; ++ia) { - pa_user_int[ia] = soa.GetIntData(particle_icomps[m_user_int_attribs[ia]]).data() + old_size; - user_int_attrib_parserexec[ia] = m_user_int_attrib_parser[ia]->compile<7>(); + pa_user_int_pinned[ia] = soa.GetIntData(particle_icomps[m_user_int_attribs[ia]]).data() + old_size; + user_int_attrib_parserexec_pinned[ia] = m_user_int_attrib_parser[ia]->compile<7>(); } for (int ia = 0; ia < n_user_real_attribs; ++ia) { - pa_user_real[ia] = soa.GetRealData(particle_comps[m_user_real_attribs[ia]]).data() + old_size; - user_real_attrib_parserexec[ia] = m_user_real_attrib_parser[ia]->compile<7>(); + pa_user_real_pinned[ia] = soa.GetRealData(particle_comps[m_user_real_attribs[ia]]).data() + old_size; + user_real_attrib_parserexec_pinned[ia] = m_user_real_attrib_parser[ia]->compile<7>(); } - int** pa_user_int_data = pa_user_int.dataPtr(); - ParticleReal** pa_user_real_data = pa_user_real.dataPtr(); - amrex::ParserExecutor<7> const* user_int_parserexec_data = user_int_attrib_parserexec.dataPtr(); - amrex::ParserExecutor<7> const* user_real_parserexec_data = user_real_attrib_parserexec.dataPtr(); +#ifdef AMREX_USE_GPU + // To avoid using managed memory, we first define pinned memory vector, initialize on cpu, + // and them memcpy to device from host + amrex::Gpu::DeviceVector<int*> d_pa_user_int(n_user_int_attribs); + amrex::Gpu::DeviceVector<ParticleReal*> d_pa_user_real(n_user_real_attribs); + amrex::Gpu::DeviceVector< amrex::ParserExecutor<7> > d_user_int_attrib_parserexec(n_user_int_attribs); + amrex::Gpu::DeviceVector< amrex::ParserExecutor<7> > d_user_real_attrib_parserexec(n_user_real_attribs); + amrex::Gpu::copyAsync(Gpu::hostToDevice, pa_user_int_pinned.begin(), + pa_user_int_pinned.end(), d_pa_user_int.begin()); + amrex::Gpu::copyAsync(Gpu::hostToDevice, pa_user_real_pinned.begin(), + pa_user_real_pinned.end(), d_pa_user_real.begin()); + amrex::Gpu::copyAsync(Gpu::hostToDevice, user_int_attrib_parserexec_pinned.begin(), + user_int_attrib_parserexec_pinned.end(), d_user_int_attrib_parserexec.begin()); + amrex::Gpu::copyAsync(Gpu::hostToDevice, user_real_attrib_parserexec_pinned.begin(), + user_real_attrib_parserexec_pinned.end(), d_user_real_attrib_parserexec.begin()); + int** pa_user_int_data = d_pa_user_int.dataPtr(); + ParticleReal** pa_user_real_data = d_pa_user_real.dataPtr(); + amrex::ParserExecutor<7> const* user_int_parserexec_data = d_user_int_attrib_parserexec.dataPtr(); + amrex::ParserExecutor<7> const* user_real_parserexec_data = d_user_real_attrib_parserexec.dataPtr(); +#else + int** pa_user_int_data = pa_user_int_pinned.dataPtr(); + ParticleReal** pa_user_real_data = pa_user_real_pinned.dataPtr(); + amrex::ParserExecutor<7> const* user_int_parserexec_data = user_int_attrib_parserexec_pinned.dataPtr(); + amrex::ParserExecutor<7> const* user_real_parserexec_data = user_real_attrib_parserexec_pinned.dataPtr(); +#endif int* p_ion_level = nullptr; if (do_field_ionization) { |