From d59fa46d24417b67554132bc666e45886160bd09 Mon Sep 17 00:00:00 2001 From: Weiqun Zhang Date: Wed, 24 Jul 2019 19:43:18 -0700 Subject: Reimplement AddPlasma. Commits related to AddPlasma in hackathon branch are squashed into one. --- Source/Parser/GpuParser.cpp | 69 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 69 insertions(+) create mode 100644 Source/Parser/GpuParser.cpp (limited to 'Source/Parser/GpuParser.cpp') diff --git a/Source/Parser/GpuParser.cpp b/Source/Parser/GpuParser.cpp new file mode 100644 index 000000000..ebba79498 --- /dev/null +++ b/Source/Parser/GpuParser.cpp @@ -0,0 +1,69 @@ +#include + +GpuParser::GpuParser (WarpXParser const& wp) +{ +#ifdef AMREX_USE_GPU + + struct wp_parser* a_wp = wp.m_parser; + m_gpu_parser.sz_mempool = wp_ast_size((struct wp_node*)a_wp); + m_gpu_parser.p_root = (struct wp_node*) + amrex::The_Managed_Arena()->alloc(m_gpu_parser.sz_mempool); + m_gpu_parser.p_free = m_gpu_parser.p_root; + // 0: don't free the source + m_gpu_parser.ast = wp_parser_ast_dup(&m_gpu_parser, a_wp->ast, 0); + wp_parser_regvar_gpu(&m_gpu_parser, "x", 0); + wp_parser_regvar_gpu(&m_gpu_parser, "y", 1); + wp_parser_regvar_gpu(&m_gpu_parser, "z", 2); + + m_cpu_parser.sz_mempool = wp_ast_size((struct wp_node*)a_wp); + m_cpu_parser.p_root = (struct wp_node*) + amrex::The_Managed_Arena()->alloc(m_cpu_parser.sz_mempool); + m_cpu_parser.p_free = m_cpu_parser.p_root; + // 0: don't free the source + m_cpu_parser.ast = wp_parser_ast_dup(&m_cpu_parser, a_wp->ast, 0); + wp_parser_regvar(&m_cpu_parser, "x", &(m_var.x)); + wp_parser_regvar(&m_cpu_parser, "y", &(m_var.y)); + wp_parser_regvar(&m_cpu_parser, "z", &(m_var.z)); + +#else + +#ifdef _OPENMP + nthreads = omp_get_max_threads(); +#else + nthreads = 1; +#endif + + m_parser = ::new struct wp_parser*[nthreads]; + m_var = ::new amrex::XDim3[nthreads]; + + for (int tid = 0; tid < nthreads; ++tid) + { +#ifdef _OPENMP + m_parser[tid] = wp_parser_dup(wp.m_parser[tid]); +#else + m_parser[tid] = wp_parser_dup(wp.m_parser); +#endif + wp_parser_regvar(m_parser[tid], "x", &(m_var[tid].x)); + wp_parser_regvar(m_parser[tid], "y", &(m_var[tid].y)); + wp_parser_regvar(m_parser[tid], "z", &(m_var[tid].z)); + } + +#endif +} + +void +GpuParser::clear () +{ +#ifdef AMREX_USE_GPU + amrex::The_Managed_Arena()->free(m_gpu_parser.ast); + amrex::The_Managed_Arena()->free(m_cpu_parser.ast); +#else + for (int tid = 0; tid < nthreads; ++tid) + { + wp_parser_delete(m_parser[tid]); + } + ::delete[] m_parser; + ::delete[] m_var; +#endif +} + -- cgit v1.2.3 From 209fcf219017a8852aac9806240fbadc5fbabafe Mon Sep 17 00:00:00 2001 From: MaxThevenet Date: Wed, 31 Jul 2019 11:37:54 -0700 Subject: comment GPUParser and custom density/momentum profiles --- Source/Initialization/CustomDensityProb.H | 6 ++++++ Source/Initialization/CustomMomentumProb.H | 3 +++ Source/Parser/GpuParser.H | 13 ++++++++++++- Source/Parser/GpuParser.cpp | 16 ++++++++++------ 4 files changed, 31 insertions(+), 7 deletions(-) (limited to 'Source/Parser/GpuParser.cpp') diff --git a/Source/Initialization/CustomDensityProb.H b/Source/Initialization/CustomDensityProb.H index 44612c799..b00830e6c 100644 --- a/Source/Initialization/CustomDensityProb.H +++ b/Source/Initialization/CustomDensityProb.H @@ -8,11 +8,15 @@ // An example of Custom Density Profile +// struct whose getDensity returns density at a given position computed from +// a custom function, with runtime input parameters. struct InjectorDensityCustom { InjectorDensityCustom (std::string const& species_name) : p(nullptr) { + // Read parameters for custom density profile from file, and + // store them in managed memory. amrex::ParmParse pp(species_name); std::vector v; pp.getarr("custom_profile_params", v); @@ -23,6 +27,8 @@ struct InjectorDensityCustom } } + // Return density at given position, using user-defined parameters + // stored in p. AMREX_GPU_HOST_DEVICE amrex::Real getDensity (amrex::Real, amrex::Real, amrex::Real) const noexcept diff --git a/Source/Initialization/CustomMomentumProb.H b/Source/Initialization/CustomMomentumProb.H index 42090d0fa..f8bc29a05 100644 --- a/Source/Initialization/CustomMomentumProb.H +++ b/Source/Initialization/CustomMomentumProb.H @@ -8,10 +8,13 @@ // An example of Custom Momentum Profile +// struct whose getDensity returns momentum at a given position computed from +// a custom function. struct InjectorMomentumCustom { InjectorMomentumCustom (std::string const& /*a_species_name*/) {} + // Return momentum at given position (illustration: momentum=0). AMREX_GPU_HOST_DEVICE amrex::XDim3 getMomentum (amrex::Real, amrex::Real, amrex::Real) const noexcept diff --git a/Source/Parser/GpuParser.H b/Source/Parser/GpuParser.H index 99b4e5e16..1533ee6b9 100644 --- a/Source/Parser/GpuParser.H +++ b/Source/Parser/GpuParser.H @@ -4,6 +4,11 @@ #include #include +// When compiled for CPU, wrap WarpXParser and enable threading. +// When compiled for GPU, store one copy of the parser in +// CUDA managed memory for __device__ code, and one copy of the parser +// in CUDA managed memory for __host__ code. This way, the parser can be +// efficiently called from both host and device. class GpuParser { public: @@ -17,6 +22,8 @@ public: #ifdef AMREX_USE_GPU #ifdef AMREX_DEVICE_COMPILE +// WarpX compiled for GPU, function compiled for __device__ + // the 3D position of each particle is stored in shared memory. amrex::Gpu::SharedMemory gsm; double* p = gsm.dataPtr(); int tid = threadIdx.x + threadIdx.y*blockDim.x + threadIdx.z*(blockDim.x*blockDim.y); @@ -25,6 +32,7 @@ public: p[tid*3+2] = z; return wp_ast_eval(m_gpu_parser.ast); #else +// WarpX compiled for GPU, function compiled for __host__ m_var.x = x; m_var.y = y; m_var.z = z; @@ -32,7 +40,7 @@ public: #endif #else - +// WarpX compiled for CPU #ifdef _OPENMP int tid = omp_get_thread_num(); #else @@ -48,10 +56,13 @@ public: private: #ifdef AMREX_USE_GPU + // Copy of the parser running on __device__ struct wp_parser m_gpu_parser; + // Copy of the parser running on __host__ struct wp_parser m_cpu_parser; mutable amrex::XDim3 m_var; #else + // Only one parser struct wp_parser** m_parser; mutable amrex::XDim3* m_var; int nthreads; diff --git a/Source/Parser/GpuParser.cpp b/Source/Parser/GpuParser.cpp index ebba79498..97b96d465 100644 --- a/Source/Parser/GpuParser.cpp +++ b/Source/Parser/GpuParser.cpp @@ -5,6 +5,8 @@ GpuParser::GpuParser (WarpXParser const& wp) #ifdef AMREX_USE_GPU struct wp_parser* a_wp = wp.m_parser; + // Initialize GPU parser: allocate memory in CUDA managed memory, + // copy all data needed on GPU to m_gpu_parser m_gpu_parser.sz_mempool = wp_ast_size((struct wp_node*)a_wp); m_gpu_parser.p_root = (struct wp_node*) amrex::The_Managed_Arena()->alloc(m_gpu_parser.sz_mempool); @@ -15,6 +17,8 @@ GpuParser::GpuParser (WarpXParser const& wp) wp_parser_regvar_gpu(&m_gpu_parser, "y", 1); wp_parser_regvar_gpu(&m_gpu_parser, "z", 2); + // Initialize CPU parser: allocate memory in CUDA managed memory, + // copy all data needed on CPU to m_cpu_parser m_cpu_parser.sz_mempool = wp_ast_size((struct wp_node*)a_wp); m_cpu_parser.p_root = (struct wp_node*) amrex::The_Managed_Arena()->alloc(m_cpu_parser.sz_mempool); @@ -25,13 +29,13 @@ GpuParser::GpuParser (WarpXParser const& wp) wp_parser_regvar(&m_cpu_parser, "y", &(m_var.y)); wp_parser_regvar(&m_cpu_parser, "z", &(m_var.z)); -#else +#else // not defined AMREX_USE_GPU #ifdef _OPENMP nthreads = omp_get_max_threads(); -#else +#else // _OPENMP nthreads = 1; -#endif +#endif // _OPENMP m_parser = ::new struct wp_parser*[nthreads]; m_var = ::new amrex::XDim3[nthreads]; @@ -40,15 +44,15 @@ GpuParser::GpuParser (WarpXParser const& wp) { #ifdef _OPENMP m_parser[tid] = wp_parser_dup(wp.m_parser[tid]); -#else +#else // _OPENMP m_parser[tid] = wp_parser_dup(wp.m_parser); -#endif +#endif // _OPENMP wp_parser_regvar(m_parser[tid], "x", &(m_var[tid].x)); wp_parser_regvar(m_parser[tid], "y", &(m_var[tid].y)); wp_parser_regvar(m_parser[tid], "z", &(m_var[tid].z)); } -#endif +#endif // AMREX_USE_GPU } void -- cgit v1.2.3 From a632ec172a2a2993b2be67ad6977f27c881d8e45 Mon Sep 17 00:00:00 2001 From: Weiqun Zhang Date: Wed, 7 Aug 2019 19:37:09 -0700 Subject: fix a bug in GpuParser --- Source/Parser/GpuParser.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'Source/Parser/GpuParser.cpp') diff --git a/Source/Parser/GpuParser.cpp b/Source/Parser/GpuParser.cpp index 97b96d465..db1c2287d 100644 --- a/Source/Parser/GpuParser.cpp +++ b/Source/Parser/GpuParser.cpp @@ -7,7 +7,7 @@ GpuParser::GpuParser (WarpXParser const& wp) struct wp_parser* a_wp = wp.m_parser; // Initialize GPU parser: allocate memory in CUDA managed memory, // copy all data needed on GPU to m_gpu_parser - m_gpu_parser.sz_mempool = wp_ast_size((struct wp_node*)a_wp); + m_gpu_parser.sz_mempool = wp_ast_size(a_wp->ast); m_gpu_parser.p_root = (struct wp_node*) amrex::The_Managed_Arena()->alloc(m_gpu_parser.sz_mempool); m_gpu_parser.p_free = m_gpu_parser.p_root; @@ -19,7 +19,7 @@ GpuParser::GpuParser (WarpXParser const& wp) // Initialize CPU parser: allocate memory in CUDA managed memory, // copy all data needed on CPU to m_cpu_parser - m_cpu_parser.sz_mempool = wp_ast_size((struct wp_node*)a_wp); + m_cpu_parser.sz_mempool = wp_ast_size(a_wp->ast); m_cpu_parser.p_root = (struct wp_node*) amrex::The_Managed_Arena()->alloc(m_cpu_parser.sz_mempool); m_cpu_parser.p_free = m_cpu_parser.p_root; -- cgit v1.2.3