diff options
Diffstat (limited to 'Source/Parser')
-rw-r--r-- | Source/Parser/GpuParser.H | 116 | ||||
-rw-r--r-- | Source/Parser/GpuParser.cpp | 84 | ||||
-rw-r--r-- | Source/Parser/Make.package | 1 | ||||
-rw-r--r-- | Source/Parser/WarpXParser.H | 10 | ||||
-rw-r--r-- | Source/Parser/WarpXParser.cpp | 6 | ||||
-rw-r--r-- | Source/Parser/WarpXParserWrapper.H | 20 | ||||
-rw-r--r-- | Source/Parser/wp_parser_c.h | 44 | ||||
-rw-r--r-- | Source/Parser/wp_parser_y.c | 2 |
8 files changed, 126 insertions, 157 deletions
diff --git a/Source/Parser/GpuParser.H b/Source/Parser/GpuParser.H index c6d870800..65db03524 100644 --- a/Source/Parser/GpuParser.H +++ b/Source/Parser/GpuParser.H @@ -10,42 +10,36 @@ #include <WarpXParser.H> #include <AMReX_Gpu.H> +#include <AMReX_Array.H> +#include <AMReX_TypeTraits.H> // When compiled for CPU, wrap WarpXParser and enable threading. // When compiled for GPU, store one copy of the parser in // CUDA managed memory for __device__ code, and one copy of the parser // in CUDA managed memory for __host__ code. This way, the parser can be // efficiently called from both host and device. +template <int N> class GpuParser { public: GpuParser (WarpXParser const& wp); void clear (); + template <typename... Ts> AMREX_GPU_HOST_DEVICE - amrex::Real - operator() (amrex::Real x, amrex::Real y, amrex::Real z, amrex::Real t=0.0) const noexcept + std::enable_if_t<sizeof...(Ts) == N + and amrex::Same<amrex::Real,Ts...>::value, + amrex::Real> + operator() (Ts... var) const noexcept { #ifdef AMREX_USE_GPU - -#ifdef AMREX_DEVICE_COMPILE + amrex::GpuArray<amrex::Real,N> l_var{var...}; +#if defined(__CUDA_ARCH__) || defined(__HIP_DEVICE_COMPILE__) // WarpX compiled for GPU, function compiled for __device__ - // the 3D position of each particle is stored in shared memory. - amrex::Gpu::SharedMemory<amrex::Real> gsm; - amrex::Real* p = gsm.dataPtr(); - int tid = threadIdx.x + threadIdx.y*blockDim.x + threadIdx.z*(blockDim.x*blockDim.y); - p[tid*4] = x; - p[tid*4+1] = y; - p[tid*4+2] = z; - p[tid*4+3] = t; - return wp_ast_eval(m_gpu_parser.ast); + return wp_ast_eval(m_gpu_parser.ast, l_var.data()); #else // WarpX compiled for GPU, function compiled for __host__ - m_var.x = x; - m_var.y = y; - m_var.z = z; - m_t = t; - return wp_ast_eval(m_cpu_parser.ast); + return wp_ast_eval(m_cpu_parser->ast, nullptr); #endif #else @@ -55,11 +49,8 @@ public: #else int tid = 0; #endif - m_var[tid].x = x; - m_var[tid].y = y; - m_var[tid].z = z; - m_t[tid] = t; - return wp_ast_eval(m_parser[tid]->ast); + m_var[tid] = amrex::GpuArray<amrex::Real,N>{var...}; + return wp_ast_eval(m_parser[tid]->ast, nullptr); #endif } @@ -70,16 +61,85 @@ private: // Copy of the parser running on __device__ struct wp_parser m_gpu_parser; // Copy of the parser running on __host__ - struct wp_parser m_cpu_parser; - mutable amrex::XDim3 m_var; - mutable amrex::Real m_t; + struct wp_parser* m_cpu_parser; + mutable amrex::GpuArray<amrex::Real,N> m_var; #else // Only one parser struct wp_parser** m_parser; - mutable amrex::XDim3* m_var; - mutable amrex::Real* m_t; + mutable amrex::GpuArray<amrex::Real,N>* m_var; int nthreads; #endif }; +template <int N> +GpuParser<N>::GpuParser (WarpXParser const& wp) +{ +#ifdef AMREX_USE_GPU + + struct wp_parser* a_wp = wp.m_parser; + // Initialize GPU parser: allocate memory in CUDA managed memory, + // copy all data needed on GPU to m_gpu_parser + m_gpu_parser.sz_mempool = wp_ast_size(a_wp->ast); + m_gpu_parser.p_root = (struct wp_node*) + amrex::The_Managed_Arena()->alloc(m_gpu_parser.sz_mempool); + m_gpu_parser.p_free = m_gpu_parser.p_root; + // 0: don't free the source + m_gpu_parser.ast = wp_parser_ast_dup(&m_gpu_parser, a_wp->ast, 0); + for (int i = 0; i < N; ++i) { + wp_parser_regvar_gpu(&m_gpu_parser, wp.m_varnames[i].c_str(), i); + } + + // Initialize CPU parser: + m_cpu_parser = wp_parser_dup(a_wp); + for (int i = 0; i < N; ++i) { + wp_parser_regvar(m_cpu_parser, wp.m_varnames[i].c_str(), &m_var[i]); + } + +#else // not defined AMREX_USE_GPU + +#ifdef _OPENMP + nthreads = omp_get_max_threads(); +#else // _OPENMP + nthreads = 1; +#endif // _OPENMP + + m_parser = ::new struct wp_parser*[nthreads]; + m_var = ::new amrex::GpuArray<amrex::Real,N>[nthreads]; + + for (int tid = 0; tid < nthreads; ++tid) + { +#ifdef _OPENMP + m_parser[tid] = wp_parser_dup(wp.m_parser[tid]); + for (int i = 0; i < N; ++i) { + wp_parser_regvar(m_parser[tid], wp.m_varnames[tid][i].c_str(), &(m_var[tid][i])); + } +#else // _OPENMP + m_parser[tid] = wp_parser_dup(wp.m_parser); + for (int i = 0; i < N; ++i) { + wp_parser_regvar(m_parser[tid], wp.m_varnames[i].c_str(), &(m_var[tid][i])); + } +#endif // _OPENMP + } + +#endif // AMREX_USE_GPU +} + + +template <int N> +void +GpuParser<N>::clear () +{ +#ifdef AMREX_USE_GPU + amrex::The_Managed_Arena()->free(m_gpu_parser.ast); + wp_parser_delete(m_cpu_parser); +#else + for (int tid = 0; tid < nthreads; ++tid) + { + wp_parser_delete(m_parser[tid]); + } + ::delete[] m_parser; + ::delete[] m_var; +#endif +} + #endif diff --git a/Source/Parser/GpuParser.cpp b/Source/Parser/GpuParser.cpp deleted file mode 100644 index 22fab6313..000000000 --- a/Source/Parser/GpuParser.cpp +++ /dev/null @@ -1,84 +0,0 @@ -/* Copyright 2019-2020 Maxence Thevenet, Revathi Jambunathan, Weiqun Zhang - * - * - * This file is part of WarpX. - * - * License: BSD-3-Clause-LBNL - */ -#include <GpuParser.H> - -GpuParser::GpuParser (WarpXParser const& wp) -{ -#ifdef AMREX_USE_GPU - - struct wp_parser* a_wp = wp.m_parser; - // Initialize GPU parser: allocate memory in CUDA managed memory, - // copy all data needed on GPU to m_gpu_parser - m_gpu_parser.sz_mempool = wp_ast_size(a_wp->ast); - m_gpu_parser.p_root = (struct wp_node*) - amrex::The_Managed_Arena()->alloc(m_gpu_parser.sz_mempool); - m_gpu_parser.p_free = m_gpu_parser.p_root; - // 0: don't free the source - m_gpu_parser.ast = wp_parser_ast_dup(&m_gpu_parser, a_wp->ast, 0); - wp_parser_regvar_gpu(&m_gpu_parser, "x", 0); - wp_parser_regvar_gpu(&m_gpu_parser, "y", 1); - wp_parser_regvar_gpu(&m_gpu_parser, "z", 2); - wp_parser_regvar_gpu(&m_gpu_parser, "t", 3); - - // Initialize CPU parser: allocate memory in CUDA managed memory, - // copy all data needed on CPU to m_cpu_parser - m_cpu_parser.sz_mempool = wp_ast_size(a_wp->ast); - m_cpu_parser.p_root = (struct wp_node*) - amrex::The_Managed_Arena()->alloc(m_cpu_parser.sz_mempool); - m_cpu_parser.p_free = m_cpu_parser.p_root; - // 0: don't free the source - m_cpu_parser.ast = wp_parser_ast_dup(&m_cpu_parser, a_wp->ast, 0); - wp_parser_regvar(&m_cpu_parser, "x", &(m_var.x)); - wp_parser_regvar(&m_cpu_parser, "y", &(m_var.y)); - wp_parser_regvar(&m_cpu_parser, "z", &(m_var.z)); - wp_parser_regvar(&m_cpu_parser, "t", &(m_t)); - -#else // not defined AMREX_USE_GPU - -#ifdef _OPENMP - nthreads = omp_get_max_threads(); -#else // _OPENMP - nthreads = 1; -#endif // _OPENMP - - m_parser = ::new struct wp_parser*[nthreads]; - m_var = ::new amrex::XDim3[nthreads]; - m_t = ::new amrex::Real[nthreads]; - - for (int tid = 0; tid < nthreads; ++tid) - { -#ifdef _OPENMP - m_parser[tid] = wp_parser_dup(wp.m_parser[tid]); -#else // _OPENMP - m_parser[tid] = wp_parser_dup(wp.m_parser); -#endif // _OPENMP - wp_parser_regvar(m_parser[tid], "x", &(m_var[tid].x)); - wp_parser_regvar(m_parser[tid], "y", &(m_var[tid].y)); - wp_parser_regvar(m_parser[tid], "z", &(m_var[tid].z)); - wp_parser_regvar(m_parser[tid], "t", &(m_t[tid])); - } - -#endif // AMREX_USE_GPU -} - -void -GpuParser::clear () -{ -#ifdef AMREX_USE_GPU - amrex::The_Managed_Arena()->free(m_gpu_parser.ast); - amrex::The_Managed_Arena()->free(m_cpu_parser.ast); -#else - for (int tid = 0; tid < nthreads; ++tid) - { - wp_parser_delete(m_parser[tid]); - } - ::delete[] m_parser; - ::delete[] m_var; -#endif -} - diff --git a/Source/Parser/Make.package b/Source/Parser/Make.package index 15115c138..be07e3a7d 100644 --- a/Source/Parser/Make.package +++ b/Source/Parser/Make.package @@ -4,7 +4,6 @@ cEXE_headers += wp_parser_y.h wp_parser.tab.h wp_parser.lex.h wp_parser_c.h CEXE_sources += WarpXParser.cpp CEXE_headers += WarpXParser.H CEXE_headers += GpuParser.H -CEXE_sources += GpuParser.cpp CEXE_headers += WarpXParserWrapper.H INCLUDE_LOCATIONS += $(WARPX_HOME)/Source/Parser diff --git a/Source/Parser/WarpXParser.H b/Source/Parser/WarpXParser.H index 863b35fb8..703b1effc 100644 --- a/Source/Parser/WarpXParser.H +++ b/Source/Parser/WarpXParser.H @@ -21,7 +21,7 @@ #include <omp.h> #endif -class GpuParser; +template <int N> class GpuParser; class WarpXParser { @@ -56,7 +56,7 @@ public: std::set<std::string> symbols () const; - friend class GpuParser; + template <int N> friend class GpuParser; private: void clear (); @@ -71,9 +71,11 @@ private: #ifdef _OPENMP std::vector<struct wp_parser*> m_parser; mutable std::vector<std::array<amrex::Real,16> > m_variables; + mutable std::vector<std::vector<std::string> > m_varnames; #else struct wp_parser* m_parser = nullptr; mutable std::array<amrex::Real,16> m_variables; + mutable std::vector<std::string> m_varnames; #endif }; @@ -82,9 +84,9 @@ amrex::Real WarpXParser::eval () const noexcept { #ifdef _OPENMP - return wp_ast_eval(m_parser[omp_get_thread_num()]->ast); + return wp_ast_eval(m_parser[omp_get_thread_num()]->ast,nullptr); #else - return wp_ast_eval(m_parser->ast); + return wp_ast_eval(m_parser->ast,nullptr); #endif } diff --git a/Source/Parser/WarpXParser.cpp b/Source/Parser/WarpXParser.cpp index 8c8be7ecb..dd000792b 100644 --- a/Source/Parser/WarpXParser.cpp +++ b/Source/Parser/WarpXParser.cpp @@ -27,6 +27,7 @@ WarpXParser::define (std::string const& func_body) int nthreads = omp_get_max_threads(); m_variables.resize(nthreads); + m_varnames.resize(nthreads); m_parser.resize(nthreads); m_parser[0] = wp_c_parser_new(f.c_str()); #pragma omp parallel @@ -53,6 +54,7 @@ void WarpXParser::clear () { m_expression.clear(); + m_varnames.clear(); #ifdef _OPENMP @@ -80,8 +82,10 @@ WarpXParser::registerVariable (std::string const& name, amrex::Real& var) // We assume this is called inside OMP parallel region #ifdef _OPENMP wp_parser_regvar(m_parser[omp_get_thread_num()], name.c_str(), &var); + m_varnames[omp_get_thread_num()].push_back(name); #else wp_parser_regvar(m_parser, name.c_str(), &var); + m_varnames.push_back(name); #endif } @@ -98,6 +102,7 @@ WarpXParser::registerVariables (std::vector<std::string> const& names) auto& v = m_variables[tid]; for (int j = 0; j < names.size(); ++j) { wp_parser_regvar(p, names[j].c_str(), &(v[j])); + m_varnames[tid].push_back(names[j]); } } @@ -105,6 +110,7 @@ WarpXParser::registerVariables (std::vector<std::string> const& names) for (int j = 0; j < names.size(); ++j) { wp_parser_regvar(m_parser, names[j].c_str(), &(m_variables[j])); + m_varnames.push_back(names[j]); } #endif diff --git a/Source/Parser/WarpXParserWrapper.H b/Source/Parser/WarpXParserWrapper.H index 2c76d97a3..38147aba5 100644 --- a/Source/Parser/WarpXParserWrapper.H +++ b/Source/Parser/WarpXParserWrapper.H @@ -18,24 +18,16 @@ * in a safe way. The ParserWrapper struct is used to avoid memory leak * in the EB parser functions. */ +template <int N> struct ParserWrapper - : public amrex::Gpu::Managed + : public amrex::Gpu::Managed, public GpuParser<N> { - ParserWrapper (WarpXParser const& a_parser) noexcept - : m_parser(a_parser) {} + using GpuParser<N>::GpuParser; - ~ParserWrapper() { - m_parser.clear(); - } + ParserWrapper (ParserWrapper<N> const&) = delete; + void operator= (ParserWrapper<N> const&) = delete; - AMREX_GPU_HOST_DEVICE - amrex::Real - getField (amrex::Real x, amrex::Real y, amrex::Real z, amrex::Real t=0.0) const noexcept - { - return m_parser(x,y,z,t); - } - - GpuParser m_parser; + ~ParserWrapper() { GpuParser<N>::clear(); } }; #endif diff --git a/Source/Parser/wp_parser_c.h b/Source/Parser/wp_parser_c.h index 2cf0e2c00..c9c0d82ac 100644 --- a/Source/Parser/wp_parser_c.h +++ b/Source/Parser/wp_parser_c.h @@ -23,16 +23,10 @@ extern "C" { AMREX_GPU_HOST_DEVICE inline amrex_real -wp_ast_eval (struct wp_node* node) +wp_ast_eval (struct wp_node* node, amrex_real const* x) { amrex_real result; -#ifdef AMREX_DEVICE_COMPILE - extern __shared__ amrex_real extern_xyz[]; - int tid = threadIdx.x + threadIdx.y*blockDim.x + threadIdx.z*(blockDim.x*blockDim.y); - amrex_real* x = extern_xyz + tid*4; // parser assumes 4 independent variables (x,y,z,t) -#endif - switch (node->type) { case WP_NUMBER: @@ -42,7 +36,7 @@ wp_ast_eval (struct wp_node* node) } case WP_SYMBOL: { -#ifdef AMREX_DEVICE_COMPILE +#if defined(__CUDA_ARCH__) || defined(__HIP_DEVICE_COMPILE__) int i =((struct wp_symbol*)node)->ip.i; result = x[i]; #else @@ -52,45 +46,45 @@ wp_ast_eval (struct wp_node* node) } case WP_ADD: { - result = wp_ast_eval(node->l) + wp_ast_eval(node->r); + result = wp_ast_eval(node->l,x) + wp_ast_eval(node->r,x); break; } case WP_SUB: { - result = wp_ast_eval(node->l) - wp_ast_eval(node->r); + result = wp_ast_eval(node->l,x) - wp_ast_eval(node->r,x); break; } case WP_MUL: { - result = wp_ast_eval(node->l) * wp_ast_eval(node->r); + result = wp_ast_eval(node->l,x) * wp_ast_eval(node->r,x); break; } case WP_DIV: { - result = wp_ast_eval(node->l) / wp_ast_eval(node->r); + result = wp_ast_eval(node->l,x) / wp_ast_eval(node->r,x); break; } case WP_NEG: { - result = -wp_ast_eval(node->l); + result = -wp_ast_eval(node->l,x); break; } case WP_F1: { result = wp_call_f1(((struct wp_f1*)node)->ftype, - wp_ast_eval(((struct wp_f1*)node)->l)); + wp_ast_eval(((struct wp_f1*)node)->l,x)); break; } case WP_F2: { result = wp_call_f2(((struct wp_f2*)node)->ftype, - wp_ast_eval(((struct wp_f2*)node)->l), - wp_ast_eval(((struct wp_f2*)node)->r)); + wp_ast_eval(((struct wp_f2*)node)->l,x), + wp_ast_eval(((struct wp_f2*)node)->r,x)); break; } case WP_ADD_VP: { -#ifdef AMREX_DEVICE_COMPILE +#if defined(__CUDA_ARCH__) || defined(__HIP_DEVICE_COMPILE__) int i = node->rip.i; result = node->lvp.v + x[i]; #else @@ -100,7 +94,7 @@ wp_ast_eval (struct wp_node* node) } case WP_ADD_PP: { -#ifdef AMREX_DEVICE_COMPILE +#if defined(__CUDA_ARCH__) || defined(__HIP_DEVICE_COMPILE__) int i = node->lvp.ip.i; int j = node->rip.i; result = x[i] + x[j]; @@ -111,7 +105,7 @@ wp_ast_eval (struct wp_node* node) } case WP_SUB_VP: { -#ifdef AMREX_DEVICE_COMPILE +#if defined(__CUDA_ARCH__) || defined(__HIP_DEVICE_COMPILE__) int i = node->rip.i; result = node->lvp.v - x[i]; #else @@ -121,7 +115,7 @@ wp_ast_eval (struct wp_node* node) } case WP_SUB_PP: { -#ifdef AMREX_DEVICE_COMPILE +#if defined(__CUDA_ARCH__) || defined(__HIP_DEVICE_COMPILE__) int i = node->lvp.ip.i; int j = node->rip.i; result = x[i] - x[j]; @@ -132,7 +126,7 @@ wp_ast_eval (struct wp_node* node) } case WP_MUL_VP: { -#ifdef AMREX_DEVICE_COMPILE +#if defined(__CUDA_ARCH__) || defined(__HIP_DEVICE_COMPILE__) int i = node->rip.i; result = node->lvp.v * x[i]; #else @@ -142,7 +136,7 @@ wp_ast_eval (struct wp_node* node) } case WP_MUL_PP: { -#ifdef AMREX_DEVICE_COMPILE +#if defined(__CUDA_ARCH__) || defined(__HIP_DEVICE_COMPILE__) int i = node->lvp.ip.i; int j = node->rip.i; result = x[i] * x[j]; @@ -153,7 +147,7 @@ wp_ast_eval (struct wp_node* node) } case WP_DIV_VP: { -#ifdef AMREX_DEVICE_COMPILE +#if defined(__CUDA_ARCH__) || defined(__HIP_DEVICE_COMPILE__) int i = node->rip.i; result = node->lvp.v / x[i]; #else @@ -163,7 +157,7 @@ wp_ast_eval (struct wp_node* node) } case WP_DIV_PP: { -#ifdef AMREX_DEVICE_COMPILE +#if defined(__CUDA_ARCH__) || defined(__HIP_DEVICE_COMPILE__) int i = node->lvp.ip.i; int j = node->rip.i; result = x[i] / x[j]; @@ -174,7 +168,7 @@ wp_ast_eval (struct wp_node* node) } case WP_NEG_P: { -#ifdef AMREX_DEVICE_COMPILE +#if defined(__CUDA_ARCH__) || defined(__HIP_DEVICE_COMPILE__) int i = node->rip.i; result = -x[i]; #else diff --git a/Source/Parser/wp_parser_y.c b/Source/Parser/wp_parser_y.c index b71b42638..57293ab87 100644 --- a/Source/Parser/wp_parser_y.c +++ b/Source/Parser/wp_parser_y.c @@ -80,7 +80,7 @@ yyerror (char const *s, ...) { va_list vl; va_start(vl, s); -#ifdef AMREX_DEVICE_COMPILE +#if defined(__CUDA_ARCH__) || defined(__HIP_DEVICE_COMPILE__) printf(s,"\n"); assert(0); #else |