aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
Diffstat (limited to '')
-rw-r--r--.travis.yml23
-rw-r--r--Docs/source/building/building.rst2
-rw-r--r--Docs/source/visualization/yt.rst17
-rwxr-xr-xExamples/Tests/reduced_diags/analysis_reduced_diags.py10
-rw-r--r--Regression/WarpX-tests.ini1
-rw-r--r--Source/Diagnostics/BackTransformedDiagnostic.cpp4
-rw-r--r--Source/Initialization/InjectorDensity.H4
-rw-r--r--Source/Initialization/InjectorDensity.cpp17
-rw-r--r--Source/Initialization/InjectorMomentum.H4
-rw-r--r--Source/Initialization/InjectorMomentum.cpp18
-rw-r--r--Source/Initialization/InjectorPosition.H2
-rw-r--r--Source/Initialization/PlasmaInjector.H9
-rw-r--r--Source/Initialization/PlasmaInjector.cpp8
-rw-r--r--Source/Initialization/WarpXInitData.cpp40
-rw-r--r--Source/Parser/GpuParser.H116
-rw-r--r--Source/Parser/GpuParser.cpp84
-rw-r--r--Source/Parser/Make.package1
-rw-r--r--Source/Parser/WarpXParser.H10
-rw-r--r--Source/Parser/WarpXParser.cpp6
-rw-r--r--Source/Parser/WarpXParserWrapper.H20
-rw-r--r--Source/Parser/wp_parser_c.h44
-rw-r--r--Source/Parser/wp_parser_y.c2
-rw-r--r--Source/Particles/MultiParticleContainer.H12
-rw-r--r--Source/Particles/MultiParticleContainer.cpp24
-rw-r--r--Source/Particles/PhysicalParticleContainer.cpp39
-rw-r--r--Source/Utils/WarpXMovingWindow.cpp12
-rw-r--r--Source/Utils/WarpXUtil.H2
-rw-r--r--Source/Utils/WarpXUtil.cpp9
-rw-r--r--Source/WarpX.H18
29 files changed, 226 insertions, 332 deletions
diff --git a/.travis.yml b/.travis.yml
index 20e2c0549..e4aba5ca7 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -8,6 +8,7 @@
dist: xenial
language: c++
sudo: true
+cache: pip
env:
matrix:
@@ -18,18 +19,24 @@ env:
before_install:
- sudo apt-get update
- - sudo apt-get install -y gcc gfortran g++ openmpi-bin libopenmpi-dev libfftw3-dev libfftw3-mpi-dev
- # Install miniconda and python dependencies
- - wget http://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh
- - bash Miniconda3-latest-Linux-x86_64.sh -b
- - export PATH=/home/travis/miniconda3/bin:$PATH
- - pip install --upgrade pip && pip install numpy scipy matplotlib mpi4py cython
- - pip install git+https://github.com/yt-project/yt.git
+ - sudo apt-get install -y gcc gfortran g++ openmpi-bin libopenmpi-dev libfftw3-dev libfftw3-mpi-dev libhdf5-openmpi-dev pkg-config make python3 python3-pip python3-setuptools
+ # xenial misses "libadios-openmpi-dev"
+ - sudo update-alternatives --install /usr/bin/python python /usr/bin/python3 2
+ - sudo update-alternatives --set python /usr/bin/python3
+
+install:
+ - python -m pip install --upgrade pip
+ - python -m pip install --upgrade cmake cython matplotlib mpi4py numpy scipy
+ - export CEI_CMAKE="/home/travis/.local/bin/cmake"
+ - python -m pip install --upgrade git+https://github.com/yt-project/yt.git
+ - sudo curl -L -o /usr/local/bin/cmake-easyinstall https://git.io/JvLxY && sudo chmod a+x /usr/local/bin/cmake-easyinstall
script:
- export FFTW_HOME=/usr/
- - export OMP_NUM_THREADS=1
+
# Run the tests on the current commit
- export WARPX_TEST_COMMIT=$TRAVIS_COMMIT
+
# Run the script that prepares the test environment and runs the tests
+ - export OMP_NUM_THREADS=1
- ./run_test.sh
diff --git a/Docs/source/building/building.rst b/Docs/source/building/building.rst
index d07cfaecd..70c5ab7a9 100644
--- a/Docs/source/building/building.rst
+++ b/Docs/source/building/building.rst
@@ -42,7 +42,7 @@ options are set in the file ``GNUmakefile``. The default
options correspond to an optimized code for 3D geometry. The main compile-time
options are:
- * ``DIM=3D`` or ``2D``: Geometry of the simulation (note that running an executable compiled for 3D with a 2D input file will crash).
+ * ``DIM=3`` or ``2``: Geometry of the simulation (note that running an executable compiled for 3D with a 2D input file will crash).
* ``DEBUG=FALSE`` or ``TRUE``: Compiling in ``DEBUG`` mode can help tremendously during code development.
* ``USE_PSATD=FALSE`` or ``TRUE``: Compile the Pseudo-Spectral Analytical Time Domain Maxwell solver. Requires an FFT library.
* ``USE_RZ=FALSE`` or ``TRUE``: Compile for 2D axisymmetric geometry.
diff --git a/Docs/source/visualization/yt.rst b/Docs/source/visualization/yt.rst
index a9e4fb964..1f20f502c 100644
--- a/Docs/source/visualization/yt.rst
+++ b/Docs/source/visualization/yt.rst
@@ -8,25 +8,14 @@ to use yt within a `Jupyter notebook <http://jupyter.org/>`__.
Installation
------------
-From the terminal:
-
-::
-
- pip install yt jupyter
-
-or with the `Anaconda distribution <https://anaconda.org/>`__ of python (recommended):
-
-::
-
- conda install -c conda-forge yt
-
-The latest version of `yt` can be required for advanced options (e.g., rigid
-injection for particles). To built `yt` directly from source, you can use
+From the terminal, install the latest version of yt:
::
+ pip install cython
pip install git+https://github.com/yt-project/yt.git
+Alternatively, yt can be installed via its installation script (see the `yt installation web page <https://yt-project.org/doc/installing.html>`__), which can be particularly useful to set up a post-processing workflow on supercomputers.
Visualizing the data
--------------------
diff --git a/Examples/Tests/reduced_diags/analysis_reduced_diags.py b/Examples/Tests/reduced_diags/analysis_reduced_diags.py
index 2623d48ce..3d3191561 100755
--- a/Examples/Tests/reduced_diags/analysis_reduced_diags.py
+++ b/Examples/Tests/reduced_diags/analysis_reduced_diags.py
@@ -23,7 +23,6 @@ import sys
import yt
import numpy as np
import scipy.constants as scc
-from read_raw_data import read_reduced_diags
fn = sys.argv[1]
@@ -61,11 +60,10 @@ EFyt = 0.5*Es*scc.epsilon_0*dV + 0.5*Bs/scc.mu_0*dV
# PART2: get results from reduced diagnostics
-metadata, data = read_reduced_diags( './diags/reducedfiles/EF.txt' )
-EF = data['total_lev0'][-1] # last iteration
-
-metadata, data = read_reduced_diags( './diags/reducedfiles/EP.txt' )
-EP = data['total'][-1] # last iteration
+EFdata = np.genfromtxt("./diags/reducedfiles/EF.txt")
+EPdata = np.genfromtxt("./diags/reducedfiles/EP.txt")
+EF = EFdata[1][2]
+EP = EPdata[1][2]
# PART3: print and assert
diff --git a/Regression/WarpX-tests.ini b/Regression/WarpX-tests.ini
index c94ba2536..3865f0e12 100644
--- a/Regression/WarpX-tests.ini
+++ b/Regression/WarpX-tests.ini
@@ -1114,4 +1114,3 @@ compileTest = 0
doVis = 0
compareParticles = 0
analysisRoutine = Examples/Tests/reduced_diags/analysis_reduced_diags.py
-aux1File = Tools/read_raw_data.py
diff --git a/Source/Diagnostics/BackTransformedDiagnostic.cpp b/Source/Diagnostics/BackTransformedDiagnostic.cpp
index 2f50dda21..e458ee167 100644
--- a/Source/Diagnostics/BackTransformedDiagnostic.cpp
+++ b/Source/Diagnostics/BackTransformedDiagnostic.cpp
@@ -1377,7 +1377,7 @@ AddPartDataToParticleBuffer(
int nspeciesBoostedFrame) {
for (int isp = 0; isp < nspeciesBoostedFrame; ++isp) {
auto np = tmp_particle_buffer[isp].GetRealData(DiagIdx::w).size();
- if (np == 0) return;
+ if (np == 0) continue;
// allocate size of particle buffer array to np
// This is a growing array. Each time we add np elements
@@ -1442,7 +1442,7 @@ AddPartDataToParticleBuffer(
for (int isp = 0; isp < nSpeciesBackTransformedDiagnostics; ++isp) {
auto np = tmp_particle_buffer[isp].GetRealData(DiagIdx::w).size();
- if (np == 0) return;
+ if (np == 0) continue;
Real const* const AMREX_RESTRICT wp_temp =
tmp_particle_buffer[isp].GetRealData(DiagIdx::w).data();
diff --git a/Source/Initialization/InjectorDensity.H b/Source/Initialization/InjectorDensity.H
index 4558eeb96..8bb5650ba 100644
--- a/Source/Initialization/InjectorDensity.H
+++ b/Source/Initialization/InjectorDensity.H
@@ -45,7 +45,7 @@ struct InjectorDensityParser
}
// InjectorDensityParser constructs this GpuParser from WarpXParser.
- GpuParser m_parser;
+ GpuParser<3> m_parser;
};
// struct whose getDensity returns local density computed from predefined profile.
@@ -152,8 +152,6 @@ struct InjectorDensity
~InjectorDensity ();
- std::size_t sharedMemoryNeeded () const noexcept;
-
// call getDensity from the object stored in the union
// (the union is called Object, and the instance is called object).
AMREX_GPU_HOST_DEVICE
diff --git a/Source/Initialization/InjectorDensity.cpp b/Source/Initialization/InjectorDensity.cpp
index f59202db9..accadc74a 100644
--- a/Source/Initialization/InjectorDensity.cpp
+++ b/Source/Initialization/InjectorDensity.cpp
@@ -34,23 +34,6 @@ InjectorDensity::~InjectorDensity ()
}
}
-// Compute the amount of memory needed in GPU Shared Memory.
-std::size_t
-InjectorDensity::sharedMemoryNeeded () const noexcept
-{
- switch (type)
- {
- case Type::parser:
- {
- // For parser injector, the 3D position of each particle
- // and time, t, is stored in shared memory.
- return amrex::Gpu::numThreadsPerBlockParallelFor() * sizeof(double) * 4;
- }
- default:
- return 0;
- }
-}
-
InjectorDensityPredefined::InjectorDensityPredefined (
std::string const& a_species_name) noexcept
: profile(Profile::null)
diff --git a/Source/Initialization/InjectorMomentum.H b/Source/Initialization/InjectorMomentum.H
index bb5a70784..1d407508c 100644
--- a/Source/Initialization/InjectorMomentum.H
+++ b/Source/Initialization/InjectorMomentum.H
@@ -223,7 +223,7 @@ struct InjectorMomentumParser
return amrex::XDim3{m_ux_parser(x,y,z),m_uy_parser(x,y,z),m_uz_parser(x,y,z)};
}
- GpuParser m_ux_parser, m_uy_parser, m_uz_parser;
+ GpuParser<3> m_ux_parser, m_uy_parser, m_uz_parser;
};
// Base struct for momentum injector.
@@ -301,8 +301,6 @@ struct InjectorMomentum
~InjectorMomentum ();
- std::size_t sharedMemoryNeeded () const noexcept;
-
// call getMomentum from the object stored in the union
// (the union is called Object, and the instance is called object).
AMREX_GPU_HOST_DEVICE
diff --git a/Source/Initialization/InjectorMomentum.cpp b/Source/Initialization/InjectorMomentum.cpp
index edbba8ac5..0765eb0a3 100644
--- a/Source/Initialization/InjectorMomentum.cpp
+++ b/Source/Initialization/InjectorMomentum.cpp
@@ -28,21 +28,3 @@ InjectorMomentum::~InjectorMomentum ()
}
}
}
-
-// Compute the amount of memory needed in GPU Shared Memory.
-std::size_t
-InjectorMomentum::sharedMemoryNeeded () const noexcept
-{
- switch (type)
- {
- case Type::parser:
- {
- // For parser injector, the 3D position of each particle and time, t,
- // is stored in shared memory.
- return amrex::Gpu::numThreadsPerBlockParallelFor() * sizeof(double) * 4;
- }
- default:
- return 0;
- }
-}
-
diff --git a/Source/Initialization/InjectorPosition.H b/Source/Initialization/InjectorPosition.H
index a8d2200e9..0ef6c0390 100644
--- a/Source/Initialization/InjectorPosition.H
+++ b/Source/Initialization/InjectorPosition.H
@@ -105,8 +105,6 @@ struct InjectorPosition
void operator= (InjectorPosition const&) = delete;
void operator= (InjectorPosition &&) = delete;
- std::size_t sharedMemoryNeeded () const noexcept { return 0; }
-
// call getPositionUnitBox from the object stored in the union
// (the union is called Object, and the instance is called object).
AMREX_GPU_HOST_DEVICE
diff --git a/Source/Initialization/PlasmaInjector.H b/Source/Initialization/PlasmaInjector.H
index 70d99b9a3..308b4121e 100644
--- a/Source/Initialization/PlasmaInjector.H
+++ b/Source/Initialization/PlasmaInjector.H
@@ -83,15 +83,6 @@ public:
InjectorDensity* getInjectorDensity ();
InjectorMomentum* getInjectorMomentum ();
- // When running on GPU, injector for position, momentum and density store
- // particle 3D positions in shared memory IF using the parser.
- std::size_t
- sharedMemoryNeeded () const noexcept {
- return amrex::max(inj_pos->sharedMemoryNeeded(),
- inj_rho->sharedMemoryNeeded(),
- inj_mom->sharedMemoryNeeded());
- }
-
protected:
amrex::Real mass, charge;
diff --git a/Source/Initialization/PlasmaInjector.cpp b/Source/Initialization/PlasmaInjector.cpp
index cacbaab75..5fa82e48f 100644
--- a/Source/Initialization/PlasmaInjector.cpp
+++ b/Source/Initialization/PlasmaInjector.cpp
@@ -201,7 +201,7 @@ void PlasmaInjector::parseDensity (ParmParse& pp)
Store_parserString(pp, "density_function(x,y,z)", str_density_function);
// Construct InjectorDensity with InjectorDensityParser.
inj_rho.reset(new InjectorDensity((InjectorDensityParser*)nullptr,
- makeParser(str_density_function)));
+ makeParser(str_density_function,{"x","y","z"})));
} else {
StringParseAbortMessage("Density profile type", rho_prof_s);
}
@@ -324,9 +324,9 @@ void PlasmaInjector::parseMomentum (ParmParse& pp)
str_momentum_function_uz);
// Construct InjectorMomentum with InjectorMomentumParser.
inj_mom.reset(new InjectorMomentum((InjectorMomentumParser*)nullptr,
- makeParser(str_momentum_function_ux),
- makeParser(str_momentum_function_uy),
- makeParser(str_momentum_function_uz)));
+ makeParser(str_momentum_function_ux,{"x","y","z"}),
+ makeParser(str_momentum_function_uy,{"x","y","z"}),
+ makeParser(str_momentum_function_uz,{"x","y","z"})));
} else {
StringParseAbortMessage("Momentum distribution type", mom_dist_s);
}
diff --git a/Source/Initialization/WarpXInitData.cpp b/Source/Initialization/WarpXInitData.cpp
index 957e22b68..e5571c519 100644
--- a/Source/Initialization/WarpXInitData.cpp
+++ b/Source/Initialization/WarpXInitData.cpp
@@ -319,12 +319,12 @@ WarpX::InitLevelData (int lev, Real time)
Store_parserString(pp, "Bz_external_grid_function(x,y,z)",
str_Bz_ext_grid_function);
- Bxfield_parser.reset(new ParserWrapper(
- makeParser(str_Bx_ext_grid_function)));
- Byfield_parser.reset(new ParserWrapper(
- makeParser(str_By_ext_grid_function)));
- Bzfield_parser.reset(new ParserWrapper(
- makeParser(str_Bz_ext_grid_function)));
+ Bxfield_parser.reset(new ParserWrapper<3>(
+ makeParser(str_Bx_ext_grid_function,{"x","y","z"})));
+ Byfield_parser.reset(new ParserWrapper<3>(
+ makeParser(str_By_ext_grid_function,{"x","y","z"})));
+ Bzfield_parser.reset(new ParserWrapper<3>(
+ makeParser(str_Bz_ext_grid_function,{"x","y","z"})));
// Initialize Bfield_fp with external function
InitializeExternalFieldsOnGridUsingParser(Bfield_fp[lev][0].get(),
@@ -371,12 +371,12 @@ WarpX::InitLevelData (int lev, Real time)
Store_parserString(pp, "Ez_external_grid_function(x,y,z)",
str_Ez_ext_grid_function);
- Exfield_parser.reset(new ParserWrapper(
- makeParser(str_Ex_ext_grid_function)));
- Eyfield_parser.reset(new ParserWrapper(
- makeParser(str_Ey_ext_grid_function)));
- Ezfield_parser.reset(new ParserWrapper(
- makeParser(str_Ez_ext_grid_function)));
+ Exfield_parser.reset(new ParserWrapper<3>(
+ makeParser(str_Ex_ext_grid_function,{"x","y","z"})));
+ Eyfield_parser.reset(new ParserWrapper<3>(
+ makeParser(str_Ey_ext_grid_function,{"x","y","z"})));
+ Ezfield_parser.reset(new ParserWrapper<3>(
+ makeParser(str_Ez_ext_grid_function,{"x","y","z"})));
// Initialize Efield_fp with external function
InitializeExternalFieldsOnGridUsingParser(Efield_fp[lev][0].get(),
@@ -467,8 +467,8 @@ WarpX::InitLevelDataFFT (int lev, Real time)
void
WarpX::InitializeExternalFieldsOnGridUsingParser (
MultiFab *mfx, MultiFab *mfy, MultiFab *mfz,
- ParserWrapper *xfield_parser, ParserWrapper *yfield_parser,
- ParserWrapper *zfield_parser, IntVect x_nodal_flag,
+ ParserWrapper<3> *xfield_parser, ParserWrapper<3> *yfield_parser,
+ ParserWrapper<3> *zfield_parser, IntVect x_nodal_flag,
IntVect y_nodal_flag, IntVect z_nodal_flag,
const int lev)
{
@@ -518,7 +518,7 @@ WarpX::InitializeExternalFieldsOnGridUsingParser (
Real z = k*dx_lev[2] + real_box.lo(2) + fac_z;
#endif
// Initialize the x-component of the field.
- mfxfab(i,j,k) = xfield_parser->getField(x,y,z);
+ mfxfab(i,j,k) = (*xfield_parser)(x,y,z);
},
[=] AMREX_GPU_DEVICE (int i, int j, int k) {
Real fac_x = (1.0 - mfy_type[0]) * dx_lev[0]*0.5;
@@ -534,7 +534,7 @@ WarpX::InitializeExternalFieldsOnGridUsingParser (
Real z = k*dx_lev[2] + real_box.lo(2) + fac_z;
#endif
// Initialize the y-component of the field.
- mfyfab(i,j,k) = yfield_parser->getField(x,y,z);
+ mfyfab(i,j,k) = (*yfield_parser)(x,y,z);
},
[=] AMREX_GPU_DEVICE (int i, int j, int k) {
Real fac_x = (1.0 - mfz_type[0]) * dx_lev[0]*0.5;
@@ -550,13 +550,9 @@ WarpX::InitializeExternalFieldsOnGridUsingParser (
Real z = k*dx_lev[2] + real_box.lo(2) + fac_z;
#endif
// Initialize the z-component of the field.
- mfzfab(i,j,k) = zfield_parser->getField(x,y,z);
- },
- /* To allocate shared memory for the GPU threads. */
- /* But, for now only 4 doubles (x,y,z,t) are allocated. */
- amrex::Gpu::numThreadsPerBlockParallelFor() * sizeof(double) * 4
+ mfzfab(i,j,k) = (*zfield_parser)(x,y,z);
+ }
);
-
}
}
diff --git a/Source/Parser/GpuParser.H b/Source/Parser/GpuParser.H
index c6d870800..65db03524 100644
--- a/Source/Parser/GpuParser.H
+++ b/Source/Parser/GpuParser.H
@@ -10,42 +10,36 @@
#include <WarpXParser.H>
#include <AMReX_Gpu.H>
+#include <AMReX_Array.H>
+#include <AMReX_TypeTraits.H>
// When compiled for CPU, wrap WarpXParser and enable threading.
// When compiled for GPU, store one copy of the parser in
// CUDA managed memory for __device__ code, and one copy of the parser
// in CUDA managed memory for __host__ code. This way, the parser can be
// efficiently called from both host and device.
+template <int N>
class GpuParser
{
public:
GpuParser (WarpXParser const& wp);
void clear ();
+ template <typename... Ts>
AMREX_GPU_HOST_DEVICE
- amrex::Real
- operator() (amrex::Real x, amrex::Real y, amrex::Real z, amrex::Real t=0.0) const noexcept
+ std::enable_if_t<sizeof...(Ts) == N
+ and amrex::Same<amrex::Real,Ts...>::value,
+ amrex::Real>
+ operator() (Ts... var) const noexcept
{
#ifdef AMREX_USE_GPU
-
-#ifdef AMREX_DEVICE_COMPILE
+ amrex::GpuArray<amrex::Real,N> l_var{var...};
+#if defined(__CUDA_ARCH__) || defined(__HIP_DEVICE_COMPILE__)
// WarpX compiled for GPU, function compiled for __device__
- // the 3D position of each particle is stored in shared memory.
- amrex::Gpu::SharedMemory<amrex::Real> gsm;
- amrex::Real* p = gsm.dataPtr();
- int tid = threadIdx.x + threadIdx.y*blockDim.x + threadIdx.z*(blockDim.x*blockDim.y);
- p[tid*4] = x;
- p[tid*4+1] = y;
- p[tid*4+2] = z;
- p[tid*4+3] = t;
- return wp_ast_eval(m_gpu_parser.ast);
+ return wp_ast_eval(m_gpu_parser.ast, l_var.data());
#else
// WarpX compiled for GPU, function compiled for __host__
- m_var.x = x;
- m_var.y = y;
- m_var.z = z;
- m_t = t;
- return wp_ast_eval(m_cpu_parser.ast);
+ return wp_ast_eval(m_cpu_parser->ast, nullptr);
#endif
#else
@@ -55,11 +49,8 @@ public:
#else
int tid = 0;
#endif
- m_var[tid].x = x;
- m_var[tid].y = y;
- m_var[tid].z = z;
- m_t[tid] = t;
- return wp_ast_eval(m_parser[tid]->ast);
+ m_var[tid] = amrex::GpuArray<amrex::Real,N>{var...};
+ return wp_ast_eval(m_parser[tid]->ast, nullptr);
#endif
}
@@ -70,16 +61,85 @@ private:
// Copy of the parser running on __device__
struct wp_parser m_gpu_parser;
// Copy of the parser running on __host__
- struct wp_parser m_cpu_parser;
- mutable amrex::XDim3 m_var;
- mutable amrex::Real m_t;
+ struct wp_parser* m_cpu_parser;
+ mutable amrex::GpuArray<amrex::Real,N> m_var;
#else
// Only one parser
struct wp_parser** m_parser;
- mutable amrex::XDim3* m_var;
- mutable amrex::Real* m_t;
+ mutable amrex::GpuArray<amrex::Real,N>* m_var;
int nthreads;
#endif
};
+template <int N>
+GpuParser<N>::GpuParser (WarpXParser const& wp) // builds the parser copies from wp and binds N variables
+{
+#ifdef AMREX_USE_GPU
+
+ struct wp_parser* a_wp = wp.m_parser;
+ // Initialize GPU parser: size a memory pool from the source AST, allocate
+ // it in CUDA managed memory, and copy the AST into m_gpu_parser
+ m_gpu_parser.sz_mempool = wp_ast_size(a_wp->ast);
+ m_gpu_parser.p_root = (struct wp_node*)
+ amrex::The_Managed_Arena()->alloc(m_gpu_parser.sz_mempool);
+ m_gpu_parser.p_free = m_gpu_parser.p_root;
+ // last argument 0: don't free the source AST
+ m_gpu_parser.ast = wp_parser_ast_dup(&m_gpu_parser, a_wp->ast, 0);
+ for (int i = 0; i < N; ++i) { // NOTE(review): assumes wp has at least N registered names - confirm
+ wp_parser_regvar_gpu(&m_gpu_parser, wp.m_varnames[i].c_str(), i); // name -> index i
+ }
+
+ // Initialize CPU parser: duplicate the source parser and bind each name to m_var[i]
+ m_cpu_parser = wp_parser_dup(a_wp);
+ for (int i = 0; i < N; ++i) {
+ wp_parser_regvar(m_cpu_parser, wp.m_varnames[i].c_str(), &m_var[i]);
+ }
+
+#else // not defined AMREX_USE_GPU
+
+#ifdef _OPENMP
+ nthreads = omp_get_max_threads();
+#else // _OPENMP
+ nthreads = 1;
+#endif // _OPENMP
+
+ m_parser = ::new struct wp_parser*[nthreads]; // one parser duplicate per thread
+ m_var = ::new amrex::GpuArray<amrex::Real,N>[nthreads]; // one N-element variable buffer per thread
+
+ for (int tid = 0; tid < nthreads; ++tid)
+ {
+#ifdef _OPENMP
+ m_parser[tid] = wp_parser_dup(wp.m_parser[tid]);
+ for (int i = 0; i < N; ++i) { // bind this thread's names to this thread's buffer
+ wp_parser_regvar(m_parser[tid], wp.m_varnames[tid][i].c_str(), &(m_var[tid][i]));
+ }
+#else // _OPENMP
+ m_parser[tid] = wp_parser_dup(wp.m_parser);
+ for (int i = 0; i < N; ++i) { // single parser in wp: names are not indexed by thread
+ wp_parser_regvar(m_parser[tid], wp.m_varnames[i].c_str(), &(m_var[tid][i]));
+ }
+#endif // _OPENMP
+ }
+
+#endif // AMREX_USE_GPU
+}
+
+
+template <int N>
+void
+GpuParser<N>::clear () // releases what the constructor allocated
+{
+#ifdef AMREX_USE_GPU
+ amrex::The_Managed_Arena()->free(m_gpu_parser.ast); // NOTE(review): pool was allocated as p_root; presumably ast == p_root - confirm
+ wp_parser_delete(m_cpu_parser); // host-side duplicate made with wp_parser_dup
+#else // not defined AMREX_USE_GPU
+ for (int tid = 0; tid < nthreads; ++tid)
+ {
+ wp_parser_delete(m_parser[tid]); // per-thread duplicate made with wp_parser_dup
+ }
+ ::delete[] m_parser;
+ ::delete[] m_var;
+#endif // AMREX_USE_GPU
+}
+
#endif
diff --git a/Source/Parser/GpuParser.cpp b/Source/Parser/GpuParser.cpp
deleted file mode 100644
index 22fab6313..000000000
--- a/Source/Parser/GpuParser.cpp
+++ /dev/null
@@ -1,84 +0,0 @@
-/* Copyright 2019-2020 Maxence Thevenet, Revathi Jambunathan, Weiqun Zhang
- *
- *
- * This file is part of WarpX.
- *
- * License: BSD-3-Clause-LBNL
- */
-#include <GpuParser.H>
-
-GpuParser::GpuParser (WarpXParser const& wp)
-{
-#ifdef AMREX_USE_GPU
-
- struct wp_parser* a_wp = wp.m_parser;
- // Initialize GPU parser: allocate memory in CUDA managed memory,
- // copy all data needed on GPU to m_gpu_parser
- m_gpu_parser.sz_mempool = wp_ast_size(a_wp->ast);
- m_gpu_parser.p_root = (struct wp_node*)
- amrex::The_Managed_Arena()->alloc(m_gpu_parser.sz_mempool);
- m_gpu_parser.p_free = m_gpu_parser.p_root;
- // 0: don't free the source
- m_gpu_parser.ast = wp_parser_ast_dup(&m_gpu_parser, a_wp->ast, 0);
- wp_parser_regvar_gpu(&m_gpu_parser, "x", 0);
- wp_parser_regvar_gpu(&m_gpu_parser, "y", 1);
- wp_parser_regvar_gpu(&m_gpu_parser, "z", 2);
- wp_parser_regvar_gpu(&m_gpu_parser, "t", 3);
-
- // Initialize CPU parser: allocate memory in CUDA managed memory,
- // copy all data needed on CPU to m_cpu_parser
- m_cpu_parser.sz_mempool = wp_ast_size(a_wp->ast);
- m_cpu_parser.p_root = (struct wp_node*)
- amrex::The_Managed_Arena()->alloc(m_cpu_parser.sz_mempool);
- m_cpu_parser.p_free = m_cpu_parser.p_root;
- // 0: don't free the source
- m_cpu_parser.ast = wp_parser_ast_dup(&m_cpu_parser, a_wp->ast, 0);
- wp_parser_regvar(&m_cpu_parser, "x", &(m_var.x));
- wp_parser_regvar(&m_cpu_parser, "y", &(m_var.y));
- wp_parser_regvar(&m_cpu_parser, "z", &(m_var.z));
- wp_parser_regvar(&m_cpu_parser, "t", &(m_t));
-
-#else // not defined AMREX_USE_GPU
-
-#ifdef _OPENMP
- nthreads = omp_get_max_threads();
-#else // _OPENMP
- nthreads = 1;
-#endif // _OPENMP
-
- m_parser = ::new struct wp_parser*[nthreads];
- m_var = ::new amrex::XDim3[nthreads];
- m_t = ::new amrex::Real[nthreads];
-
- for (int tid = 0; tid < nthreads; ++tid)
- {
-#ifdef _OPENMP
- m_parser[tid] = wp_parser_dup(wp.m_parser[tid]);
-#else // _OPENMP
- m_parser[tid] = wp_parser_dup(wp.m_parser);
-#endif // _OPENMP
- wp_parser_regvar(m_parser[tid], "x", &(m_var[tid].x));
- wp_parser_regvar(m_parser[tid], "y", &(m_var[tid].y));
- wp_parser_regvar(m_parser[tid], "z", &(m_var[tid].z));
- wp_parser_regvar(m_parser[tid], "t", &(m_t[tid]));
- }
-
-#endif // AMREX_USE_GPU
-}
-
-void
-GpuParser::clear ()
-{
-#ifdef AMREX_USE_GPU
- amrex::The_Managed_Arena()->free(m_gpu_parser.ast);
- amrex::The_Managed_Arena()->free(m_cpu_parser.ast);
-#else
- for (int tid = 0; tid < nthreads; ++tid)
- {
- wp_parser_delete(m_parser[tid]);
- }
- ::delete[] m_parser;
- ::delete[] m_var;
-#endif
-}
-
diff --git a/Source/Parser/Make.package b/Source/Parser/Make.package
index 15115c138..be07e3a7d 100644
--- a/Source/Parser/Make.package
+++ b/Source/Parser/Make.package
@@ -4,7 +4,6 @@ cEXE_headers += wp_parser_y.h wp_parser.tab.h wp_parser.lex.h wp_parser_c.h
CEXE_sources += WarpXParser.cpp
CEXE_headers += WarpXParser.H
CEXE_headers += GpuParser.H
-CEXE_sources += GpuParser.cpp
CEXE_headers += WarpXParserWrapper.H
INCLUDE_LOCATIONS += $(WARPX_HOME)/Source/Parser
diff --git a/Source/Parser/WarpXParser.H b/Source/Parser/WarpXParser.H
index 863b35fb8..703b1effc 100644
--- a/Source/Parser/WarpXParser.H
+++ b/Source/Parser/WarpXParser.H
@@ -21,7 +21,7 @@
#include <omp.h>
#endif
-class GpuParser;
+template <int N> class GpuParser;
class WarpXParser
{
@@ -56,7 +56,7 @@ public:
std::set<std::string> symbols () const;
- friend class GpuParser;
+ template <int N> friend class GpuParser;
private:
void clear ();
@@ -71,9 +71,11 @@ private:
#ifdef _OPENMP
std::vector<struct wp_parser*> m_parser;
mutable std::vector<std::array<amrex::Real,16> > m_variables;
+ mutable std::vector<std::vector<std::string> > m_varnames;
#else
struct wp_parser* m_parser = nullptr;
mutable std::array<amrex::Real,16> m_variables;
+ mutable std::vector<std::string> m_varnames;
#endif
};
@@ -82,9 +84,9 @@ amrex::Real
WarpXParser::eval () const noexcept
{
#ifdef _OPENMP
- return wp_ast_eval(m_parser[omp_get_thread_num()]->ast);
+ return wp_ast_eval(m_parser[omp_get_thread_num()]->ast,nullptr);
#else
- return wp_ast_eval(m_parser->ast);
+ return wp_ast_eval(m_parser->ast,nullptr);
#endif
}
diff --git a/Source/Parser/WarpXParser.cpp b/Source/Parser/WarpXParser.cpp
index 8c8be7ecb..dd000792b 100644
--- a/Source/Parser/WarpXParser.cpp
+++ b/Source/Parser/WarpXParser.cpp
@@ -27,6 +27,7 @@ WarpXParser::define (std::string const& func_body)
int nthreads = omp_get_max_threads();
m_variables.resize(nthreads);
+ m_varnames.resize(nthreads);
m_parser.resize(nthreads);
m_parser[0] = wp_c_parser_new(f.c_str());
#pragma omp parallel
@@ -53,6 +54,7 @@ void
WarpXParser::clear ()
{
m_expression.clear();
+ m_varnames.clear();
#ifdef _OPENMP
@@ -80,8 +82,10 @@ WarpXParser::registerVariable (std::string const& name, amrex::Real& var)
// We assume this is called inside OMP parallel region
#ifdef _OPENMP
wp_parser_regvar(m_parser[omp_get_thread_num()], name.c_str(), &var);
+ m_varnames[omp_get_thread_num()].push_back(name);
#else
wp_parser_regvar(m_parser, name.c_str(), &var);
+ m_varnames.push_back(name);
#endif
}
@@ -98,6 +102,7 @@ WarpXParser::registerVariables (std::vector<std::string> const& names)
auto& v = m_variables[tid];
for (int j = 0; j < names.size(); ++j) {
wp_parser_regvar(p, names[j].c_str(), &(v[j]));
+ m_varnames[tid].push_back(names[j]);
}
}
@@ -105,6 +110,7 @@ WarpXParser::registerVariables (std::vector<std::string> const& names)
for (int j = 0; j < names.size(); ++j) {
wp_parser_regvar(m_parser, names[j].c_str(), &(m_variables[j]));
+ m_varnames.push_back(names[j]);
}
#endif
diff --git a/Source/Parser/WarpXParserWrapper.H b/Source/Parser/WarpXParserWrapper.H
index 2c76d97a3..38147aba5 100644
--- a/Source/Parser/WarpXParserWrapper.H
+++ b/Source/Parser/WarpXParserWrapper.H
@@ -18,24 +18,16 @@
* in a safe way. The ParserWrapper struct is used to avoid memory leak
* in the EB parser functions.
*/
+template <int N>
struct ParserWrapper
- : public amrex::Gpu::Managed
+ : public amrex::Gpu::Managed, public GpuParser<N>
{
- ParserWrapper (WarpXParser const& a_parser) noexcept
- : m_parser(a_parser) {}
+ using GpuParser<N>::GpuParser;
- ~ParserWrapper() {
- m_parser.clear();
- }
+ ParserWrapper (ParserWrapper<N> const&) = delete;
+ void operator= (ParserWrapper<N> const&) = delete;
- AMREX_GPU_HOST_DEVICE
- amrex::Real
- getField (amrex::Real x, amrex::Real y, amrex::Real z, amrex::Real t=0.0) const noexcept
- {
- return m_parser(x,y,z,t);
- }
-
- GpuParser m_parser;
+ ~ParserWrapper() { GpuParser<N>::clear(); }
};
#endif
diff --git a/Source/Parser/wp_parser_c.h b/Source/Parser/wp_parser_c.h
index 2cf0e2c00..c9c0d82ac 100644
--- a/Source/Parser/wp_parser_c.h
+++ b/Source/Parser/wp_parser_c.h
@@ -23,16 +23,10 @@ extern "C" {
AMREX_GPU_HOST_DEVICE
inline amrex_real
-wp_ast_eval (struct wp_node* node)
+wp_ast_eval (struct wp_node* node, amrex_real const* x)
{
amrex_real result;
-#ifdef AMREX_DEVICE_COMPILE
- extern __shared__ amrex_real extern_xyz[];
- int tid = threadIdx.x + threadIdx.y*blockDim.x + threadIdx.z*(blockDim.x*blockDim.y);
- amrex_real* x = extern_xyz + tid*4; // parser assumes 4 independent variables (x,y,z,t)
-#endif
-
switch (node->type)
{
case WP_NUMBER:
@@ -42,7 +36,7 @@ wp_ast_eval (struct wp_node* node)
}
case WP_SYMBOL:
{
-#ifdef AMREX_DEVICE_COMPILE
+#if defined(__CUDA_ARCH__) || defined(__HIP_DEVICE_COMPILE__)
int i =((struct wp_symbol*)node)->ip.i;
result = x[i];
#else
@@ -52,45 +46,45 @@ wp_ast_eval (struct wp_node* node)
}
case WP_ADD:
{
- result = wp_ast_eval(node->l) + wp_ast_eval(node->r);
+ result = wp_ast_eval(node->l,x) + wp_ast_eval(node->r,x);
break;
}
case WP_SUB:
{
- result = wp_ast_eval(node->l) - wp_ast_eval(node->r);
+ result = wp_ast_eval(node->l,x) - wp_ast_eval(node->r,x);
break;
}
case WP_MUL:
{
- result = wp_ast_eval(node->l) * wp_ast_eval(node->r);
+ result = wp_ast_eval(node->l,x) * wp_ast_eval(node->r,x);
break;
}
case WP_DIV:
{
- result = wp_ast_eval(node->l) / wp_ast_eval(node->r);
+ result = wp_ast_eval(node->l,x) / wp_ast_eval(node->r,x);
break;
}
case WP_NEG:
{
- result = -wp_ast_eval(node->l);
+ result = -wp_ast_eval(node->l,x);
break;
}
case WP_F1:
{
result = wp_call_f1(((struct wp_f1*)node)->ftype,
- wp_ast_eval(((struct wp_f1*)node)->l));
+ wp_ast_eval(((struct wp_f1*)node)->l,x));
break;
}
case WP_F2:
{
result = wp_call_f2(((struct wp_f2*)node)->ftype,
- wp_ast_eval(((struct wp_f2*)node)->l),
- wp_ast_eval(((struct wp_f2*)node)->r));
+ wp_ast_eval(((struct wp_f2*)node)->l,x),
+ wp_ast_eval(((struct wp_f2*)node)->r,x));
break;
}
case WP_ADD_VP:
{
-#ifdef AMREX_DEVICE_COMPILE
+#if defined(__CUDA_ARCH__) || defined(__HIP_DEVICE_COMPILE__)
int i = node->rip.i;
result = node->lvp.v + x[i];
#else
@@ -100,7 +94,7 @@ wp_ast_eval (struct wp_node* node)
}
case WP_ADD_PP:
{
-#ifdef AMREX_DEVICE_COMPILE
+#if defined(__CUDA_ARCH__) || defined(__HIP_DEVICE_COMPILE__)
int i = node->lvp.ip.i;
int j = node->rip.i;
result = x[i] + x[j];
@@ -111,7 +105,7 @@ wp_ast_eval (struct wp_node* node)
}
case WP_SUB_VP:
{
-#ifdef AMREX_DEVICE_COMPILE
+#if defined(__CUDA_ARCH__) || defined(__HIP_DEVICE_COMPILE__)
int i = node->rip.i;
result = node->lvp.v - x[i];
#else
@@ -121,7 +115,7 @@ wp_ast_eval (struct wp_node* node)
}
case WP_SUB_PP:
{
-#ifdef AMREX_DEVICE_COMPILE
+#if defined(__CUDA_ARCH__) || defined(__HIP_DEVICE_COMPILE__)
int i = node->lvp.ip.i;
int j = node->rip.i;
result = x[i] - x[j];
@@ -132,7 +126,7 @@ wp_ast_eval (struct wp_node* node)
}
case WP_MUL_VP:
{
-#ifdef AMREX_DEVICE_COMPILE
+#if defined(__CUDA_ARCH__) || defined(__HIP_DEVICE_COMPILE__)
int i = node->rip.i;
result = node->lvp.v * x[i];
#else
@@ -142,7 +136,7 @@ wp_ast_eval (struct wp_node* node)
}
case WP_MUL_PP:
{
-#ifdef AMREX_DEVICE_COMPILE
+#if defined(__CUDA_ARCH__) || defined(__HIP_DEVICE_COMPILE__)
int i = node->lvp.ip.i;
int j = node->rip.i;
result = x[i] * x[j];
@@ -153,7 +147,7 @@ wp_ast_eval (struct wp_node* node)
}
case WP_DIV_VP:
{
-#ifdef AMREX_DEVICE_COMPILE
+#if defined(__CUDA_ARCH__) || defined(__HIP_DEVICE_COMPILE__)
int i = node->rip.i;
result = node->lvp.v / x[i];
#else
@@ -163,7 +157,7 @@ wp_ast_eval (struct wp_node* node)
}
case WP_DIV_PP:
{
-#ifdef AMREX_DEVICE_COMPILE
+#if defined(__CUDA_ARCH__) || defined(__HIP_DEVICE_COMPILE__)
int i = node->lvp.ip.i;
int j = node->rip.i;
result = x[i] / x[j];
@@ -174,7 +168,7 @@ wp_ast_eval (struct wp_node* node)
}
case WP_NEG_P:
{
-#ifdef AMREX_DEVICE_COMPILE
+#if defined(__CUDA_ARCH__) || defined(__HIP_DEVICE_COMPILE__)
int i = node->rip.i;
result = -x[i];
#else
diff --git a/Source/Parser/wp_parser_y.c b/Source/Parser/wp_parser_y.c
index b71b42638..57293ab87 100644
--- a/Source/Parser/wp_parser_y.c
+++ b/Source/Parser/wp_parser_y.c
@@ -80,7 +80,7 @@ yyerror (char const *s, ...)
{
va_list vl;
va_start(vl, s);
-#ifdef AMREX_DEVICE_COMPILE
+#if defined(__CUDA_ARCH__) || defined(__HIP_DEVICE_COMPILE__)
printf(s,"\n");
assert(0);
#else
diff --git a/Source/Particles/MultiParticleContainer.H b/Source/Particles/MultiParticleContainer.H
index 8d951263e..5f809f2cc 100644
--- a/Source/Particles/MultiParticleContainer.H
+++ b/Source/Particles/MultiParticleContainer.H
@@ -223,13 +223,13 @@ public:
amrex::Vector<amrex::Real> m_B_external_particle;
amrex::Vector<amrex::Real> m_E_external_particle;
// ParserWrapper for B_external on the particle
- std::unique_ptr<ParserWrapper> m_Bx_particle_parser;
- std::unique_ptr<ParserWrapper> m_By_particle_parser;
- std::unique_ptr<ParserWrapper> m_Bz_particle_parser;
+ std::unique_ptr<ParserWrapper<4> > m_Bx_particle_parser;
+ std::unique_ptr<ParserWrapper<4> > m_By_particle_parser;
+ std::unique_ptr<ParserWrapper<4> > m_Bz_particle_parser;
// ParserWrapper for E_external on the particle
- std::unique_ptr<ParserWrapper> m_Ex_particle_parser;
- std::unique_ptr<ParserWrapper> m_Ey_particle_parser;
- std::unique_ptr<ParserWrapper> m_Ez_particle_parser;
+ std::unique_ptr<ParserWrapper<4> > m_Ex_particle_parser;
+ std::unique_ptr<ParserWrapper<4> > m_Ey_particle_parser;
+ std::unique_ptr<ParserWrapper<4> > m_Ez_particle_parser;
protected:
diff --git a/Source/Particles/MultiParticleContainer.cpp b/Source/Particles/MultiParticleContainer.cpp
index ac3bac467..6252d1ac4 100644
--- a/Source/Particles/MultiParticleContainer.cpp
+++ b/Source/Particles/MultiParticleContainer.cpp
@@ -130,12 +130,12 @@ MultiParticleContainer::ReadParameters ()
str_Bz_ext_particle_function);
// Parser for B_external on the particle
- m_Bx_particle_parser.reset(new ParserWrapper(
- makeParser(str_Bx_ext_particle_function)));
- m_By_particle_parser.reset(new ParserWrapper(
- makeParser(str_By_ext_particle_function)));
- m_Bz_particle_parser.reset(new ParserWrapper(
- makeParser(str_Bz_ext_particle_function)));
+ m_Bx_particle_parser.reset(new ParserWrapper<4>(
+ makeParser(str_Bx_ext_particle_function,{"x","y","z","t"})));
+ m_By_particle_parser.reset(new ParserWrapper<4>(
+ makeParser(str_By_ext_particle_function,{"x","y","z","t"})));
+ m_Bz_particle_parser.reset(new ParserWrapper<4>(
+ makeParser(str_Bz_ext_particle_function,{"x","y","z","t"})));
}
@@ -155,12 +155,12 @@ MultiParticleContainer::ReadParameters ()
Store_parserString(pp, "Ez_external_particle_function(x,y,z,t)",
str_Ez_ext_particle_function);
// Parser for E_external on the particle
- m_Ex_particle_parser.reset(new ParserWrapper(
- makeParser(str_Ex_ext_particle_function)));
- m_Ey_particle_parser.reset(new ParserWrapper(
- makeParser(str_Ey_ext_particle_function)));
- m_Ez_particle_parser.reset(new ParserWrapper(
- makeParser(str_Ez_ext_particle_function)));
+ m_Ex_particle_parser.reset(new ParserWrapper<4>(
+ makeParser(str_Ex_ext_particle_function,{"x","y","z","t"})));
+ m_Ey_particle_parser.reset(new ParserWrapper<4>(
+ makeParser(str_Ey_ext_particle_function,{"x","y","z","t"})));
+ m_Ez_particle_parser.reset(new ParserWrapper<4>(
+ makeParser(str_Ez_ext_particle_function,{"x","y","z","t"})));
}
diff --git a/Source/Particles/PhysicalParticleContainer.cpp b/Source/Particles/PhysicalParticleContainer.cpp
index 6572657ff..0277a9fe6 100644
--- a/Source/Particles/PhysicalParticleContainer.cpp
+++ b/Source/Particles/PhysicalParticleContainer.cpp
@@ -553,7 +553,6 @@ PhysicalParticleContainer::AddPlasma (int lev, RealBox part_realbox)
overlap_realbox.lo(1),
overlap_realbox.lo(2))};
- std::size_t shared_mem_bytes = plasma_injector->sharedMemoryNeeded();
int lrrfac = rrfac;
bool loc_do_field_ionization = do_field_ionization;
@@ -738,7 +737,7 @@ PhysicalParticleContainer::AddPlasma (int lev, RealBox part_realbox)
p.pos(0) = xb;
p.pos(1) = z;
#endif
- }, shared_mem_bytes);
+ });
if (cost) {
wt = (amrex::second() - wt) / tile_box.d_numPts();
@@ -992,44 +991,36 @@ PhysicalParticleContainer::AssignExternalFieldOnParticles(WarpXParIter& pti,
Real* const AMREX_RESTRICT Exp_data = Exp.dataPtr();
Real* const AMREX_RESTRICT Eyp_data = Eyp.dataPtr();
Real* const AMREX_RESTRICT Ezp_data = Ezp.dataPtr();
- ParserWrapper *xfield_partparser = mypc.m_Ex_particle_parser.get();
- ParserWrapper *yfield_partparser = mypc.m_Ey_particle_parser.get();
- ParserWrapper *zfield_partparser = mypc.m_Ez_particle_parser.get();
+ ParserWrapper<4> *xfield_partparser = mypc.m_Ex_particle_parser.get();
+ ParserWrapper<4> *yfield_partparser = mypc.m_Ey_particle_parser.get();
+ ParserWrapper<4> *zfield_partparser = mypc.m_Ez_particle_parser.get();
Real time = warpx.gett_new(lev);
amrex::ParallelFor(pti.numParticles(),
[=] AMREX_GPU_DEVICE (long i) {
ParticleReal x, y, z;
GetPosition(i, x, y, z);
- Exp_data[i] = xfield_partparser->getField(x, y, z, time);
- Eyp_data[i] = yfield_partparser->getField(x, y, z, time);
- Ezp_data[i] = zfield_partparser->getField(x, y, z, time);
- },
- /* To allocate shared memory for the GPU threads. */
- /* But, for now only 4 doubles (x,y,z,t) are allocated. */
- amrex::Gpu::numThreadsPerBlockParallelFor() * sizeof(double) * 4
- );
+ Exp_data[i] = (*xfield_partparser)(x, y, z, time);
+ Eyp_data[i] = (*yfield_partparser)(x, y, z, time);
+ Ezp_data[i] = (*zfield_partparser)(x, y, z, time);
+ });
}
if (mypc.m_B_ext_particle_s=="parse_b_ext_particle_function") {
const auto GetPosition = GetParticlePosition(pti);
Real* const AMREX_RESTRICT Bxp_data = Bxp.dataPtr();
Real* const AMREX_RESTRICT Byp_data = Byp.dataPtr();
Real* const AMREX_RESTRICT Bzp_data = Bzp.dataPtr();
- ParserWrapper *xfield_partparser = mypc.m_Bx_particle_parser.get();
- ParserWrapper *yfield_partparser = mypc.m_By_particle_parser.get();
- ParserWrapper *zfield_partparser = mypc.m_Bz_particle_parser.get();
+ ParserWrapper<4> *xfield_partparser = mypc.m_Bx_particle_parser.get();
+ ParserWrapper<4> *yfield_partparser = mypc.m_By_particle_parser.get();
+ ParserWrapper<4> *zfield_partparser = mypc.m_Bz_particle_parser.get();
Real time = warpx.gett_new(lev);
amrex::ParallelFor(pti.numParticles(),
[=] AMREX_GPU_DEVICE (long i) {
ParticleReal x, y, z;
GetPosition(i, x, y, z);
- Bxp_data[i] = xfield_partparser->getField(x, y, z, time);
- Byp_data[i] = yfield_partparser->getField(x, y, z, time);
- Bzp_data[i] = zfield_partparser->getField(x, y, z, time);
- },
- /* To allocate shared memory for the GPU threads. */
- /* But, for now only 4 doubles (x,y,z,t) are allocated. */
- amrex::Gpu::numThreadsPerBlockParallelFor() * sizeof(double) * 4
- );
+ Bxp_data[i] = (*xfield_partparser)(x, y, z, time);
+ Byp_data[i] = (*yfield_partparser)(x, y, z, time);
+ Bzp_data[i] = (*zfield_partparser)(x, y, z, time);
+ });
}
}
diff --git a/Source/Utils/WarpXMovingWindow.cpp b/Source/Utils/WarpXMovingWindow.cpp
index f6cd6de20..a94ffede9 100644
--- a/Source/Utils/WarpXMovingWindow.cpp
+++ b/Source/Utils/WarpXMovingWindow.cpp
@@ -111,8 +111,8 @@ WarpX::MoveWindow (bool move_j)
// Shift each component of vector fields (E, B, j)
for (int dim = 0; dim < 3; ++dim) {
// Fine grid
- ParserWrapper *Bfield_parser;
- ParserWrapper *Efield_parser;
+ ParserWrapper<3> *Bfield_parser;
+ ParserWrapper<3> *Efield_parser;
bool use_Bparser = false;
bool use_Eparser = false;
if (B_ext_grid_s == "parse_b_ext_grid_function") {
@@ -233,7 +233,7 @@ WarpX::MoveWindow (bool move_j)
void
WarpX::shiftMF (MultiFab& mf, const Geometry& geom, int num_shift, int dir,
IntVect ng_extra, amrex::Real external_field, bool useparser,
- ParserWrapper *field_parser)
+ ParserWrapper<3> *field_parser)
{
BL_PROFILE("WarpX::shiftMF()");
const BoxArray& ba = mf.boxArray();
@@ -329,10 +329,8 @@ WarpX::shiftMF (MultiFab& mf, const Geometry& geom, int num_shift, int dir,
Real fac_z = (1.0 - mf_type[2]) * dx[2]*0.5;
Real z = k*dx[2] + real_box.lo(2) + fac_z;
#endif
- srcfab(i,j,k,n) = field_parser->getField(x,y,z);
- }
- , amrex::Gpu::numThreadsPerBlockParallelFor() * sizeof(double)*4
- );
+ srcfab(i,j,k,n) = (*field_parser)(x,y,z);
+ });
}
}
diff --git a/Source/Utils/WarpXUtil.H b/Source/Utils/WarpXUtil.H
index 9231fa60a..cfc1b2440 100644
--- a/Source/Utils/WarpXUtil.H
+++ b/Source/Utils/WarpXUtil.H
@@ -126,6 +126,6 @@ T trilinear_interp(T x0, T x1,T y0, T y1, T z0, T z1,
*
* \param parse_function String to read to initialize the parser.
*/
-WarpXParser makeParser (std::string const& parse_function);
+WarpXParser makeParser (std::string const& parse_function, std::vector<std::string> const& varnames);
#endif //WARPX_UTILS_H_
diff --git a/Source/Utils/WarpXUtil.cpp b/Source/Utils/WarpXUtil.cpp
index 983654aed..65aa0edb2 100644
--- a/Source/Utils/WarpXUtil.cpp
+++ b/Source/Utils/WarpXUtil.cpp
@@ -190,16 +190,13 @@ void Store_parserString(amrex::ParmParse& pp, std::string query_string,
}
-WarpXParser makeParser (std::string const& parse_function)
+WarpXParser makeParser (std::string const& parse_function, std::vector<std::string> const& varnames)
{
WarpXParser parser(parse_function);
- parser.registerVariables({"x","y","z","t"});
+ parser.registerVariables(varnames);
ParmParse pp("my_constants");
std::set<std::string> symbols = parser.symbols();
- symbols.erase("x");
- symbols.erase("y");
- symbols.erase("z");
- symbols.erase("t");
+ for (auto const& v : varnames) symbols.erase(v.c_str());
for (auto it = symbols.begin(); it != symbols.end(); ) {
Real v;
if (pp.query(it->c_str(), v)) {
diff --git a/Source/WarpX.H b/Source/WarpX.H
index 5ffd9f15b..4191f25a6 100644
--- a/Source/WarpX.H
+++ b/Source/WarpX.H
@@ -91,7 +91,7 @@ public:
static void shiftMF (amrex::MultiFab& mf, const amrex::Geometry& geom,
int num_shift, int dir, amrex::IntVect ng_extra,
amrex::Real external_field=0.0, bool useparser = false,
- ParserWrapper *field_parser=nullptr);
+ ParserWrapper<3> *field_parser=nullptr);
static void GotoNextLine (std::istream& is);
@@ -116,13 +116,13 @@ public:
static std::string str_Ez_ext_grid_function;
// ParserWrapper for B_external on the grid
- std::unique_ptr<ParserWrapper> Bxfield_parser;
- std::unique_ptr<ParserWrapper> Byfield_parser;
- std::unique_ptr<ParserWrapper> Bzfield_parser;
+ std::unique_ptr<ParserWrapper<3> > Bxfield_parser;
+ std::unique_ptr<ParserWrapper<3> > Byfield_parser;
+ std::unique_ptr<ParserWrapper<3> > Bzfield_parser;
// ParserWrapper for E_external on the grid
- std::unique_ptr<ParserWrapper> Exfield_parser;
- std::unique_ptr<ParserWrapper> Eyfield_parser;
- std::unique_ptr<ParserWrapper> Ezfield_parser;
+ std::unique_ptr<ParserWrapper<3> > Exfield_parser;
+ std::unique_ptr<ParserWrapper<3> > Eyfield_parser;
+ std::unique_ptr<ParserWrapper<3> > Ezfield_parser;
// Algorithms
static long current_deposition_algo;
@@ -410,8 +410,8 @@ public:
*/
void InitializeExternalFieldsOnGridUsingParser (
amrex::MultiFab *mfx, amrex::MultiFab *mfy, amrex::MultiFab *mfz,
- ParserWrapper *xfield_parser, ParserWrapper *yfield_parser,
- ParserWrapper *zfield_parser, amrex::IntVect x_nodal_flag,
+ ParserWrapper<3> *xfield_parser, ParserWrapper<3> *yfield_parser,
+ ParserWrapper<3> *zfield_parser, amrex::IntVect x_nodal_flag,
amrex::IntVect y_nodal_flag, amrex::IntVect z_nodal_flag,
const int lev);