author    Dave Grote <grote1@llnl.gov>  2018-02-06 13:56:38 -0800
committer Dave Grote <grote1@llnl.gov>  2018-02-06 13:56:38 -0800
commit    ec6dd873d46a87707c20aa09b58c691d37a36f98 (patch)
tree      cfa6a71947445b185f9d197bfeec356ddbbadffb
parent    c081449f8f1729ef6f635743e95e96c345ae5d2c (diff)
parent    2d3cca8885f44fe4f8829e1f066e83cffce82c57 (diff)
Merge branch 'master' into with_python
-rw-r--r--  Docs/source/running_cpp/parameters.rst                      |  59
-rw-r--r--  Example/boosted_diags/inputs.2d                             | 100
-rw-r--r--  Example/boosted_diags/inputs.3d                             | 100
-rwxr-xr-x  Python/pywarpx/_libwarpx.py                                 | 468
-rw-r--r--  Source/CustomDensityProb.cpp                                |  55
-rw-r--r--  Source/ParticleContainer.H                                  |   2
-rw-r--r--  Source/ParticleContainer.cpp                                |   8
-rw-r--r--  Source/PhysicalParticleContainer.cpp                        |   4
-rw-r--r--  Source/WarpX.H                                              |  12
-rw-r--r--  Source/WarpXBoostedFrameDiagnostic.H                        |   4
-rw-r--r--  Source/WarpXBoostedFrameDiagnostic.cpp                      |  30
-rw-r--r--  Source/WarpXEvolve.cpp                                      |   7
-rw-r--r--  Source/WarpXIO.cpp                                          |   2
-rw-r--r--  Source/WarpXInitData.cpp                                    |   8
-rw-r--r--  Source/WarpXWrappers.cpp                                    |  72
-rw-r--r--  Tools/performance_tests/automated_test_1_uniform_rest_32ppc |  58
-rw-r--r--  Tools/performance_tests/automated_test_2_uniform_rest_1ppc  |  48
-rw-r--r--  Tools/performance_tests/automated_test_3_uniform_drift_4ppc |  58
-rw-r--r--  Tools/performance_tests/automated_test_4_labdiags_2ppc      | 100
-rw-r--r--  Tools/performance_tests/automated_test_5_loadimbalance      |  76
-rw-r--r--  Tools/performance_tests/automated_test_6_output_2ppc        |  58
-rw-r--r--  Tools/performance_tests/functions_perftest.py               | 189
-rw-r--r--  Tools/performance_tests/performance_log.txt                 |  70
-rw-r--r--  Tools/performance_tests/run_alltests.py                     | 244
-rw-r--r--  Tools/performance_tests/run_alltests_1node.py               | 333
-rw-r--r--  Tools/read_raw_data.py                                      |  45
26 files changed, 1961 insertions(+), 249 deletions(-)
diff --git a/Docs/source/running_cpp/parameters.rst b/Docs/source/running_cpp/parameters.rst
index cf03a04ad..540389f0f 100644
--- a/Docs/source/running_cpp/parameters.rst
+++ b/Docs/source/running_cpp/parameters.rst
@@ -12,7 +12,6 @@ Overall simulation parameters
* ``max_step`` (`integer`)
The number of PIC cycles to perform.
-
* ``warpx.gamma_boost`` (`float`)
The Lorentz factor of the boosted frame in which the simulation is run.
(The corresponding Lorentz transformation is assumed to be along ``warpx.boost_direction``.)
@@ -91,6 +90,42 @@ Distribution across MPI ranks and parallelization
Particle initialization
-----------------------
+* ``particles.nspecies`` (`int`)
+ The number of species that will be used in the simulation.
+
+* ``particles.species_names`` (`strings`, separated by spaces)
+ The name of each species. This is then used in the rest of the input deck;
+ in this documentation we use `<species_name>` as a placeholder.
+
+* ``<species_name>.charge`` (`float`)
+ The charge of one `physical` particle of this species.
+
+* ``<species_name>.mass`` (`float`)
+ The mass of one `physical` particle of this species.
+
+* ``<species_name>.injection_style`` (`string`)
+ Determines how the particles will be injected in the simulation.
+ The options are:
+
+ * ``NUniformPerCell``: injection with a fixed number of particles
+ per cell, with particles being evenly-spaced in each direction within a cell.
+ This requires the additional parameter ``<species_name>.num_particles_per_cell_each_dim``.
+
+ * ``NRandomPerCell``: injection with a fixed number of particles
+ per cell, with particles being randomly distributed within each cell.
+ This requires the additional parameter ``<species_name>.num_particles_per_cell``.
+
+ * ``Gaussian_Beam``: injection of a Gaussian-shaped particle bunch, using
+ the additional parameters described in the dedicated section below
+ (a minimal species block is sketched after this list).
+
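As a concrete illustration of these injection parameters, a minimal species block could be assembled as in the sketch below. The parameter names mirror the documentation above and the example decks in this patch; the density value and the file name inputs_example are illustrative placeholders.

    import textwrap

    # Sketch: write a minimal species block to an AMReX-style inputs file.
    species_block = textwrap.dedent("""\
        particles.nspecies = 1
        particles.species_names = electrons

        electrons.charge = -q_e
        electrons.mass = m_e
        electrons.injection_style = "NUniformPerCell"
        electrons.num_particles_per_cell_each_dim = 1 1 2
        electrons.profile = constant
        electrons.density = 1.e20   # example density, in m^-3
        electrons.momentum_distribution_type = "constant"
    """)

    with open('inputs_example', 'w') as f:   # hypothetical file name
        f.write(species_block)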
+Additional parameters for plasma injection (``NUniformPerCell`` and ``NRandomPerCell``)
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+
+Additional parameters for gaussian beams (``Gaussian_Beam``)
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+
+
Laser initialization
--------------------
@@ -212,6 +247,11 @@ Laser initialization
Numerics and algorithms
-----------------------
+* ``warpx.cfl`` (`float`)
+ The ratio between the actual timestep used in the simulation
+ and the CFL limit (e.g. for ``warpx.cfl = 1``, the timestep is
+ exactly equal to the CFL limit; see the sketch below).
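For orientation, the timestep implied by a given warpx.cfl value can be estimated as in the sketch below. This assumes the standard CFL limit of the 3D Yee solver; the formula is an assumption for illustration, not quoted from the WarpX source.

    import math

    c = 299792458.0   # speed of light (m/s)

    def estimated_dt(cfl, dx, dy, dz):
        # Assumed 3D Yee CFL limit; WarpX's internal formula may differ.
        dt_cfl = 1.0 / (c * math.sqrt(1.0/dx**2 + 1.0/dy**2 + 1.0/dz**2))
        return cfl * dt_cfl

    # e.g. warpx.cfl = 1.0 on a 1-micron cubic cell:
    print(estimated_dt(1.0, 1.e-6, 1.e-6, 1.e-6))   # ~1.9e-15 s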
+
* ``warpx.use_filter`` (`0 or 1`)
Whether to smooth the charge and currents on the mesh, after depositing
them from the macroparticles. This uses a bilinear filter
@@ -274,3 +314,20 @@ Diagnostics and output
Only used when ``warpx.do_boosted_frame_diagnostic`` is ``1``.
The time interval between the lab-frame snapshots (where this
time interval is expressed in the laboratory frame).
+
+* ``warpx.plot_raw_fields`` (`0` or `1`)
+ By default, the fields written in the plot files are averaged on the nodes.
+ When ``warpx.plot_raw_fields`` is ``1``, the raw (i.e. unaveraged)
+ fields are also saved in the plot files.
+
+* ``warpx.plot_raw_fields_guards`` (`0` or `1`)
+ Only used when ``warpx.plot_raw_fields`` is ``1``.
+ Whether to include the guard cells in the output of the raw fields.
+
+* ``warpx.plot_finepatch`` (`0` or `1`)
+ Only used when mesh refinement is activated and ``warpx.plot_raw_fields`` is ``1``.
+ Whether to output the data of the fine patch in the plot files.
+
+* ``warpx.plot_crsepatch`` (`0` or `1`)
+ Only used when mesh refinement is activated and ``warpx.plot_raw_fields`` is ``1``.
+ Whether to output the data of the coarse patch in the plot files.
diff --git a/Example/boosted_diags/inputs.2d b/Example/boosted_diags/inputs.2d
new file mode 100644
index 000000000..fc7a23f0b
--- /dev/null
+++ b/Example/boosted_diags/inputs.2d
@@ -0,0 +1,100 @@
+# Maximum number of time steps
+max_step = 260
+
+# number of grid points
+amr.n_cell = 64 64 512
+
+# Maximum allowable size of each subdomain in the problem domain;
+# this is used to decompose the domain for parallel calculations.
+
+amr.max_grid_size = 32
+
+# Maximum level in hierarchy (for now must be 0, i.e., one level in total)
+amr.max_level = 0
+amr.plot_int = 10 # How often to write plotfiles. "<= 0" means no plotfiles.
+amr.check_int = 10
+
+# Geometry
+geometry.coord_sys = 0 # 0: Cartesian
+geometry.is_periodic = 1 1 0 # Is periodic?
+geometry.prob_lo = -150.e-6 -150.e-6 -0.6e-3 # physical domain
+geometry.prob_hi = 150.e-6 150.e-6 0.
+
+# Verbosity
+warpx.verbose = 1
+
+# Algorithms
+algo.current_deposition = 3
+algo.charge_deposition = 0
+algo.field_gathering = 0
+algo.particle_pusher = 0
+
+# Numerics
+interpolation.nox = 3
+interpolation.noy = 3
+interpolation.noz = 3
+warpx.use_filter = 1
+warpx.cfl = 1.0
+warpx.do_pml = 0
+
+# Moving window
+warpx.do_moving_window = 1
+warpx.moving_window_dir = z
+warpx.moving_window_v = 1.0 # in units of the speed of light
+
+# Boosted frame
+warpx.gamma_boost = 15.
+warpx.boost_direction = z
+
+# Diagnostics
+warpx.do_boosted_frame_diagnostic = 1
+warpx.num_snapshots_lab = 20
+warpx.dt_snapshots_lab = 7.0e-14
+
+# Particle Injection
+warpx.do_plasma_injection = 1
+warpx.num_injected_species = 2
+warpx.injected_plasma_species = 0 1
+
+# Species
+particles.nspecies = 2
+particles.species_names = electrons ions
+
+electrons.charge = -q_e
+electrons.mass = m_e
+electrons.injection_style = "NUniformPerCell"
+electrons.xmin = -150.e-6
+electrons.xmax = 150.e-6
+electrons.ymin = -150.e-6
+electrons.ymax = 150.e-6
+electrons.zmin = 0.e-6
+electrons.num_particles_per_cell_each_dim = 1 1 2
+electrons.profile = constant
+electrons.density = 1.
+electrons.momentum_distribution_type = "constant"
+
+ions.charge = q_e
+ions.mass = m_p
+ions.injection_style = "NUniformPerCell"
+ions.xmin = -150.e-6
+ions.xmax = 150.e-6
+ions.ymin = -150.e-6
+ions.ymax = 150.e-6
+ions.zmin = 0.e-6
+ions.num_particles_per_cell_each_dim = 1 1 2
+ions.profile = constant
+ions.density = 1.
+ions.momentum_distribution_type = "constant"
+
+# Laser
+warpx.use_laser = 1
+laser.profile = Gaussian
+laser.position = 0. 0. -1.e-6 # This point is on the laser plane
+laser.direction = 0. 0. 1. # The plane normal direction
+laser.polarization = 1. 0. 0. # The main polarization vector
+laser.e_max = 8.e12 # Maximum amplitude of the laser field (in V/m)
+laser.profile_waist = 5.e-5 # The waist of the laser (in meters)
+laser.profile_duration = 16.7e-15 # The duration of the laser (in seconds)
+laser.profile_t_peak = 33.4e-15 # The time at which the laser reaches its peak (in seconds)
+laser.profile_focal_distance = 0.e-6 # Focal distance from the antenna (in meters)
+laser.wavelength = 0.8e-6 # The wavelength of the laser (in meters)
diff --git a/Example/boosted_diags/inputs.3d b/Example/boosted_diags/inputs.3d
new file mode 100644
index 000000000..fc7a23f0b
--- /dev/null
+++ b/Example/boosted_diags/inputs.3d
@@ -0,0 +1,100 @@
+# Maximum number of time steps
+max_step = 260
+
+# number of grid points
+amr.n_cell = 64 64 512
+
+# Maximum allowable size of each subdomain in the problem domain;
+# this is used to decompose the domain for parallel calculations.
+
+amr.max_grid_size = 32
+
+# Maximum level in hierarchy (for now must be 0, i.e., one level in total)
+amr.max_level = 0
+amr.plot_int = 10 # How often to write plotfiles. "<= 0" means no plotfiles.
+amr.check_int = 10
+
+# Geometry
+geometry.coord_sys = 0 # 0: Cartesian
+geometry.is_periodic = 1 1 0 # Is periodic?
+geometry.prob_lo = -150.e-6 -150.e-6 -0.6e-3 # physical domain
+geometry.prob_hi = 150.e-6 150.e-6 0.
+
+# Verbosity
+warpx.verbose = 1
+
+# Algorithms
+algo.current_deposition = 3
+algo.charge_deposition = 0
+algo.field_gathering = 0
+algo.particle_pusher = 0
+
+# Numerics
+interpolation.nox = 3
+interpolation.noy = 3
+interpolation.noz = 3
+warpx.use_filter = 1
+warpx.cfl = 1.0
+warpx.do_pml = 0
+
+# Moving window
+warpx.do_moving_window = 1
+warpx.moving_window_dir = z
+warpx.moving_window_v = 1.0 # in units of the speed of light
+
+# Boosted frame
+warpx.gamma_boost = 15.
+warpx.boost_direction = z
+
+# Diagnostics
+warpx.do_boosted_frame_diagnostic = 1
+warpx.num_snapshots_lab = 20
+warpx.dt_snapshots_lab = 7.0e-14
+
+# Particle Injection
+warpx.do_plasma_injection = 1
+warpx.num_injected_species = 2
+warpx.injected_plasma_species = 0 1
+
+# Species
+particles.nspecies = 2
+particles.species_names = electrons ions
+
+electrons.charge = -q_e
+electrons.mass = m_e
+electrons.injection_style = "NUniformPerCell"
+electrons.xmin = -150.e-6
+electrons.xmax = 150.e-6
+electrons.ymin = -150.e-6
+electrons.ymax = 150.e-6
+electrons.zmin = 0.e-6
+electrons.num_particles_per_cell_each_dim = 1 1 2
+electrons.profile = constant
+electrons.density = 1.
+electrons.momentum_distribution_type = "constant"
+
+ions.charge = q_e
+ions.mass = m_p
+ions.injection_style = "NUniformPerCell"
+ions.xmin = -150.e-6
+ions.xmax = 150.e-6
+ions.ymin = -150.e-6
+ions.ymax = 150.e-6
+ions.zmin = 0.e-6
+ions.num_particles_per_cell_each_dim = 1 1 2
+ions.profile = constant
+ions.density = 1.
+ions.momentum_distribution_type = "constant"
+
+# Laser
+warpx.use_laser = 1
+laser.profile = Gaussian
+laser.position = 0. 0. -1.e-6 # This point is on the laser plane
+laser.direction = 0. 0. 1. # The plane normal direction
+laser.polarization = 1. 0. 0. # The main polarization vector
+laser.e_max = 8.e12 # Maximum amplitude of the laser field (in V/m)
+laser.profile_waist = 5.e-5 # The waist of the laser (in meters)
+laser.profile_duration = 16.7e-15 # The duration of the laser (in seconds)
+laser.profile_t_peak = 33.4e-15 # The time at which the laser reaches its peak (in seconds)
+laser.profile_focal_distance = 0.e-6 # Focal distance from the antenna (in meters)
+laser.wavelength = 0.8e-6 # The wavelength of the laser (in meters)
diff --git a/Python/pywarpx/_libwarpx.py b/Python/pywarpx/_libwarpx.py
index e0a7262be..71e0a6729 100755
--- a/Python/pywarpx/_libwarpx.py
+++ b/Python/pywarpx/_libwarpx.py
@@ -83,18 +83,54 @@ f.restype = LP_LP_c_double
f = libwarpx.warpx_getEfieldLoVects
f.restype = LP_c_int
+f = libwarpx.warpx_getEfieldCP
+f.restype = LP_LP_c_double
+
+f = libwarpx.warpx_getEfieldCPLoVects
+f.restype = LP_c_int
+
+f = libwarpx.warpx_getEfieldFP
+f.restype = LP_LP_c_double
+
+f = libwarpx.warpx_getEfieldFPLoVects
+f.restype = LP_c_int
+
f = libwarpx.warpx_getBfield
f.restype = LP_LP_c_double
f = libwarpx.warpx_getBfieldLoVects
f.restype = LP_c_int
+f = libwarpx.warpx_getBfieldCP
+f.restype = LP_LP_c_double
+
+f = libwarpx.warpx_getBfieldCPLoVects
+f.restype = LP_c_int
+
+f = libwarpx.warpx_getBfieldFP
+f.restype = LP_LP_c_double
+
+f = libwarpx.warpx_getBfieldFPLoVects
+f.restype = LP_c_int
+
f = libwarpx.warpx_getCurrentDensity
f.restype = LP_LP_c_double
f = libwarpx.warpx_getCurrentDensityLoVects
f.restype = LP_c_int
+f = libwarpx.warpx_getCurrentDensityCP
+f.restype = LP_LP_c_double
+
+f = libwarpx.warpx_getCurrentDensityCPLoVects
+f.restype = LP_c_int
+
+f = libwarpx.warpx_getCurrentDensityFP
+f.restype = LP_LP_c_double
+
+f = libwarpx.warpx_getCurrentDensityFPLoVects
+f.restype = LP_c_int
+
#f = libwarpx.warpx_getPMLSigma
#f.restype = LP_c_double
#
@@ -550,6 +586,8 @@ def get_mesh_electric_field(level, direction, include_ghosts=True):
This returns a list of numpy arrays containing the mesh electric field
data on each grid for this process.
+ This version is for the full "auxiliary" solution on the given level.
+
The data for the numpy arrays are not copied, but share the underlying
memory buffer with WarpX. The numpy arrays are fully writeable.
@@ -592,12 +630,112 @@ def get_mesh_electric_field(level, direction, include_ghosts=True):
return grid_data
+def get_mesh_electric_field_cp(level, direction, include_ghosts=True):
+ '''
+
+ This returns a list of numpy arrays containing the mesh electric field
+ data on each grid for this process. This version returns the field on
+ the coarse patch for the given level.
+
+ The data for the numpy arrays are not copied, but share the underlying
+ memory buffer with WarpX. The numpy arrays are fully writeable.
+
+ Parameters
+ ----------
+
+ level : the AMR level to get the data for
+ direction : the component of the data you want
+ include_ghosts : whether to include ghost zones or not
+
+ Returns
+ -------
+
+ A List of numpy arrays.
+
+ '''
+
+ assert(level == 0)
+
+ shapes = LP_c_int()
+ size = ctypes.c_int(0)
+ ngrow = ctypes.c_int(0)
+ data = libwarpx.warpx_getEfieldCP(level, direction,
+ ctypes.byref(size), ctypes.byref(ngrow),
+ ctypes.byref(shapes))
+ ng = ngrow.value
+ grid_data = []
+ for i in range(size.value):
+ shape = tuple([shapes[dim*i + d] for d in range(dim)])
+ # --- The data is stored in Fortran order, hence shape is reversed and a transpose is taken.
+ arr = np.ctypeslib.as_array(data[i], shape[::-1]).T
+ arr.setflags(write=1)
+ if include_ghosts:
+ grid_data.append(arr)
+ else:
+ grid_data.append(arr[[slice(ng, -ng) for _ in range(dim)]])
+
+ libc.free(shapes)
+ libc.free(data)
+ return grid_data
+
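The shape handling and ghost-cell trimming used in these getters can be exercised standalone, as in the sketch below. Here dim and ng stand in for the module-level dimension and ngrow.value; note that the list-of-slices indexing used in the patch was accepted by the NumPy of that era, while modern NumPy requires a tuple.

    import numpy as np

    dim = 3                    # stand-in for the module-level dimension
    ng = 2                     # stand-in for ngrow.value
    arr = np.zeros((8, 8, 8))  # a (4, 4, 4) interior plus ng ghost cells per side

    # Equivalent to arr[[slice(ng, -ng) for _ in range(dim)]] above,
    # with the slices passed as a tuple as modern NumPy requires.
    interior = arr[tuple(slice(ng, -ng) for _ in range(dim))]
    print(interior.shape)      # -> (4, 4, 4)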
+
+def get_mesh_electric_field_fp(level, direction, include_ghosts=True):
+ '''
+
+ This returns a list of numpy arrays containing the mesh electric field
+ data on each grid for this process. This version returns the field on
+ the fine patch for the given level.
+
+ The data for the numpy arrays are not copied, but share the underlying
+ memory buffer with WarpX. The numpy arrays are fully writeable.
+
+ Parameters
+ ----------
+
+ level : the AMR level to get the data for
+ direction : the component of the data you want
+ include_ghosts : whether to include ghost zones or not
+
+ Returns
+ -------
+
+ A List of numpy arrays.
+
+ '''
+
+ assert(level == 0)
+
+ shapes = LP_c_int()
+ size = ctypes.c_int(0)
+ ngrow = ctypes.c_int(0)
+ data = libwarpx.warpx_getEfieldFP(level, direction,
+ ctypes.byref(size), ctypes.byref(ngrow),
+ ctypes.byref(shapes))
+ ng = ngrow.value
+ grid_data = []
+ for i in range(size.value):
+ shape = tuple([shapes[dim*i + d] for d in range(dim)])
+ # --- The data is stored in Fortran order, hence shape is reversed and a transpose is taken.
+ arr = np.ctypeslib.as_array(data[i], shape[::-1]).T
+ arr.setflags(write=1)
+ if include_ghosts:
+ grid_data.append(arr)
+ else:
+ grid_data.append(arr[[slice(ng, -ng) for _ in range(dim)]])
+
+ libc.free(shapes)
+ libc.free(data)
+ return grid_data
+
+
def get_mesh_magnetic_field(level, direction, include_ghosts=True):
'''
This returns a list of numpy arrays containing the mesh magnetic field
data on each grid for this process.
+ This version is for the full "auxiliary" solution on the given level.
+
The data for the numpy arrays are not copied, but share the underlying
memory buffer with WarpX. The numpy arrays are fully writeable.
@@ -640,6 +778,104 @@ def get_mesh_magnetic_field(level, direction, include_ghosts=True):
return grid_data
+def get_mesh_magnetic_field_cp(level, direction, include_ghosts=True):
+ '''
+
+ This returns a list of numpy arrays containing the mesh magnetic field
+ data on each grid for this process. This version returns the field on
+ the coarse patch for the given level.
+
+ The data for the numpy arrays are not copied, but share the underlying
+ memory buffer with WarpX. The numpy arrays are fully writeable.
+
+ Parameters
+ ----------
+
+ level : the AMR level to get the data for
+ direction : the component of the data you want
+ include_ghosts : whether to include ghost zones or not
+
+ Returns
+ -------
+
+ A List of numpy arrays.
+
+ '''
+
+ assert(level == 0)
+
+ shapes = LP_c_int()
+ size = ctypes.c_int(0)
+ ngrow = ctypes.c_int(0)
+ data = libwarpx.warpx_getBfieldCP(level, direction,
+ ctypes.byref(size), ctypes.byref(ngrow),
+ ctypes.byref(shapes))
+ ng = ngrow.value
+ grid_data = []
+ for i in range(size.value):
+ shape = tuple([shapes[dim*i + d] for d in range(dim)])
+ # --- The data is stored in Fortran order, hence shape is reversed and a transpose is taken.
+ arr = np.ctypeslib.as_array(data[i], shape[::-1]).T
+ arr.setflags(write=1)
+ if include_ghosts:
+ grid_data.append(arr)
+ else:
+ grid_data.append(arr[[slice(ng, -ng) for _ in range(dim)]])
+
+ libc.free(shapes)
+ libc.free(data)
+ return grid_data
+
+
+def get_mesh_magnetic_field_fp(level, direction, include_ghosts=True):
+ '''
+
+ This returns a list of numpy arrays containing the mesh magnetic field
+ data on each grid for this process. This version returns the field on
+ the fine patch for the given level.
+
+ The data for the numpy arrays are not copied, but share the underlying
+ memory buffer with WarpX. The numpy arrays are fully writeable.
+
+ Parameters
+ ----------
+
+ level : the AMR level to get the data for
+ direction : the component of the data you want
+ include_ghosts : whether to include ghost zones or not
+
+ Returns
+ -------
+
+ A List of numpy arrays.
+
+ '''
+
+ assert(level == 0)
+
+ shapes = LP_c_int()
+ size = ctypes.c_int(0)
+ ngrow = ctypes.c_int(0)
+ data = libwarpx.warpx_getBfieldFP(level, direction,
+ ctypes.byref(size), ctypes.byref(ngrow),
+ ctypes.byref(shapes))
+ ng = ngrow.value
+ grid_data = []
+ for i in range(size.value):
+ shape = tuple([shapes[dim*i + d] for d in range(dim)])
+ # --- The data is stored in Fortran order, hence shape is reversed and a transpose is taken.
+ arr = np.ctypeslib.as_array(data[i], shape[::-1]).T
+ arr.setflags(write=1)
+ if include_ghosts:
+ grid_data.append(arr)
+ else:
+ grid_data.append(arr[[slice(ng, -ng) for _ in range(dim)]])
+
+ libc.free(shapes)
+ libc.free(data)
+ return grid_data
+
+
def get_mesh_current_density(level, direction, include_ghosts=True):
'''
@@ -688,6 +924,104 @@ def get_mesh_current_density(level, direction, include_ghosts=True):
return grid_data
+def get_mesh_current_density_cp(level, direction, include_ghosts=True):
+ '''
+
+ This returns a list of numpy arrays containing the mesh current density
+ data on each grid for this process. This version returns the density for
+ the coarse patch on the given level.
+
+ The data for the numpy arrays are not copied, but share the underlying
+ memory buffer with WarpX. The numpy arrays are fully writeable.
+
+ Parameters
+ ----------
+
+ level : the AMR level to get the data for
+ direction : the component of the data you want
+ include_ghosts : whether to include ghost zones or not
+
+ Returns
+ -------
+
+ A List of numpy arrays.
+
+ '''
+
+ assert(level == 0)
+
+ shapes = LP_c_int()
+ size = ctypes.c_int(0)
+ ngrow = ctypes.c_int(0)
+ data = libwarpx.warpx_getCurrentDensityCP(level, direction,
+ ctypes.byref(size), ctypes.byref(ngrow),
+ ctypes.byref(shapes))
+ ng = ngrow.value
+ grid_data = []
+ for i in range(size.value):
+ shape = tuple([shapes[dim*i + d] for d in range(dim)])
+ # --- The data is stored in Fortran order, hence shape is reversed and a transpose is taken.
+ arr = np.ctypeslib.as_array(data[i], shape[::-1]).T
+ arr.setflags(write=1)
+ if include_ghosts:
+ grid_data.append(arr)
+ else:
+ grid_data.append(arr[[slice(ng, -ng) for _ in range(dim)]])
+
+ libc.free(shapes)
+ libc.free(data)
+ return grid_data
+
+
+def get_mesh_current_density_fp(level, direction, include_ghosts=True):
+ '''
+
+ This returns a list of numpy arrays containing the mesh current density
+ data on each grid for this process. This version returns the density on
+ the fine patch for the given level.
+
+ The data for the numpy arrays are not copied, but share the underlying
+ memory buffer with WarpX. The numpy arrays are fully writeable.
+
+ Parameters
+ ----------
+
+ level : the AMR level to get the data for
+ direction : the component of the data you want
+ include_ghosts : whether to include ghost zones or not
+
+ Returns
+ -------
+
+ A List of numpy arrays.
+
+ '''
+
+ assert(level == 0)
+
+ shapes = LP_c_int()
+ size = ctypes.c_int(0)
+ ngrow = ctypes.c_int(0)
+ data = libwarpx.warpx_getCurrentDensityFP(level, direction,
+ ctypes.byref(size), ctypes.byref(ngrow),
+ ctypes.byref(shapes))
+ ng = ngrow.value
+ grid_data = []
+ for i in range(size.value):
+ shape = tuple([shapes[dim*i + d] for d in range(dim)])
+ # --- The data is stored in Fortran order, hence shape is reversed and a transpose is taken.
+ arr = np.ctypeslib.as_array(data[i], shape[::-1]).T
+ arr.setflags(write=1)
+ if include_ghosts:
+ grid_data.append(arr)
+ else:
+ grid_data.append(arr[[slice(ng, -ng) for _ in range(dim)]])
+
+ libc.free(shapes)
+ libc.free(data)
+ return grid_data
+
+
def _get_mesh_array_lovects(level, direction, include_ghosts=True, getarrayfunc=None):
assert(0 <= level and level <= libwarpx.warpx_finestLevel())
@@ -715,6 +1049,8 @@ def get_mesh_electric_field_lovects(level, direction, include_ghosts=True):
This returns a list of the lo vectors of the arrays containing the mesh electric field
data on each grid for this process.
+ This version is for the full "auxiliary" solution on the given level.
+
Parameters
----------
@@ -731,12 +1067,58 @@ def get_mesh_electric_field_lovects(level, direction, include_ghosts=True):
return _get_mesh_array_lovects(level, direction, include_ghosts, libwarpx.warpx_getEfieldLoVects)
+def get_mesh_electric_field_cp_lovects(level, direction, include_ghosts=True):
+ '''
+
+ This returns a list of the lo vectors of the arrays containing the mesh electric field
+ data on each grid for this process, for the coarse patch on the given level.
+
+ Parameters
+ ----------
+
+ level : the AMR level to get the data for
+ direction : the component of the data you want
+ include_ghosts : whether to include ghost zones or not
+
+ Returns
+ -------
+
+ A 2d numpy array of the lo vector for each grid with the shape (dims, number of grids)
+
+ '''
+ return _get_mesh_array_lovects(level, direction, include_ghosts, libwarpx.warpx_getEfieldCPLoVects)
+
+
+def get_mesh_electric_field_fp_lovects(level, direction, include_ghosts=True):
+ '''
+
+ This returns a list of the lo vectors of the arrays containing the mesh electric field
+ data on each grid for this process, for the fine patch on the given level.
+
+ Parameters
+ ----------
+
+ level : the AMR level to get the data for
+ direction : the component of the data you want
+ include_ghosts : whether to include ghost zones or not
+
+ Returns
+ -------
+
+ A 2d numpy array of the lo vector for each grid with the shape (dims, number of grids)
+
+ '''
+ return _get_mesh_array_lovects(level, direction, include_ghosts, libwarpx.warpx_getEfieldFPLoVects)
+
+
def get_mesh_magnetic_field_lovects(level, direction, include_ghosts=True):
'''
This returns a list of the lo vectors of the arrays containing the mesh magnetic field
data on each grid for this process.
+ This version is for the full "auxiliary" solution on the given level.
+
Parameters
----------
@@ -753,6 +1135,50 @@ def get_mesh_magnetic_field_lovects(level, direction, include_ghosts=True):
return _get_mesh_array_lovects(level, direction, include_ghosts, libwarpx.warpx_getBfieldLoVects)
+def get_mesh_magnetic_field_cp_lovects(level, direction, include_ghosts=True):
+ '''
+
+ This returns a list of the lo vectors of the arrays containing the mesh magnetic field
+ data on each grid for this process, for the coarse patch on the given level.
+
+ Parameters
+ ----------
+
+ level : the AMR level to get the data for
+ direction : the component of the data you want
+ include_ghosts : whether to include ghost zones or not
+
+ Returns
+ -------
+
+ A 2d numpy array of the lo vector for each grid with the shape (dims, number of grids)
+
+ '''
+ return _get_mesh_array_lovects(level, direction, include_ghosts, libwarpx.warpx_getBfieldCPLoVects)
+
+
+def get_mesh_magnetic_field_fp_lovects(level, direction, include_ghosts=True):
+ '''
+
+ This returns a list of the lo vectors of the arrays containing the mesh magnetic field
+ data on each grid for this process, for the fine patch on the given level.
+
+ Parameters
+ ----------
+
+ level : the AMR level to get the data for
+ direction : the component of the data you want
+ include_ghosts : whether to include ghost zones or not
+
+ Returns
+ -------
+
+ A 2d numpy array of the lo vector for each grid with the shape (dims, number of grids)
+
+ '''
+ return _get_mesh_array_lovects(level, direction, include_ghosts, libwarpx.warpx_getBfieldFPLoVects)
+
+
def get_mesh_current_density_lovects(level, direction, include_ghosts=True):
'''
@@ -775,3 +1201,45 @@ def get_mesh_current_density_lovects(level, direction, include_ghosts=True):
return _get_mesh_array_lovects(level, direction, include_ghosts, libwarpx.warpx_getCurrentDensityLoVects)
+def get_mesh_current_density_cp_lovects(level, direction, include_ghosts=True):
+ '''
+
+ This returns a list of the lo vectors of the arrays containing the mesh current density
+ data on each grid for this process, for the coarse patch on the given level.
+
+ Parameters
+ ----------
+
+ level : the AMR level to get the data for
+ direction : the component of the data you want
+ include_ghosts : whether to include ghost zones or not
+
+ Returns
+ -------
+
+ A 2d numpy array of the lo vector for each grid with the shape (dims, number of grids)
+
+ '''
+ return _get_mesh_array_lovects(level, direction, include_ghosts, libwarpx.warpx_getCurrentDensityCPLoVects)
+
+def get_mesh_current_density_fp_lovects(level, direction, include_ghosts=True):
+ '''
+
+ This returns a list of the lo vectors of the arrays containing the mesh current density
+ data on each grid for this process, for the fine patch on the given level.
+
+ Parameters
+ ----------
+
+ level : the AMR level to get the data for
+ direction : the component of the data you want
+ include_ghosts : whether to include ghost zones or not
+
+ Returns
+ -------
+
+ A 2d numpy array of the lo vector for each grid with the shape (dims, number of grids)
+
+ '''
+ return _get_mesh_array_lovects(level, direction, include_ghosts, libwarpx.warpx_getCurrentDensityFPLoVects)
+
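A hypothetical usage sketch for the new patch-level accessors follows. It assumes a WarpX run has already been initialized through pywarpx; the level and component values are illustrative.

    from pywarpx import _libwarpx

    level, direction = 0, 0   # level 0, x component

    # Fine-patch E-field grids and their lo vectors, as documented above.
    ex_fp = _libwarpx.get_mesh_electric_field_fp(level, direction,
                                                 include_ghosts=False)
    lovects = _libwarpx.get_mesh_electric_field_fp_lovects(level, direction)

    # lovects has shape (dims, number of grids); iterate grid by grid.
    for grid, lo in zip(ex_fp, lovects.T):
        print(lo, grid.shape)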
diff --git a/Source/CustomDensityProb.cpp b/Source/CustomDensityProb.cpp
index 1df3d75ad..2f6005bc2 100644
--- a/Source/CustomDensityProb.cpp
+++ b/Source/CustomDensityProb.cpp
@@ -1,13 +1,52 @@
#include <PlasmaInjector.H>
-#include <iostream>
+using namespace amrex;
-#include <AMReX.H>
+///
+/// This "custom" density profile just does constant
+///
+Real CustomDensityProfile::getDensity(Real x, Real y, Real z) const {
+ const Real on_axis_density = params[0];
+ const Real plasma_zmin = params[1];
+ const Real plasma_zmax = params[2];
+ const Real plasma_lramp_start = params[3];
+ const Real plasma_lramp_end = params[4];
+ const Real plasma_rcap = params[5];
+ const Real plasma_rdownramp = params[6];
+ const Real plasma_rchannel = params[7];
+ static const Real re = 2.8178403227e-15; // Electron classical radius
+ static const Real pi = 3.14159265359;
-amrex::Real CustomDensityProfile::getDensity(amrex::Real x,
- amrex::Real y,
- amrex::Real z) const
-{
- amrex::Abort("If running with a custom density profile, you must supply a CustomDensityProb.cpp file");
- return 0.0;
+ Real r2 = x*x + y*y;
+ Real r = std::sqrt( r2 );
+
+ // Transverse part of the profile
+ Real nr;
+ if (r<plasma_rcap) {
+ nr = 1. + 1./(pi*on_axis_density*re) * r2/pow(plasma_rchannel, 4);
+ } else {
+ nr = 1. + 1./(pi*on_axis_density*re) *
+ pow(plasma_rcap, 2)/pow(plasma_rchannel, 4) *
+ (plasma_rcap+plasma_rdownramp-r)/plasma_rdownramp;
+ }
+ // Longitudinal part of the profile
+ Real nz;
+ if (z<plasma_zmin) {
+ nz = 0;
+ } else if (z<plasma_zmin+plasma_lramp_start) {
+ nz = (z-plasma_zmin)/plasma_lramp_start;
+ } else if (z<plasma_zmax-plasma_lramp_end) {
+ nz = 1.;
+ } else if (z<plasma_zmax){
+ nz = -(z-plasma_zmax)/plasma_lramp_end;
+ } else {
+ nz = 0;
+ }
+ // Combine and saturate profile
+ Real n = nr*nz;
+ if (n > 4.) {
+ n = 4.;
+ }
+
+ return on_axis_density*n;
}
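For quick inspection or plotting outside the C++ build, the profile above can be transcribed to Python, as in the sketch below (the ordering of params follows the indices read at the top of getDensity):

    import math

    def get_density(x, y, z, params):
        # Python transcription (sketch) of CustomDensityProfile::getDensity.
        (n0, zmin, zmax, lramp_start, lramp_end,
         rcap, rdownramp, rchannel) = params[:8]
        re_ = 2.8178403227e-15          # classical electron radius (m)
        r2 = x*x + y*y
        r = math.sqrt(r2)
        # Transverse parabolic channel, ramped down beyond rcap
        if r < rcap:
            nr = 1. + r2 / (math.pi * n0 * re_ * rchannel**4)
        else:
            nr = 1. + (rcap**2 / (math.pi * n0 * re_ * rchannel**4)
                       * (rcap + rdownramp - r) / rdownramp)
        # Longitudinal trapezoidal ramps
        if z < zmin:
            nz = 0.
        elif z < zmin + lramp_start:
            nz = (z - zmin) / lramp_start
        elif z < zmax - lramp_end:
            nz = 1.
        elif z < zmax:
            nz = -(z - zmax) / lramp_end
        else:
            nz = 0.
        # Combine and saturate at 4x the on-axis density
        return n0 * min(nr * nz, 4.)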
diff --git a/Source/ParticleContainer.H b/Source/ParticleContainer.H
index a1402d4df..9767edd59 100644
--- a/Source/ParticleContainer.H
+++ b/Source/ParticleContainer.H
@@ -120,6 +120,8 @@ public:
void Redistribute ();
+ void RedistributeLocal ();
+
amrex::Vector<long> NumberOfParticlesInGrid(int lev) const;
void Increment (amrex::MultiFab& mf, int lev);
diff --git a/Source/ParticleContainer.cpp b/Source/ParticleContainer.cpp
index 55889cf8d..168b9167c 100644
--- a/Source/ParticleContainer.cpp
+++ b/Source/ParticleContainer.cpp
@@ -186,6 +186,14 @@ MultiParticleContainer::Redistribute ()
}
}
+void
+MultiParticleContainer::RedistributeLocal ()
+{
+ for (auto& pc : allcontainers) {
+ pc->Redistribute(0, 0, 0, true);
+ }
+}
+
Vector<long>
MultiParticleContainer::NumberOfParticlesInGrid(int lev) const
{
diff --git a/Source/PhysicalParticleContainer.cpp b/Source/PhysicalParticleContainer.cpp
index e5f6b0c82..050e50daa 100644
--- a/Source/PhysicalParticleContainer.cpp
+++ b/Source/PhysicalParticleContainer.cpp
@@ -225,9 +225,9 @@ PhysicalParticleContainer::AddPlasma(int lev, RealBox part_realbox )
// and that the boost is along z)
Real t = WarpX::GetInstance().gett_new(lev);
Real v_boost = WarpX::beta_boost*PhysConst::c;
- Real z_lab = WarpX::gamma_boost*( z - v_boost*t );
+ Real z_lab = WarpX::gamma_boost*( z + v_boost*t );
plasma_injector->getMomentum(u, x, y, z_lab);
- dens = plasma_injector->getDensity(x, y, z);
+ dens = plasma_injector->getDensity(x, y, z_lab);
// Perform Lorentz transform
// (Assumes that the plasma has a low velocity)
u[2] = WarpX::gamma_boost * ( u[2] - v_boost );
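The sign fix above applies the inverse Lorentz transformation, z_lab = gamma*(z + v_boost*t), mapping a boosted-frame event to its lab-frame position (and evaluating the density there). A small numerical check, with illustrative values:

    import math

    c = 299792458.0
    gamma_boost = 15.0
    beta_boost = math.sqrt(1.0 - 1.0 / gamma_boost**2)
    v_boost = beta_boost * c

    def z_lab_of(z, t):
        # Matches the corrected line: z_lab = gamma_boost*(z + v_boost*t)
        return gamma_boost * (z + v_boost * t)

    print(z_lab_of(0.0, 0.0))        # frames coincide at t = 0 -> 0.0
    print(z_lab_of(-1.e-6, 1.e-13))  # a sample event during the run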
diff --git a/Source/WarpX.H b/Source/WarpX.H
index d0876afd2..35cf93cc7 100644
--- a/Source/WarpX.H
+++ b/Source/WarpX.H
@@ -90,8 +90,16 @@ public:
static amrex::Vector<amrex::Real> boost_direction;
const amrex::MultiFab& getcurrent (int lev, int direction) {return *current_fp[lev][direction];}
- const amrex::MultiFab& getEfield (int lev, int direction) {return *Efield_fp[lev][direction];}
- const amrex::MultiFab& getBfield (int lev, int direction) {return *Bfield_fp[lev][direction];}
+ const amrex::MultiFab& getEfield (int lev, int direction) {return *Efield_aux[lev][direction];}
+ const amrex::MultiFab& getBfield (int lev, int direction) {return *Bfield_aux[lev][direction];}
+
+ const amrex::MultiFab& getcurrent_cp (int lev, int direction) {return *current_cp[lev][direction];}
+ const amrex::MultiFab& getEfield_cp (int lev, int direction) {return *Efield_cp[lev][direction];}
+ const amrex::MultiFab& getBfield_cp (int lev, int direction) {return *Bfield_cp[lev][direction];}
+
+ const amrex::MultiFab& getcurrent_fp (int lev, int direction) {return *current_fp[lev][direction];}
+ const amrex::MultiFab& getEfield_fp (int lev, int direction) {return *Efield_fp[lev][direction];}
+ const amrex::MultiFab& getBfield_fp (int lev, int direction) {return *Bfield_fp[lev][direction];}
static amrex::MultiFab* getCosts (int lev) {
if (m_instance) {
diff --git a/Source/WarpXBoostedFrameDiagnostic.H b/Source/WarpXBoostedFrameDiagnostic.H
index a5f0bd7ff..96a77f182 100644
--- a/Source/WarpXBoostedFrameDiagnostic.H
+++ b/Source/WarpXBoostedFrameDiagnostic.H
@@ -67,8 +67,8 @@ class BoostedFrameDiagnostic {
int boost_direction_;
amrex::Vector<std::unique_ptr<amrex::MultiFab> > data_buffer_;
- int num_buffer_ = 32;
- int max_box_size_ = 64;
+ int num_buffer_ = 256;
+ int max_box_size_ = 256;
amrex::Vector<int> buff_counter_;
amrex::Vector<LabSnapShot> snapshots_;
diff --git a/Source/WarpXBoostedFrameDiagnostic.cpp b/Source/WarpXBoostedFrameDiagnostic.cpp
index d58ed0be7..a617961ee 100644
--- a/Source/WarpXBoostedFrameDiagnostic.cpp
+++ b/Source/WarpXBoostedFrameDiagnostic.cpp
@@ -46,7 +46,10 @@ BoostedFrameDiagnostic::
writeLabFrameData(const MultiFab& cell_centered_data, const Geometry& geom, Real t_boost) {
BL_PROFILE("BoostedFrameDiagnostic::writeLabFrameData");
-
+
+ VisMF::Header::Version current_version = VisMF::GetHeaderVersion();
+ VisMF::SetHeaderVersion(amrex::VisMF::Header::NoFabHeader_v1);
+
const RealBox& domain_z_boost = geom.ProbDomain();
const Real zlo_boost = domain_z_boost.lo(boost_direction_);
const Real zhi_boost = domain_z_boost.hi(boost_direction_);
@@ -119,6 +122,8 @@ writeLabFrameData(const MultiFab& cell_centered_data, const Geometry& geom, Real
buff_counter_[i] = 0;
}
}
+
+ VisMF::SetHeaderVersion(current_version);
}
void
@@ -167,20 +172,23 @@ LabSnapShot(Real t_lab_in, Real zmin_lab_in,
current_z_boost = 0.0;
file_name = Concatenate("lab_frame_data/snapshot", file_num, 5);
- const int nlevels = 1;
- const std::string level_prefix = "Level_";
-
- if (!UtilCreateDirectory(file_name, 0755))
- CreateDirectoryFailed(file_name);
- for(int i(0); i < nlevels; ++i) {
- const std::string &fullpath = LevelFullPath(i, file_name);
- if (!UtilCreateDirectory(fullpath, 0755))
- CreateDirectoryFailed(fullpath);
+ if (ParallelDescriptor::IOProcessor()) {
+
+ const int nlevels = 1;
+ const std::string level_prefix = "Level_";
+
+ if (!UtilCreateDirectory(file_name, 0755))
+ CreateDirectoryFailed(file_name);
+ for(int i(0); i < nlevels; ++i) {
+ const std::string &fullpath = LevelFullPath(i, file_name);
+ if (!UtilCreateDirectory(fullpath, 0755))
+ CreateDirectoryFailed(fullpath);
+ }
}
ParallelDescriptor::Barrier();
- writeSnapShotHeader();
+ if (ParallelDescriptor::IOProcessor()) writeSnapShotHeader();
}
void
diff --git a/Source/WarpXEvolve.cpp b/Source/WarpXEvolve.cpp
index 6ce7db0ef..cda70dd53 100644
--- a/Source/WarpXEvolve.cpp
+++ b/Source/WarpXEvolve.cpp
@@ -260,7 +260,12 @@ WarpX::EvolveEM (int numsteps)
// We might need to move j because we are going to make a plotfile.
MoveWindow(move_j);
- mypc->Redistribute(); // Redistribute particles
+ if (max_level == 0) {
+ mypc->RedistributeLocal();
+ }
+ else {
+ mypc->Redistribute();
+ }
amrex::Print()<< "STEP " << step+1 << " ends." << " TIME = " << cur_time
<< " DT = " << dt[0] << "\n";
diff --git a/Source/WarpXIO.cpp b/Source/WarpXIO.cpp
index b289fdde9..212d69a14 100644
--- a/Source/WarpXIO.cpp
+++ b/Source/WarpXIO.cpp
@@ -404,7 +404,7 @@ WarpX::GetCellCenteredData() {
DistributionMap(lev),
nc, ng) );
- Array<const MultiFab*> srcmf(BL_SPACEDIM);
+ Vector<const MultiFab*> srcmf(BL_SPACEDIM);
int dcomp = 0;
// first the electric field
diff --git a/Source/WarpXInitData.cpp b/Source/WarpXInitData.cpp
index df01afe88..0d6c35a4d 100644
--- a/Source/WarpXInitData.cpp
+++ b/Source/WarpXInitData.cpp
@@ -54,8 +54,12 @@ WarpX::InitDiagnostics () {
const Real* current_hi = geom[0].ProbHi();
Real dt_boost = dt[0];
- myBFD.reset(new BoostedFrameDiagnostic(current_lo[moving_window_dir],
- current_hi[moving_window_dir],
+ // Find the positions of the lab-frame box that corresponds to the boosted-frame box at t=0
+ Real zmin_lab = current_lo[moving_window_dir]/( (1.+beta_boost)*gamma_boost );
+ Real zmax_lab = current_hi[moving_window_dir]/( (1.+beta_boost)*gamma_boost );
+
+ myBFD.reset(new BoostedFrameDiagnostic(zmin_lab,
+ zmax_lab,
moving_window_v, dt_snapshots_lab,
num_snapshots_lab, gamma_boost, dt_boost,
moving_window_dir));
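The conversion factor 1/((1+beta)*gamma) applied to the box edges above can be reproduced numerically, as in the sketch below (values are illustrative, matching the gamma_boost = 15 of the example decks):

    import math

    gamma_boost = 15.0
    beta_boost = math.sqrt(1.0 - 1.0 / gamma_boost**2)

    def to_lab_edge(z_boost_edge):
        # Same conversion as applied to current_lo/current_hi above.
        return z_boost_edge / ((1.0 + beta_boost) * gamma_boost)

    print(to_lab_edge(-0.6e-3))   # lower edge of the example domain, in the lab frame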
diff --git a/Source/WarpXWrappers.cpp b/Source/WarpXWrappers.cpp
index 54aeedf35..d106cfca7 100644
--- a/Source/WarpXWrappers.cpp
+++ b/Source/WarpXWrappers.cpp
@@ -192,6 +192,30 @@ extern "C"
return getMultiFabLoVects(mf, return_size, ngrow);
}
+ double** warpx_getEfieldCP(int lev, int direction,
+ int *return_size, int *ngrow, int **shapes) {
+ auto & mf = WarpX::GetInstance().getEfield_cp(lev, direction);
+ return getMultiFabPointers(mf, return_size, ngrow, shapes);
+ }
+
+ int* warpx_getEfieldCPLoVects(int lev, int direction,
+ int *return_size, int *ngrow) {
+ auto & mf = WarpX::GetInstance().getEfield_cp(lev, direction);
+ return getMultiFabLoVects(mf, return_size, ngrow);
+ }
+
+ double** warpx_getEfieldFP(int lev, int direction,
+ int *return_size, int *ngrow, int **shapes) {
+ auto & mf = WarpX::GetInstance().getEfield_fp(lev, direction);
+ return getMultiFabPointers(mf, return_size, ngrow, shapes);
+ }
+
+ int* warpx_getEfieldFPLoVects(int lev, int direction,
+ int *return_size, int *ngrow) {
+ auto & mf = WarpX::GetInstance().getEfield_fp(lev, direction);
+ return getMultiFabLoVects(mf, return_size, ngrow);
+ }
+
double** warpx_getBfield(int lev, int direction,
int *return_size, int *ngrow, int **shapes) {
auto & mf = WarpX::GetInstance().getBfield(lev, direction);
@@ -204,6 +228,30 @@ extern "C"
return getMultiFabLoVects(mf, return_size, ngrow);
}
+ double** warpx_getBfieldCP(int lev, int direction,
+ int *return_size, int *ngrow, int **shapes) {
+ auto & mf = WarpX::GetInstance().getBfield_cp(lev, direction);
+ return getMultiFabPointers(mf, return_size, ngrow, shapes);
+ }
+
+ int* warpx_getBfieldCPLoVects(int lev, int direction,
+ int *return_size, int *ngrow) {
+ auto & mf = WarpX::GetInstance().getBfield_cp(lev, direction);
+ return getMultiFabLoVects(mf, return_size, ngrow);
+ }
+
+ double** warpx_getBfieldFP(int lev, int direction,
+ int *return_size, int *ngrow, int **shapes) {
+ auto & mf = WarpX::GetInstance().getBfield_fp(lev, direction);
+ return getMultiFabPointers(mf, return_size, ngrow, shapes);
+ }
+
+ int* warpx_getBfieldFPLoVects(int lev, int direction,
+ int *return_size, int *ngrow) {
+ auto & mf = WarpX::GetInstance().getBfield_fp(lev, direction);
+ return getMultiFabLoVects(mf, return_size, ngrow);
+ }
+
double** warpx_getCurrentDensity(int lev, int direction,
int *return_size, int *ngrow, int **shapes) {
auto & mf = WarpX::GetInstance().getcurrent(lev, direction);
@@ -216,6 +264,30 @@ extern "C"
return getMultiFabLoVects(mf, return_size, ngrow);
}
+ double** warpx_getCurrentDensityCP(int lev, int direction,
+ int *return_size, int *ngrow, int **shapes) {
+ auto & mf = WarpX::GetInstance().getcurrent_cp(lev, direction);
+ return getMultiFabPointers(mf, return_size, ngrow, shapes);
+ }
+
+ int* warpx_getCurrentDensityCPLoVects(int lev, int direction,
+ int *return_size, int *ngrow) {
+ auto & mf = WarpX::GetInstance().getcurrent_cp(lev, direction);
+ return getMultiFabLoVects(mf, return_size, ngrow);
+ }
+
+ double** warpx_getCurrentDensityFP(int lev, int direction,
+ int *return_size, int *ngrow, int **shapes) {
+ auto & mf = WarpX::GetInstance().getcurrent_fp(lev, direction);
+ return getMultiFabPointers(mf, return_size, ngrow, shapes);
+ }
+
+ int* warpx_getCurrentDensityFPLoVects(int lev, int direction,
+ int *return_size, int *ngrow) {
+ auto & mf = WarpX::GetInstance().getcurrent_fp(lev, direction);
+ return getMultiFabLoVects(mf, return_size, ngrow);
+ }
+
double** warpx_getParticleStructs(int speciesnumber,
int* num_tiles, int** particles_per_tile) {
auto & mypc = WarpX::GetInstance().GetPartContainer();
diff --git a/Tools/performance_tests/automated_test_1_uniform_rest_32ppc b/Tools/performance_tests/automated_test_1_uniform_rest_32ppc
new file mode 100644
index 000000000..0f2f5e036
--- /dev/null
+++ b/Tools/performance_tests/automated_test_1_uniform_rest_32ppc
@@ -0,0 +1,58 @@
+# Maximum number of time steps
+max_step = 100
+
+# number of grid points
+amr.n_cell = 128 128 128
+
+amr.plot_int = -1 # How often to write plotfiles.
+
+# Maximum allowable size of each subdomain in the problem domain;
+# this is used to decompose the domain for parallel calculations.
+amr.max_grid_size = 32
+
+# Maximum level in hierarchy (for now must be 0, i.e., one level in total)
+amr.max_level = 0
+
+# Geometry
+geometry.coord_sys = 0 # 0: Cartesian
+geometry.is_periodic = 1 1 1 # Is periodic?
+geometry.prob_lo = -20.e-6 -20.e-6 -20.e-6 # physical domain
+geometry.prob_hi = 20.e-6 20.e-6 20.e-6
+
+# Verbosity
+warpx.verbose = 1
+
+# Algorithms
+algo.current_deposition = 2
+algo.charge_deposition = 0
+algo.field_gathering = 0
+algo.particle_pusher = 0
+interpolation.nox = 1
+interpolation.noy = 1
+interpolation.noz = 1
+
+# CFL
+warpx.cfl = 1.0
+
+particles.nspecies = 2
+particles.species_names = electrons ions
+
+electrons.charge = -q_e
+electrons.mass = m_e
+electrons.injection_style = "NUniformPerCell"
+electrons.num_particles_per_cell_each_dim = 2 2 4
+electrons.profile = constant
+electrons.density = 1.e20 # number of electrons per m^3
+electrons.momentum_distribution_type = "gaussian"
+electrons.u_th = 0.01 # uth the std of the (unitless) momentum
+electrons.uz_m = 0. # Mean momentum along z (unitless)
+
+ions.charge = q_e
+ions.mass = m_p
+ions.injection_style = "NUniformPerCell"
+ions.num_particles_per_cell_each_dim = 2 2 4
+ions.profile = constant
+ions.density = 1.e20 # number of electrons per m^3
+ions.momentum_distribution_type = "gaussian"
+ions.u_th = 0.01 # uth the std of the (unitless) momentum
+ions.uz_m = 0. # Mean momentum along z (unitless)
diff --git a/Tools/performance_tests/automated_test_2_uniform_rest_1ppc b/Tools/performance_tests/automated_test_2_uniform_rest_1ppc
new file mode 100644
index 000000000..603d29a6d
--- /dev/null
+++ b/Tools/performance_tests/automated_test_2_uniform_rest_1ppc
@@ -0,0 +1,48 @@
+# Maximum number of time steps
+max_step = 100
+
+# number of grid points
+amr.n_cell = 256 256 256
+
+amr.plot_int = -1 # How often to write plotfiles.
+
+# Maximum allowable size of each subdomain in the problem domain;
+# this is used to decompose the domain for parallel calculations.
+amr.max_grid_size = 32
+
+# Maximum level in hierarchy (for now must be 0, i.e., one level in total)
+amr.max_level = 0
+
+# Geometry
+geometry.coord_sys = 0 # 0: Cartesian
+geometry.is_periodic = 1 1 1 # Is periodic?
+geometry.prob_lo = -20.e-6 -20.e-6 -20.e-6 # physical domain
+geometry.prob_hi = 20.e-6 20.e-6 20.e-6
+
+# Verbosity
+warpx.verbose = 1
+
+# Algorithms
+algo.current_deposition = 0
+algo.charge_deposition = 0
+algo.field_gathering = 0
+algo.particle_pusher = 0
+interpolation.nox = 1
+interpolation.noy = 1
+interpolation.noz = 1
+
+# CFL
+warpx.cfl = 1.0
+
+particles.nspecies = 1
+particles.species_names = electrons
+
+electrons.charge = -q_e
+electrons.mass = m_e
+electrons.injection_style = "NUniformPerCell"
+electrons.num_particles_per_cell_each_dim = 1 1 1
+electrons.profile = constant
+electrons.density = 1.e20 # number of electrons per m^3
+electrons.momentum_distribution_type = "gaussian"
+electrons.u_th = 0.01 # uth the std of the (unitless) momentum
+electrons.uz_m = 0. # Mean momentum along z (unitless)
diff --git a/Tools/performance_tests/automated_test_3_uniform_drift_4ppc b/Tools/performance_tests/automated_test_3_uniform_drift_4ppc
new file mode 100644
index 000000000..d8a257d96
--- /dev/null
+++ b/Tools/performance_tests/automated_test_3_uniform_drift_4ppc
@@ -0,0 +1,58 @@
+# Maximum number of time steps
+max_step = 100
+
+# number of grid points
+amr.n_cell = 128 128 128
+
+amr.plot_int = -1 # How often to write plotfiles.
+
+# Maximum allowable size of each subdomain in the problem domain;
+# this is used to decompose the domain for parallel calculations.
+amr.max_grid_size = 32
+
+# Maximum level in hierarchy (for now must be 0, i.e., one level in total)
+amr.max_level = 0
+
+# Geometry
+geometry.coord_sys = 0 # 0: Cartesian
+geometry.is_periodic = 1 1 1 # Is periodic?
+geometry.prob_lo = -20.e-6 -20.e-6 -20.e-6 # physical domain
+geometry.prob_hi = 20.e-6 20.e-6 20.e-6
+
+# Verbosity
+warpx.verbose = 1
+
+# Algorithms
+algo.current_deposition = 2
+algo.charge_deposition = 0
+algo.field_gathering = 0
+algo.particle_pusher = 0
+interpolation.nox = 1
+interpolation.noy = 1
+interpolation.noz = 1
+
+# CFL
+warpx.cfl = 1.0
+
+particles.nspecies = 2
+particles.species_names = electrons ions
+
+electrons.charge = -q_e
+electrons.mass = m_e
+electrons.injection_style = "NUniformPerCell"
+electrons.num_particles_per_cell_each_dim = 2 1 1
+electrons.profile = constant
+electrons.density = 1.e20 # number of electrons per m^3
+electrons.momentum_distribution_type = "gaussian"
+electrons.u_th = 0.01 # uth the std of the (unitless) momentum
+electrons.uz_m = 100. # Mean momentum along z (unitless)
+
+ions.charge = q_e
+ions.mass = m_p
+ions.injection_style = "NUniformPerCell"
+ions.num_particles_per_cell_each_dim = 2 1 1
+ions.profile = constant
+ions.density = 1.e20 # number of electrons per m^3
+ions.momentum_distribution_type = "gaussian"
+ions.u_th = 0.01 # uth the std of the (unitless) momentum
+ions.uz_m = 100. # Mean momentum along z (unitless)
diff --git a/Tools/performance_tests/automated_test_4_labdiags_2ppc b/Tools/performance_tests/automated_test_4_labdiags_2ppc
new file mode 100644
index 000000000..54512001c
--- /dev/null
+++ b/Tools/performance_tests/automated_test_4_labdiags_2ppc
@@ -0,0 +1,100 @@
+# Maximum number of time steps
+max_step = 100
+
+# number of grid points
+amr.n_cell = 64 64 512
+
+# Maximum allowable size of each subdomain in the problem domain;
+# this is used to decompose the domain for parallel calculations.
+
+amr.max_grid_size = 32
+
+# Maximum level in hierarchy (for now must be 0, i.e., one level in total)
+amr.max_level = 0
+amr.plot_int = 10 # How often to write plotfiles. "<= 0" means no plotfiles.
+amr.check_int = 10
+
+# Geometry
+geometry.coord_sys = 0 # 0: Cartesian
+geometry.is_periodic = 1 1 0 # Is periodic?
+geometry.prob_lo = -150.e-6 -150.e-6 -0.6e-3 # physical domain
+geometry.prob_hi = 150.e-6 150.e-6 0.
+
+# Verbosity
+warpx.verbose = 1
+
+# Algorithms
+algo.current_deposition = 2
+algo.charge_deposition = 0
+algo.field_gathering = 0
+algo.particle_pusher = 0
+
+# Numerics
+interpolation.nox = 3
+interpolation.noy = 3
+interpolation.noz = 3
+warpx.use_filter = 1
+warpx.cfl = 1.0
+warpx.do_pml = 0
+
+# Moving window
+warpx.do_moving_window = 1
+warpx.moving_window_dir = z
+warpx.moving_window_v = 1.0 # in units of the speed of light
+
+# Boosted frame
+warpx.gamma_boost = 15.
+warpx.boost_direction = z
+
+# Diagnostics
+warpx.do_boosted_frame_diagnostic = 1
+warpx.num_snapshots_lab = 20
+warpx.dt_snapshots_lab = 7.0e-14
+
+# Particle Injection
+warpx.do_plasma_injection = 1
+warpx.num_injected_species = 2
+warpx.injected_plasma_species = 0 1
+
+# Species
+particles.nspecies = 2
+particles.species_names = electrons ions
+
+electrons.charge = -q_e
+electrons.mass = m_e
+electrons.injection_style = "NUniformPerCell"
+electrons.xmin = -150.e-6
+electrons.xmax = 150.e-6
+electrons.ymin = -150.e-6
+electrons.ymax = 150.e-6
+electrons.zmin = 0.e-6
+electrons.num_particles_per_cell_each_dim = 1 1 1
+electrons.profile = constant
+electrons.density = 1.
+electrons.momentum_distribution_type = "constant"
+
+ions.charge = q_e
+ions.mass = m_p
+ions.injection_style = "NUniformPerCell"
+ions.xmin = -150.e-6
+ions.xmax = 150.e-6
+ions.ymin = -150.e-6
+ions.ymax = 150.e-6
+ions.zmin = 0.e-6
+ions.num_particles_per_cell_each_dim = 1 1 1
+ions.profile = constant
+ions.density = 1.
+ions.momentum_distribution_type = "constant"
+
+# Laser
+warpx.use_laser = 1
+laser.profile = Gaussian
+laser.position = 0. 0. -1.e-6 # This point is on the laser plane
+laser.direction = 0. 0. 1. # The plane normal direction
+laser.polarization = 1. 0. 0. # The main polarization vector
+laser.e_max = 8.e12 # Maximum amplitude of the laser field (in V/m)
+laser.profile_waist = 5.e-5 # The waist of the laser (in meters)
+laser.profile_duration = 16.7e-15 # The duration of the laser (in seconds)
+laser.profile_t_peak = 33.4e-15 # The time at which the laser reaches its peak (in seconds)
+laser.profile_focal_distance = 0.e-6 # Focal distance from the antenna (in meters)
+laser.wavelength = 0.8e-6 # The wavelength of the laser (in meters)
diff --git a/Tools/performance_tests/automated_test_5_loadimbalance b/Tools/performance_tests/automated_test_5_loadimbalance
new file mode 100644
index 000000000..6546f6804
--- /dev/null
+++ b/Tools/performance_tests/automated_test_5_loadimbalance
@@ -0,0 +1,76 @@
+# Maximum number of time steps
+max_step = 100
+
+# number of grid points
+amr.n_cell = 256 256 256
+
+amr.plot_int = -1 # How often to write plotfiles.
+
+# Maximum allowable size of each subdomain in the problem domain;
+# this is used to decompose the domain for parallel calculations.
+amr.max_grid_size = 32
+
+# Maximum level in hierarchy (for now must be 0, i.e., one level in total)
+amr.max_level = 0
+
+# Geometry
+geometry.coord_sys = 0 # 0: Cartesian
+geometry.is_periodic = 1 1 1 # Is periodic?
+geometry.prob_lo = -20.e-6 -20.e-6 -20.e-6 # physical domain
+geometry.prob_hi = 20.e-6 20.e-6 20.e-6
+
+# Verbosity
+warpx.verbose = 1
+
+# Algorithms
+algo.current_deposition = 2
+algo.charge_deposition = 0
+algo.field_gathering = 0
+algo.particle_pusher = 0
+interpolation.nox = 1
+interpolation.noy = 1
+interpolation.noz = 1
+
+# CFL
+warpx.cfl = 1.0
+
+particles.nspecies = 2
+particles.species_names = electrons ions
+
+electrons.charge = -q_e
+electrons.mass = m_e
+electrons.injection_style = "gaussian_beam"
+electrons.x_rms = 2.e-6
+electrons.y_rms = 2.e-6
+electrons.z_rms = 5.e-6
+electrons.x_m = 0.
+electrons.y_m = 0.
+electrons.z_m = 0.
+electrons.npart = 500000
+electrons.q_tot = -1.602e-9
+electrons.profile = "constant"
+electrons.density = 1.e25
+electrons.momentum_distribution_type = "gaussian"
+electrons.ux_m = 0.0
+electrons.uy_m = 0.0
+electrons.uz_m = 0.0
+electrons.u_th = 0.01
+
+ions.charge = q_e
+ions.mass = m_p
+ions.injection_style = "gaussian_beam"
+ions.x_rms = 2.e-6
+ions.y_rms = 2.e-6
+ions.z_rms = 5.e-6
+ions.x_m = 0.
+ions.y_m = 0.
+ions.z_m = 0.
+ions.npart = 500000
+ions.q_tot = -1.602e-9
+ions.profile = "constant"
+ions.density = 1.e25
+ions.momentum_distribution_type = "gaussian"
+ions.ux_m = 0.0
+ions.uy_m = 0.0
+ions.uz_m = 0.0
+ions.u_th = 0.01
diff --git a/Tools/performance_tests/automated_test_6_output_2ppc b/Tools/performance_tests/automated_test_6_output_2ppc
new file mode 100644
index 000000000..a1c4172fe
--- /dev/null
+++ b/Tools/performance_tests/automated_test_6_output_2ppc
@@ -0,0 +1,58 @@
+# Maximum number of time steps
+max_step = 10
+
+# number of grid points
+amr.n_cell = 128 128 128
+
+amr.plot_int = 2 # How often to write plotfiles.
+
+# Maximum allowable size of each subdomain in the problem domain;
+# this is used to decompose the domain for parallel calculations.
+amr.max_grid_size = 32
+
+# Maximum level in hierarchy (for now must be 0, i.e., one level in total)
+amr.max_level = 0
+
+# Geometry
+geometry.coord_sys = 0 # 0: Cartesian
+geometry.is_periodic = 1 1 1 # Is periodic?
+geometry.prob_lo = -20.e-6 -20.e-6 -20.e-6 # physical domain
+geometry.prob_hi = 20.e-6 20.e-6 20.e-6
+
+# Verbosity
+warpx.verbose = 1
+
+# Algorithms
+algo.current_deposition = 2
+algo.charge_deposition = 0
+algo.field_gathering = 0
+algo.particle_pusher = 0
+interpolation.nox = 1
+interpolation.noy = 1
+interpolation.noz = 1
+
+# CFL
+warpx.cfl = 1.0
+
+particles.nspecies = 2
+particles.species_names = electrons ions
+
+electrons.charge = -q_e
+electrons.mass = m_e
+electrons.injection_style = "NUniformPerCell"
+electrons.num_particles_per_cell_each_dim = 1 1 1
+electrons.profile = constant
+electrons.density = 1.e20 # number of electrons per m^3
+electrons.momentum_distribution_type = "gaussian"
+electrons.u_th = 0.01 # uth the std of the (unitless) momentum
+electrons.uz_m = 0. # Mean momentum along z (unitless)
+
+ions.charge = q_e
+ions.mass = m_p
+ions.injection_style = "NUniformPerCell"
+ions.num_particles_per_cell_each_dim = 1 1 1
+ions.profile = constant
+ions.density = 1.e20 # number of electrons per m^3
+ions.momentum_distribution_type = "gaussian"
+ions.u_th = 0.01 # uth the std of the (unitless) momentum
+ions.uz_m = 0. # Mean momentum along z (unitless)
diff --git a/Tools/performance_tests/functions_perftest.py b/Tools/performance_tests/functions_perftest.py
new file mode 100644
index 000000000..2085367c7
--- /dev/null
+++ b/Tools/performance_tests/functions_perftest.py
@@ -0,0 +1,189 @@
+import os, shutil, re
+
+def run_batch_nnode(test_list, res_dir, bin_name, config_command, architecture='knl', Cname='knl', n_node=1):
+ # Clean res_dir
+ if os.path.exists(res_dir):
+ shutil.rmtree(res_dir)
+ os.makedirs(res_dir)
+ # Copy files to res_dir
+ cwd = os.environ['WARPX'] + '/Tools/performance_tests/'
+ bin_dir = cwd + 'Bin/'
+ shutil.copy(bin_dir + bin_name, res_dir)
+ os.chdir(res_dir)
+ # Calculate simulation time: 5 min + 5 min per simulation
+ job_time_min = 5. + len(test_list)*5.
+ job_time_str = str(int(job_time_min/60)) + ':' + str(int(job_time_min%60)) + ':00'
+ batch_string = ''
+ batch_string += '#!/bin/bash\n'
+ batch_string += '#SBATCH --job-name=' + test_list[0][0] + '\n'
+ batch_string += '#SBATCH --time=' + job_time_str + '\n'
+ batch_string += '#SBATCH -C ' + Cname + '\n'
+ batch_string += '#SBATCH -N ' + str(n_node) + '\n'
+ batch_string += '#SBATCH -q regular\n'
+ batch_string += '#SBATCH -e error.txt\n'
+ batch_string += '#SBATCH --account=m2852\n'
+
+ for count, test_item in enumerate(test_list):
+ # test_item reads [input_file, n_node, n_mpi, n_omp]
+ input_file = test_item[0]
+ shutil.copy(cwd + input_file, res_dir)
+ # test_item[1] is not read since it contains the number of nodes, which is a
+ # global parameter; we keep it for compatibility with run_alltests.py.
+ n_mpi = test_item[2]
+ n_omp = test_item[3]
+ srun_string = ''
+ srun_string += 'export OMP_NUM_THREADS=' + str(n_omp) + '\n'
+ # number of logical cores per MPI process
+ if architecture == 'cpu':
+ cflag_value = max(1, int(32/n_mpi) * 2) # Follow NERSC directives
+ elif architecture == 'knl':
+ cflag_value = max(1, int(64/n_mpi) * 4) # Follow NERSC directives
+ output_filename = 'out_' + '_'.join([input_file, str(n_node), str(n_mpi), str(n_omp), str(count)]) + '.txt'
+ srun_string += 'srun --cpu_bind=cores '+ \
+ ' -n ' + str(n_node*n_mpi) + \
+ ' -c ' + str(cflag_value) + \
+ ' ./' + bin_name + \
+ ' ' + input_file + \
+ ' > ' + output_filename + '\n'
+ batch_string += srun_string
+ batch_string += 'rm -rf plt*\n'
+ batch_string += 'rm -rf chk*\n'
+ batch_string += 'rm -rf lab_frame_data\n'
+ batch_file = 'slurm'
+ f_exe = open(batch_file,'w')
+ f_exe.write(batch_string)
+ f_exe.close()
+ os.system('chmod 700 ' + bin_name)
+ os.system(config_command + 'sbatch ' + batch_file + ' >> ' + cwd + 'log_jobids_tmp.txt')
+ return 0
+
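A hypothetical driver call for run_batch_nnode is sketched below. The repository path, result directory, and binary name are placeholders, and each test_list entry follows the [input_file, n_node, n_mpi, n_omp] layout read in the loop above.

    import os

    # run_batch_nnode expects the WARPX environment variable to point at
    # the repository root; the path below is a placeholder.
    os.environ.setdefault('WARPX', '/path/to/WarpX')

    test_list = [
        # [input_file, n_node, n_mpi, n_omp]
        ['automated_test_1_uniform_rest_32ppc', 1, 8, 8],
        ['automated_test_2_uniform_rest_1ppc',  1, 8, 8],
    ]
    run_batch_nnode(test_list,
                    res_dir='/scratch/perf_results/',        # placeholder
                    bin_name='main3d.gnu.TPROF.MPI.OMP.ex',  # placeholder
                    config_command='',
                    architecture='knl', Cname='knl', n_node=1)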
+def run_batch(run_name, res_dir, bin_name, config_command, architecture='knl',\
+ Cname='knl', n_node=1, n_mpi=1, n_omp=1):
+ # Clean res_dir
+ if os.path.exists(res_dir):
+ shutil.rmtree(res_dir)
+ os.makedirs(res_dir)
+ # Copy files to res_dir
+ cwd = os.environ['WARPX'] + '/Tools/performance_tests/'
+ bin_dir = cwd + 'Bin/'
+ shutil.copy(bin_dir + bin_name, res_dir)
+ shutil.copyfile(cwd + run_name, res_dir + 'inputs')
+ os.chdir(res_dir)
+ batch_string = ''
+ batch_string += '#!/bin/bash\n'
+ batch_string += '#SBATCH --job-name=' + run_name + str(n_node) + str(n_mpi) + str(n_omp) + '\n'
+ batch_string += '#SBATCH --time=00:20:00\n'
+ batch_string += '#SBATCH -C ' + Cname + '\n'
+ batch_string += '#SBATCH -N ' + str(n_node) + '\n'
+ batch_string += '#SBATCH -q regular\n'
+ batch_string += '#SBATCH -e error.txt\n'
+ batch_string += '#SBATCH --account=m2852\n'
+ batch_string += 'export OMP_NUM_THREADS=' + str(n_omp) + '\n'
+ if architecture == 'cpu':
+ cflag_value = max(1, int(32/n_mpi) * 2) # Follow NERSC directives
+ batch_string += 'srun --cpu_bind=cores '+ \
+ ' -n ' + str(n_node*n_mpi) + \
+ ' -c ' + str(cflag_value) + \
+ ' ./' + bin_name + ' inputs > perf_output.txt'
+ elif architecture == 'knl':
+ # number of logical cores per MPI process
+ cflag_value = max(1, int(64/n_mpi) * 4) # Follow NERSC directives
+ batch_string += 'srun --cpu_bind=cores ' + \
+ ' -n ' + str(n_node*n_mpi) + \
+ ' -c ' + str(cflag_value) + \
+ ' ./' + bin_name + ' inputs > perf_output.txt\n'
+ batch_file = 'slurm'
+ f_exe = open(batch_file,'w')
+ f_exe.write(batch_string)
+ f_exe.close()
+ os.system('chmod 700 ' + bin_name)
+ os.system(config_command + 'sbatch ' + batch_file + ' >> ' + cwd + 'log_jobids_tmp.txt')
+ return 0
+
+# Read the output file and return the initialization time, the time per step,
+# and the per-routine timings
+def read_run_perf(filename, n_steps):
+ timing_list = []
+ # Search inclusive time to get simulation step time
+ partition_limit = 'NCalls Incl. Min Incl. Avg Incl. Max Max %'
+ with open(filename) as file_handler:
+ output_text = file_handler.read()
+ # Get total simulation time
+ line_match_totaltime = re.search('TinyProfiler total time across processes.*', output_text)
+ total_time = float(line_match_totaltime.group(0).split()[8])
+ search_area = output_text.partition(partition_limit)[2]
+ line_match_looptime = re.search('\nWarpX::Evolve().*', search_area)
+ time_wo_initialization = float(line_match_looptime.group(0).split()[3])
+ timing_list += [str(total_time - time_wo_initialization)]
+ timing_list += [str(time_wo_initialization/n_steps)]
+ partition_limit1 = 'NCalls Excl. Min Excl. Avg Excl. Max Max %'
+ partition_limit2 = 'NCalls Incl. Min Incl. Avg Incl. Max Max %'
+ with open(filename) as file_handler:
+ output_text = file_handler.read()
+ # Search exclusive times to get the per-routine timings
+ search_area = output_text.partition(partition_limit1)[2].partition(partition_limit2)[0]
+ pattern_list = ['\nParticleContainer::Redistribute().*',\
+ '\nFabArray::FillBoundary().*',\
+ '\nFabArray::ParallelCopy().*',\
+ '\nPICSAR::CurrentDeposition.*',\
+ '\nPICSAR::FieldGather.*',\
+ '\nPICSAR::ParticlePush.*',\
+ '\nPPC::Evolve::Copy.*',\
+ '\nWarpX::EvolveEM().*',\
+ 'Checkpoint().*',\
+ 'WriteParticles().*',\
+ '\nVisMF::Write\(FabArray\).*',\
+ '\nWriteMultiLevelPlotfile().*',\
+ '\nParticleContainer::RedistributeMPI().*']
+ for pattern in pattern_list:
+ timing = '0'
+ line_match = re.search(pattern, search_area)
+ if line_match is not None:
+ timing = [str(float(line_match.group(0).split()[3])/n_steps)]
+ timing_list += timing
+ return timing_list
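+
+# Usage sketch (file name and step count hypothetical): parse a TinyProfiler
+# report and get back a list of strings, e.g.
+# timings = read_run_perf('perf_output.txt', n_steps=10)
+# timings[0] is the initialization time, timings[1] the time per step,
+# followed by the per-routine timings listed in pattern_list.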
+
+# Write time into logfile
+def write_perf_logfile(log_file, log_line):
+ f_log = open(log_file, 'a')
+ f_log.write(log_line)
+ f_log.close()
+ return 0
+
+def get_nsteps(run_name):
+ with open(run_name) as file_handler:
+ run_name_text = file_handler.read()
+ line_match_nsteps = re.search('\nmax_step.*', run_name_text)
+ nsteps = float(line_match_nsteps.group(0).split()[2])
+ return nsteps
+
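+# Example (hypothetical input file): a file containing the line 'max_step = 100'
+# yields nsteps = 100.0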
+
+# Run a performance test in an interactive allocation
+# def run_interactive(run_name, res_dir, n_node=1, n_mpi=1, n_omp=1):
+# # Clean res_dir #
+# if os.path.exists(res_dir):
+# shutil.rmtree(res_dir)
+# os.makedirs(res_dir)
+# # Copy files to res_dir #
+# shutil.copyfile(bin_dir + bin_name, res_dir + bin_name)
+# shutil.copyfile(cwd + run_name, res_dir + 'inputs')
+# os.chdir(res_dir)
+# if args.architecture == 'cpu':
+# cflag_value = max(1, int(32/n_mpi) * 2) # Follow NERSC directives #
+# exec_command = 'export OMP_NUM_THREADS=' + str(n_omp) + ';' +\
+# 'srun --cpu_bind=cores ' + \
+# ' -n ' + str(n_node*n_mpi) + \
+# ' -c ' + str(cflag_value) + \
+# ' ./' + bin_name + ' inputs > perf_output.txt'
+# elif args.architecture == 'knl':
+# # number of logical cores per MPI process #
+# cflag_value = max(1,int(68/n_mpi) * 4) # Follow NERSC directives #
+# exec_command = 'export OMP_NUM_THREADS=' + str(n_omp) + ';' +\
+# 'srun --cpu_bind=cores ' + \
+# ' -n ' + str(n_node*n_mpi) + \
+# ' -c ' + str(cflag_value) + \
+# ' ./' + bin_name + ' inputs > perf_output.txt'
+# os.system('chmod 700 ' + bin_name)
+# os.system(config_command + exec_command)
+# return 0
diff --git a/Tools/performance_tests/performance_log.txt b/Tools/performance_tests/performance_log.txt
index cb38025d9..543d257a0 100644
--- a/Tools/performance_tests/performance_log.txt
+++ b/Tools/performance_tests/performance_log.txt
@@ -1,37 +1,33 @@
-## year month day run_name compiler architecture n_node n_mpi n_omp time_initialization(s) time_one_iteration(s)
-2017 10 13 uniform_plasma intel knl 1 1 1 1.88 0.8257
-2017 10 13 uniform_plasma intel knl 1 1 1 1.87 0.8229
-2017 10 13 uniform_plasma intel knl 1 1 1 1.87 0.8244
-2017 10 13 uniform_plasma intel knl 1 1 2 1.87 0.4372
-2017 10 13 uniform_plasma intel knl 1 1 2 1.89 0.4374
-2017 10 13 uniform_plasma intel knl 1 1 2 1.9 0.4366
-2017 10 13 uniform_plasma intel knl 2 2 1 0.54 0.2152
-2017 10 13 uniform_plasma intel knl 2 2 1 0.6 0.2163
-2017 10 13 uniform_plasma intel knl 2 2 1 0.6 0.217
-2017 10 13 uniform_plasma gnu knl 1 1 1 1.17 0.8062
-2017 10 13 uniform_plasma gnu knl 1 1 1 1.39 0.8062
-2017 10 13 uniform_plasma gnu knl 1 1 1 1.4 0.8067
-2017 10 13 uniform_plasma gnu knl 1 1 2 1.07 0.4271
-2017 10 13 uniform_plasma gnu knl 1 1 2 1.02 0.4249
-2017 10 13 uniform_plasma gnu knl 1 1 2 1.06 0.425
-2017 10 13 uniform_plasma gnu knl 2 2 1 0.75 0.2119
-2017 10 13 uniform_plasma gnu knl 2 2 1 0.86 0.2111
-2017 10 13 uniform_plasma gnu knl 2 2 1 0.71 0.2121
-2017 10 13 uniform_plasma gnu cpu 1 1 1 0.66 0.1667
-2017 10 13 uniform_plasma gnu cpu 1 1 1 0.71 0.1653
-2017 10 13 uniform_plasma gnu cpu 1 1 1 0.8 0.1667
-2017 10 13 uniform_plasma gnu cpu 1 1 2 0.579 0.09701
-2017 10 13 uniform_plasma gnu cpu 1 1 2 0.629 0.09651
-2017 10 13 uniform_plasma gnu cpu 1 1 2 0.892 0.09718
-2017 10 13 uniform_plasma gnu cpu 2 2 1 0.466 0.04317
-2017 10 13 uniform_plasma gnu cpu 2 2 1 0.535 0.04414
-2017 10 13 uniform_plasma gnu cpu 2 2 1 0.542 0.04404
-2017 10 15 uniform_plasma gnu knl 1 8 16 0.94 0.1971
-2017 10 15 uniform_plasma gnu knl 1 8 16 0.76 0.1795
-2017 10 15 uniform_plasma gnu knl 1 8 16 1.07 0.1799
-2017 10 15 uniform_plasma gnu knl 1 4 32 1.17 0.2019
-2017 10 15 uniform_plasma gnu knl 1 4 32 1.09 0.2055
-2017 10 15 uniform_plasma gnu knl 1 4 32 1.4 0.1926
-2017 10 15 uniform_plasma gnu knl 2 4 32 0.97 0.1313
-2017 10 15 uniform_plasma gnu knl 2 4 32 1.05 0.1402
-2017 10 15 uniform_plasma gnu knl 2 4 32 1.07 0.1429
+## year month day run_name compiler architecture n_node n_mpi n_omp time_initialization time_one_iteration Redistribute FillBoundary ParallelCopy CurrentDeposition FieldGather ParticlePush Copy EvolveEM Checkpoint WriteParticles Write_FabArray WriteMultiLevelPlotfile(unit: second) RedistributeMPI
+2018 01 31 automated_test_1_uniform_rest_32ppc intel knl 1 16 8 3.14 0.3986 0.1713 0.01719 0.01615 0.06987 0.03636 0.01901 0.01999 0.003602 0 0 0 0 0.007262
+2018 01 31 automated_test_1_uniform_rest_32ppc intel knl 1 16 8 3.39 0.4009 0.1712 0.01676 0.01583 0.07061 0.03684 0.01926 0.02011 0.003687 0 0 0 0 0.007841
+2018 01 31 automated_test_1_uniform_rest_32ppc intel knl 1 16 8 2.91 0.4024 0.1716 0.01826 0.01918 0.0703 0.0363 0.01912 0.01989 0.003017 0 0 0 0 0.007256
+2018 01 31 automated_test_1_uniform_rest_32ppc intel knl 1 16 8 3.21 0.3997 0.1717 0.01706 0.0162 0.07026 0.03655 0.01928 0.01999 0.003687 0 0 0 0 0.006799
+2018 01 31 automated_test_2_uniform_rest_1ppc intel knl 1 16 8 0.89 0.4779 0.04441 0.1143 0.09117 0.1072 0.01254 0.003702 0.004217 0.01247 0 0 0 0 0.003441
+2018 01 31 automated_test_2_uniform_rest_1ppc intel knl 1 16 8 1.58 0.4626 0.04424 0.1048 0.0851 0.1073 0.01259 0.003767 0.004282 0.01311 0 0 0 0 0.002798
+2018 01 31 automated_test_2_uniform_rest_1ppc intel knl 1 16 8 1.63 0.4616 0.04441 0.1033 0.08398 0.1079 0.01312 0.003802 0.004224 0.01278 0 0 0 0 0.003188
+2018 01 31 automated_test_2_uniform_rest_1ppc intel knl 1 16 8 1.72 0.461 0.04419 0.1038 0.08424 0.1074 0.01257 0.003799 0.0043 0.01318 0 0 0 0 0.002816
+2018 01 31 automated_test_1_uniform_rest_32ppc intel knl 1 16 8 3.32 0.3986 0.1712 0.01804 0.01697 0.06999 0.03615 0.01842 0.01896 0.003445 0 0 0 0 0.00738
+2018 01 31 automated_test_1_uniform_rest_32ppc intel knl 1 16 8 3.17 0.3974 0.1711 0.01722 0.01587 0.07016 0.03642 0.01844 0.01902 0.003431 0 0 0 0 0.007332
+2018 01 31 automated_test_1_uniform_rest_32ppc intel knl 1 16 8 2.88 0.3946 0.1709 0.01686 0.01562 0.06972 0.03595 0.01848 0.01916 0.003269 0 0 0 0 0.006887
+2018 01 31 automated_test_1_uniform_rest_32ppc intel knl 1 16 8 2.95 0.4094 0.1708 0.01761 0.01632 0.07001 0.03651 0.01863 0.01906 0.003314 0 0 0 0 0.01898
+2018 01 31 automated_test_2_uniform_rest_1ppc intel knl 1 16 8 1.3 0.4787 0.04447 0.1139 0.09124 0.108 0.01287 0.003811 0.004205 0.01249 0 0 0 0 0.003045
+2018 01 31 automated_test_2_uniform_rest_1ppc intel knl 1 16 8 3.16 0.4578 0.04412 0.1015 0.08339 0.1078 0.01301 0.003919 0.004182 0.0125 0 0 0 0 0.002701
+2018 01 31 automated_test_2_uniform_rest_1ppc intel knl 1 16 8 2.78 0.4679 0.04418 0.1035 0.08456 0.1079 0.01303 0.003902 0.004214 0.0127 0 0 0 0 0.009118
+2018 01 31 automated_test_2_uniform_rest_1ppc intel knl 1 16 8 1.12 0.4613 0.04425 0.1043 0.08517 0.1073 0.01242 0.003797 0.004221 0.01239 0 0 0 0 0.003665
+2018 01 31 automated_test_3_uniform_drift_4ppc intel knl 1 16 8 0.48 0.1237 0.03056 0.01622 0.01468 0.02039 0.005016 0.003737 0.002632 0.00326 0 0 0 0 0.006871
+2018 01 31 automated_test_3_uniform_drift_4ppc intel knl 1 16 8 0.79 0.1287 0.0308 0.01706 0.01715 0.02042 0.005452 0.003636 0.002797 0.003143 0 0 0 0 0.007324
+2018 01 31 automated_test_3_uniform_drift_4ppc intel knl 1 16 8 0.9 0.1296 0.03084 0.01711 0.01731 0.02053 0.005379 0.003641 0.002843 0.003137 0 0 0 0 0.008151
+2018 01 31 automated_test_3_uniform_drift_4ppc intel knl 1 16 8 0.9 0.1323 0.03081 0.01703 0.01736 0.02065 0.005339 0.003638 0.002751 0.004008 0 0 0 0 0.01015
+2018 01 31 automated_test_4_labdiags_2ppc intel knl 1 16 8 0.85 0.2896 0.03832 0.06449 0.07493 0.003507 0.002987 0.0001515 0.0001762 0.007921 0.0371 0.001537 0 0.0004387 0.03832
+2018 01 31 automated_test_4_labdiags_2ppc intel knl 1 16 8 1.12 0.2895 0.03845 0.06423 0.07481 0.003489 0.002994 0.000152 0.0001779 0.00834 0.0357 0.001545 0 0.0005249 0.03845
+2018 01 31 automated_test_4_labdiags_2ppc intel knl 1 16 8 0.76 0.3243 0.03804 0.0646 0.07462 0.003483 0.002991 0.0001508 0.0001769 0.008051 0.05983 0.001565 0 0.005392 0.03804
+2018 01 31 automated_test_4_labdiags_2ppc intel knl 1 16 8 0.74 0.3143 0.03941 0.06478 0.07547 0.003486 0.003007 0.0001518 0.0001808 0.007845 0.05079 0.001543 0 0.0007033 0.03941
+2018 01 31 automated_test_5_loadimbalance intel knl 1 16 8 9.2 0.3845 0.08558 0.1042 0.1332 0 0 0 0 0.01226 0 0 0 0 0.08558
+2018 01 31 automated_test_5_loadimbalance intel knl 1 16 8 9.19 0.3864 0.085 0.1051 0.134 0 0 0 0 0.01202 0 0 0 0 0.085
+2018 01 31 automated_test_5_loadimbalance intel knl 1 16 8 8.98 0.3912 0.08665 0.1061 0.1356 0 0 0 0 0.01193 0 0 0 0 0.08665
+2018 01 31 automated_test_5_loadimbalance intel knl 1 16 8 9.03 0.3826 0.08484 0.1031 0.1329 0 0 0 0 0.01205 0 0 0 0 0.08484
+2018 01 31 automated_test_6_output_2ppc intel knl 1 16 8 3.6 1.086 0.0898 0.1311 0.09441 0.1345 0.027 0.008783 0.009792 0.02151 0.08454 0.04962 0 0.0008218 0.005303
+2018 01 31 automated_test_6_output_2ppc intel knl 1 16 8 4.7 1.136 0.09059 0.1437 0.09535 0.1358 0.02915 0.009238 0.01002 0.02315 0.09088 0.05006 0 0.01081 0.005381
+2018 01 31 automated_test_6_output_2ppc intel knl 1 16 8 4.0 1.132 0.09145 0.1377 0.09592 0.1365 0.02817 0.009353 0.0103 0.02447 0.066 0.05309 0 0.02047 0.009196
+2018 01 31 automated_test_6_output_2ppc intel knl 1 16 8 3.8 1.135 0.09088 0.1308 0.09623 0.135 0.02762 0.008839 0.009758 0.02561 0.1144 0.04874 0 0.0008693 0.008112
diff --git a/Tools/performance_tests/run_alltests.py b/Tools/performance_tests/run_alltests.py
index 7c02481fb..440da363d 100644
--- a/Tools/performance_tests/run_alltests.py
+++ b/Tools/performance_tests/run_alltests.py
@@ -1,5 +1,6 @@
import os, sys, shutil
import argparse, re, time
+from functions_perftest import *
# This script runs automated performance tests for WarpX.
# It runs the tests in the list test_list defined below, and writes
@@ -9,7 +10,7 @@ import argparse, re, time
# Before running performance tests, make sure you have the latest version
# of performance_log.txt
# A typical execution reads:
-# > python run_alltests.py --no-recompile --compiler=gnu --architecture=cpu --mode=run --no-commit --log_file='my_performance_log.txt'
+# > python run_alltests.py --no-recompile --compiler=gnu --architecture=cpu --mode=run --log_file='my_performance_log.txt'
# These are default values, and will give the same result as
# > python run_alltests.py
# To add a new test item, extend the test_list with a line like
@@ -33,9 +34,53 @@ import argparse, re, time
# write data into the performance log file
# push file performance_log.txt on the repo
+# Define the list of tests to run
+# -------------------------------
+# each element of test_list contains
+# [str runname, int n_node, int n_mpi PER NODE, int n_omp]
+test_list = []
+n_repeat = 3
+basename1 = 'uniform_t0.01_'
+
+test_list.extend([[basename1 + '128', 1, 16, 8]]*n_repeat)
+test_list.extend([[basename1 + '128', 1, 32, 16]]*n_repeat)
+
+# test_list.extend([[basename1 + '128', 1, 16, 8]]*n_repeat)
+# test_list.extend([[basename1 + '256', 8, 16, 8]]*n_repeat)
+# test_list.extend([[basename1 + '512', 64, 16, 8]]*n_repeat)
+# test_list.extend([[basename1 + '1024', 512, 16, 8]]*n_repeat)
+# test_list.extend([[basename1 + '2048', 4096, 16, 8]]*n_repeat)
+
+
+# test_list.extend([['uniform_t0.01_direct1_1ppc_128', 1, 16, 8]]*n_repeat)
+# test_list.extend([['uniform_t0.01_direct3_1ppc_128', 1, 16, 8]]*n_repeat)
+# test_list.extend([['uniform_t0.01_esirk1_1ppc_128', 1, 16, 8]]*n_repeat)
+# test_list.extend([['uniform_t0.01_esirk3_1ppc_128', 1, 16, 8]]*n_repeat)
+
+# test_list.extend([['uniform_t0.01_direct1_1ppc_256', 8, 16, 8]]*n_repeat)
+# test_list.extend([['uniform_t0.01_direct3_1ppc_256', 8, 16, 8]]*n_repeat)
+# test_list.extend([['uniform_t0.01_esirk1_1ppc_256', 8, 16, 8]]*n_repeat)
+# test_list.extend([['uniform_t0.01_esirk3_1ppc_256', 8, 16, 8]]*n_repeat)
+
+# test_list.extend([['uniform_t0.01_direct1_1ppc_512', 64, 16, 8]]*n_repeat)
+# test_list.extend([['uniform_t0.01_direct3_1ppc_512', 64, 16, 8]]*n_repeat)
+# test_list.extend([['uniform_t0.01_esirk1_1ppc_512', 64, 16, 8]]*n_repeat)
+# test_list.extend([['uniform_t0.01_esirk3_1ppc_512', 64, 16, 8]]*n_repeat)
+
+# test_list.extend([['uniform_t0.01_direct1_1ppc_1024', 512, 16, 8]]*n_repeat)
+# test_list.extend([['uniform_t0.01_direct3_1ppc_1024', 512, 16, 8]]*n_repeat)
+# test_list.extend([['uniform_t0.01_esirk1_1ppc_1024', 512, 16, 8]]*n_repeat)
+# test_list.extend([['uniform_t0.01_esirk3_1ppc_1024', 512, 16, 8]]*n_repeat)
+
+# test_list.extend([['uniform_t0.01_direct1_1ppc_2048', 4096, 16, 8]]*n_repeat)
+# test_list.extend([['uniform_t0.01_direct3_1ppc_2048', 4096, 16, 8]]*n_repeat)
+# test_list.extend([['uniform_t0.01_esirk1_1ppc_2048', 4096, 16, 8]]*n_repeat)
+# test_list.extend([['uniform_t0.01_esirk3_1ppc_2048', 4096, 16, 8]]*n_repeat)
+
+n_tests = len(test_list)
+
# Read command-line arguments
# ---------------------------
-
# Create parser and read arguments
parser = argparse.ArgumentParser(
description='Run performance tests and write results in files')
@@ -115,136 +160,6 @@ if args.recompile == True:
# Define functions to run a test and analyse results
# --------------------------------------------------
-
-# Run a performance test in an interactive allocation
-def run_interactive(run_name, res_dir, n_node=1, n_mpi=1, n_omp=1):
- # Clean res_dir
- if os.path.exists(res_dir):
- shutil.rmtree(res_dir)
- os.makedirs(res_dir)
- # Copy files to res_dir
- shutil.copyfile(bin_dir + bin_name, res_dir + bin_name)
- shutil.copyfile(cwd + run_name, res_dir + 'inputs')
- os.chdir(res_dir)
- if args.architecture == 'cpu':
- cflag_value = max(1, int(32/n_mpi) * 2) # Follow NERSC directives
- exec_command = 'export OMP_NUM_THREADS=' + str(n_omp) + ';' +\
- 'srun --cpu_bind=cores ' + \
- ' -n ' + str(n_node*n_mpi) + \
- ' -c ' + str(cflag_value) + \
- ' ./' + bin_name + ' inputs > perf_output.txt'
- elif args.architecture == 'knl':
- # number of logical cores per MPI process
- cflag_value = max(1,int(68/n_mpi) * 4) # Follow NERSC directives
- exec_command = 'export OMP_NUM_THREADS=' + str(n_omp) + ';' +\
- 'srun --cpu_bind=cores ' + \
- ' -n ' + str(n_node*n_mpi) + \
- ' -c ' + str(cflag_value) + \
- ' ./' + bin_name + ' inputs > perf_output.txt'
- os.system('chmod 700 ' + bin_name)
- os.system(config_command + exec_command)
- return 0
-
-def run_batch(run_name, res_dir, n_node=1, n_mpi=1, n_omp=1):
- # Clean res_dir
- if os.path.exists(res_dir):
- shutil.rmtree(res_dir)
- os.makedirs(res_dir)
- # Copy files to res_dir
- shutil.copyfile(bin_dir + bin_name, res_dir + bin_name)
- shutil.copyfile(cwd + run_name, res_dir + 'inputs')
- os.chdir(res_dir)
- batch_string = ''
- batch_string += '#!/bin/bash\n'
- batch_string += '#SBATCH --job-name=' + run_name + str(n_node) + str(n_mpi) + str(n_omp) + '\n'
- batch_string += '#SBATCH --time=00:30:00\n'
- batch_string += '#SBATCH -C ' + module_Cname[args.architecture] + '\n'
- batch_string += '#SBATCH -N ' + str(n_node) + '\n'
- batch_string += '#SBATCH --partition=regular\n'
- batch_string += '#SBATCH --qos=normal\n'
- batch_string += '#SBATCH -e error.txt\n'
- batch_string += '#SBATCH --account=m2852\n'
- batch_string += 'export OMP_NUM_THREADS=' + str(n_omp) + '\n'
- if args.architecture == 'cpu':
- cflag_value = max(1, int(32/n_mpi) * 2) # Follow NERSC directives
- batch_string += 'srun --cpu_bind=cores '+ \
- ' -n ' + str(n_node*n_mpi) + \
- ' -c ' + str(cflag_value) + \
- ' ./' + bin_name + ' inputs > perf_output.txt'
- elif args.architecture == 'knl':
- # number of logical cores per MPI process
- cflag_value = max(1, int(64/n_mpi) * 4) # Follow NERSC directives
- batch_string += 'srun --cpu_bind=cores ' + \
- ' -n ' + str(n_node*n_mpi) + \
- ' -c ' + str(cflag_value) + \
- ' ./' + bin_name + ' inputs > perf_output.txt\n'
- batch_file = 'slurm'
- f_exe = open(batch_file,'w')
- f_exe.write(batch_string)
- f_exe.close()
- os.system('chmod 700 ' + bin_name)
- os.system(config_command + 'sbatch ' + batch_file + ' >> ' + cwd + 'log_jobids_tmp.txt')
- return 0
-
-# Read output file and return init time and 1-step time
-def read_run_perf(filename):
- timing_list = []
- # Search inclusive time to get simulation step time
- partition_limit = 'NCalls Incl. Min Incl. Avg Incl. Max Max %'
- with open(filename) as file_handler:
- output_text = file_handler.read()
- # Get total simulation time
- line_match_totaltime = re.search('TinyProfiler total time across processes.*', output_text)
- total_time = float(line_match_totaltime.group(0).split()[8])
- search_area = output_text.partition(partition_limit)[2]
- line_match_looptime = re.search('\nWarpX::Evolve().*', search_area)
- time_wo_initialization = float(line_match_looptime.group(0).split()[3])
- timing_list += [str(total_time - time_wo_initialization)]
- timing_list += [str(time_wo_initialization/n_steps)]
- # Search exclusive time to get routines timing
- partition_limit1 = 'NCalls Excl. Min Excl. Avg Excl. Max Max %'
- partition_limit2 = 'NCalls Incl. Min Incl. Avg Incl. Max Max %'
- file_handler.close()
- with open(filename) as file_handler:
- output_text = file_handler.read()
- search_area = output_text.partition(partition_limit1)[2].partition(partition_limit2)[0]
- pattern_list = ['\nParticleContainer::Redistribute().*',\
- '\nFabArray::FillBoundary().*',\
- '\nFabArray::ParallelCopy().*',\
- '\nPICSAR::CurrentDeposition.*',\
- '\nPICSAR::FieldGather.*',\
- '\nPICSAR::ParticlePush.*',\
- '\nPPC::Evolve::Copy.*',\
- '\nWarpX::EvolveEM().*',\
- 'NArrayInt>::Checkpoint().*',\
- 'NArrayInt>::WriteParticles().*',\
- '\nVisMF::Write_FabArray.*',\
- '\nWriteMultiLevelPlotfile().*']
- for pattern in pattern_list:
- timing = '0'
- line_match = re.search(pattern, search_area)
- if line_match is not None:
- timing = [str(float(line_match.group(0).split()[3])/n_steps)]
- timing_list += timing
- return timing_list
-
-# Write time into logfile
-def write_perf_logfile(log_file):
- log_line = ' '.join([year, month, day, run_name, args.compiler,\
- args.architecture, str(n_node), str(n_mpi),\
- str(n_omp)] + timing_list + ['\n'])
- f_log = open(log_file, 'a')
- f_log.write(log_line)
- f_log.close()
- return 0
-
-def get_nsteps(runname):
- with open(runname) as file_handler:
- runname_text = file_handler.read()
- line_match_nsteps = re.search('\nmax_step.*', runname_text)
- nsteps = float(line_match_nsteps.group(0).split()[2])
- return nsteps
-
def process_analysis():
dependencies = ''
f_log = open(cwd + 'log_jobids_tmp.txt','r')
@@ -254,18 +169,19 @@ def process_analysis():
dependencies += line.split()[3] + ':'
batch_string = ''
batch_string += '#!/bin/bash\n'
- batch_string += '#SBATCH --job-name=perftests_read\n'
+ batch_string += '#SBATCH --job-name=warpx_read\n'
batch_string += '#SBATCH --time=00:05:00\n'
batch_string += '#SBATCH -C ' + module_Cname[args.architecture] + '\n'
batch_string += '#SBATCH -N 1\n'
batch_string += '#SBATCH -S 4\n'
- batch_string += '#SBATCH --partition=regular\n'
- batch_string += '#SBATCH --qos=normal\n'
+ batch_string += '#SBATCH -q regular\n'
batch_string += '#SBATCH -e read_error.txt\n'
batch_string += '#SBATCH -o read_output.txt\n'
batch_string += '#SBATCH --mail-type=end\n'
batch_string += '#SBATCH --account=m2852\n'
- batch_string += 'python ' + __file__ + ' --no-recompile --compiler=' + args.compiler + ' --architecture=' + args.architecture + ' --mode=read' + ' --log_file=' + log_file
+ batch_string += 'python ' + __file__ + ' --no-recompile --compiler=' + \
+ args.compiler + ' --architecture=' + args.architecture + \
+ ' --mode=read' + ' --log_file=' + log_file
if args.commit == True:
batch_string += ' --commit'
batch_string += '\n'
@@ -279,20 +195,6 @@ def process_analysis():
# Loop over the tests and return run time + details
# -------------------------------------------------
-
-# each element of test_list contains
-# [str runname, int n_node, int n_mpi PER NODE, int n_omp]
-
-test_list = []
-n_repeat = 1
-filename1 = 'uniform_plasma'
-
-test_list.extend([[filename1, 1, 8, 16]]*3)
-test_list.extend([[filename1, 1, 4, 32]]*3)
-test_list.extend([[filename1, 2, 4, 32]]*3)
-
-n_tests = len(test_list)
-
if args.mode == 'run':
# Remove file log_jobids_tmp.txt if exists.
# This file contains the jobid of every perf test
@@ -308,13 +210,14 @@ if args.mode == 'run':
n_omp = current_run[3]
n_steps = get_nsteps(cwd + run_name)
res_dir = res_dir_base
- res_dir += '_'.join([year, month, day, run_name, args.compiler,\
+ res_dir += '_'.join([run_name, args.compiler,\
args.architecture, str(n_node), str(n_mpi),\
- str(n_omp)]) + '/'
+ str(n_omp), str(count)]) + '/'
# Run the simulation.
# If you are currently in an interactive session and want to run interactive,
# just replace run_batch with run_interactive
- run_batch(run_name, res_dir, n_node=n_node, n_mpi=n_mpi, n_omp=n_omp)
+ run_batch(run_name, res_dir, bin_name, config_command, architecture=args.architecture, \
+ Cname=module_Cname[args.architecture], n_node=n_node, n_mpi=n_mpi, n_omp=n_omp)
os.chdir(cwd)
process_analysis()
@@ -326,7 +229,8 @@ if args.mode == 'read':
'FillBoundary ParallelCopy CurrentDeposition FieldGather '+\
'ParticlePush Copy EvolveEM Checkpoint '+\
'WriteParticles Write_FabArray '+\
- 'WriteMultiLevelPlotfile(unit: second)\n'
+ 'WriteMultiLevelPlotfile '+\
+ 'RedistributeMPI(unit: second)\n'
f_log = open(log_dir + log_file, 'a')
f_log.write(log_line)
f_log.close()
@@ -340,13 +244,20 @@ if args.mode == 'read':
n_steps = get_nsteps(cwd + run_name)
print('n_steps = ' + str(n_steps))
res_dir = res_dir_base
- res_dir += '_'.join([year, month, day, run_name, args.compiler,\
+ res_dir += '_'.join([run_name, args.compiler,\
args.architecture, str(n_node), str(n_mpi),\
- str(n_omp)]) + '/'
- # Read performance data from the output file
- timing_list = read_run_perf(res_dir + 'perf_output.txt')
+ str(n_omp), str(count)]) + '/'
+ # Read performance data from the output file
+ output_filename = 'perf_output.txt'
+ timing_list = read_run_perf(res_dir + output_filename, n_steps)
# Write performance data to the performance log file
- write_perf_logfile(log_dir + log_file)
+ log_line = ' '.join([year, month, day, run_name, args.compiler,\
+ args.architecture, str(n_node), str(n_mpi),\
+ str(n_omp)] + timing_list + ['\n'])
+ write_perf_logfile(log_dir + log_file, log_line)
# Store test parameters for the record
dir_record_base = './perf_warpx_record/'
@@ -363,6 +274,21 @@ if args.mode == 'read':
for count, current_run in enumerate(test_list):
shutil.copy(current_run[0], dir_record)
+ for count, current_run in enumerate(test_list):
+ run_name = current_run[0]
+ n_node = current_run[1]
+ n_mpi = current_run[2]
+ n_omp = current_run[3]
+ res_dir = res_dir_base
+ res_dir += '_'.join([run_name, args.compiler,\
+ args.architecture, str(n_node), str(n_mpi),\
+ str(n_omp), str(count)]) + '/'
+ res_dir_arch = res_dir_base
+ res_dir_arch += '_'.join([year, month, day, run_name, args.compiler,\
+ args.architecture, str(n_node), str(n_mpi), \
+ str(n_omp), str(count)]) + '/'
+ os.rename(res_dir, res_dir_arch)
+
# Commit results to the Repo
if args.commit == True:
os.system('git add ' + log_dir + log_file + ';'\
diff --git a/Tools/performance_tests/run_alltests_1node.py b/Tools/performance_tests/run_alltests_1node.py
new file mode 100644
index 000000000..4c6849c3b
--- /dev/null
+++ b/Tools/performance_tests/run_alltests_1node.py
@@ -0,0 +1,333 @@
+import os, sys, shutil
+import argparse, re, time
+from functions_perftest import *
+
+# This script runs automated performance tests for WarpX.
+# It runs the tests in the list test_list defined below, and writes the
+# results to the file performance_log.txt in warpx/performance_tests/
+
+# ---- User's manual ----
+# Before running performance tests, make sure you have the latest version
+# of performance_log.txt
+
+# ---- Running a custom set of performance tests ----
+# > python run_alltests_1node.py --no-recompile --compiler=intel
+# > --architecture=knl --mode=run --input_file=uniform_plasma
+# > --n_node=1 --log_file='my_performance_log.txt'
+
+# ---- Running the pre-defined automated tests ----
+# Compile and run:
+# > python run_alltests_1node.py --automated --recompile
+# Just run:
+# > python run_alltests_1node.py --automated
+
+# To add a new test item, extend the test_list with a line like
+# test_list.extend([['my_input_file', n_node, n_mpi, n_omp]]*n_repeat)
+# - my_input_file must be in warpx/performance_tests
+
+# ---- Developer's manual ----
+# This script can run in two modes:
+# - 'run' mode: for each test item, a batch job is executed.
+# create folder '$SCRATCH/performance_warpx/'
+# recompile the code if option --recompile is used
+# loop over test_list and submit one batch script per item
+# Submit a batch job that executes the script in read mode
+# This last job runs once all others are completed
+# - 'read' mode: Get performance data from all test items
+# create the performance log file if it does not exist
+# loop over the test list
+# read initialization time and step time
+# write data into the performance log file
+# push file performance_log.txt on the repo
+
+# Read command-line arguments
+# ---------------------------
+# Create parser and read arguments
+parser = argparse.ArgumentParser(
+ description='Run performance tests and write results in files')
+parser.add_argument('--recompile', dest='recompile', action='store_true', default=False)
+parser.add_argument('--no-recompile', dest='recompile', action='store_false', default=False)
+parser.add_argument('--commit', dest='commit', action='store_true', default=False)
+parser.add_argument( '--compiler', choices=['gnu', 'intel'], default='intel',
+ help='which compiler to use')
+parser.add_argument( '--architecture', choices=['cpu', 'knl'], default='knl',
+ help='which architecture to cross-compile for NERSC machines')
+parser.add_argument( '--mode', choices=['run', 'read'], default='run',
+ help='whether to run perftests or read their perf output. run calls read')
+parser.add_argument( '--log_file', dest = 'log_file', default='my_performance_log.txt',
+ help='name of log file where data will be written. ignored if option --commit is used')
+parser.add_argument('--n_node', dest='n_node', default=1, help='number of nodes for the runs')
+parser.add_argument('--input_file', dest='input_file', default='input_file.pixr',
+ type=str, help='input file to run')
+parser.add_argument('--automated', dest='automated', action='store_true', default=False,
+ help='Use to run the automated test list')
+
+args = parser.parse_args()
+log_file = args.log_file
+do_commit = args.commit
+run_name = args.input_file
+
+# List of tests to run and analyse.
+# Note: this is overwritten if the --automated flag is used.
+# each element of test_list contains
+# [str input_file, int n_node, int n_mpi PER NODE, int n_omp]
+test_list = []
+n_repeat = 2
+filename1 = args.input_file
+test_list.extend([[filename1, 1, 128, 1]]*n_repeat)
+test_list.extend([[filename1, 1, 64, 2]]*n_repeat)
+# test_list.extend([[filename1, 1, 32, 4]]*n_repeat)
+# test_list.extend([[filename1, 1, 16, 8]]*n_repeat)
+# test_list.extend([[filename1, 1, 8, 16]]*n_repeat)
+# test_list.extend([[filename1, 1, 4, 32]]*n_repeat)
+# test_list.extend([[filename1, 1, 2, 64]]*n_repeat)
+# test_list.extend([[filename1, 1, 1, 128]]*n_repeat)
+
+# Nothing should be changed after this line
+# if the --automated flag is used, test_list and do_commit are
+# overwritten
+
+if args.automated == True:
+ test_list = []
+ n_repeat = 4
+ test_list.extend([['automated_test_1_uniform_rest_32ppc', 1, 16, 8]]*n_repeat)
+ test_list.extend([['automated_test_2_uniform_rest_1ppc', 1, 16, 8]]*n_repeat)
+ test_list.extend([['automated_test_3_uniform_drift_4ppc', 1, 16, 8]]*n_repeat)
+ test_list.extend([['automated_test_4_labdiags_2ppc', 1, 16, 8]]*n_repeat)
+ test_list.extend([['automated_test_5_loadimbalance', 1, 16, 8]]*n_repeat)
+ test_list.extend([['automated_test_6_output_2ppc', 1, 16, 8]]*n_repeat)
+ do_commit = False
+ run_name = 'automated_tests'
+
+n_tests = len(test_list)
+if do_commit == True:
+ log_file = 'performance_log.txt'
+
+# Dictionaries
+# compiler names. Used for WarpX executable name
+compiler_name = {'intel': 'intel', 'gnu': 'gcc'}
+# architecture. Used for WarpX executable name
+module_name = {'cpu': 'haswell', 'knl': 'mic-knl'}
+# architecture. Used in batch scripts
+module_Cname = {'cpu': 'haswell', 'knl': 'knl,quad,cache'}
+# Define environment variables
+cwd = os.getcwd() + '/'
+res_dir_base = os.environ['SCRATCH'] + '/performance_warpx/'
+bin_dir = cwd + 'Bin/'
+bin_name = 'perf_tests3d.' + args.compiler + '.' + module_name[args.architecture] + '.TPROF.MPI.OMP.ex'
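+# e.g. with --compiler=intel --architecture=knl this gives
+# bin_name = 'perf_tests3d.intel.mic-knl.TPROF.MPI.OMP.ex'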
+log_dir = cwd
+
+day = time.strftime('%d')
+month = time.strftime('%m')
+year = time.strftime('%Y')
+n_node = int(args.n_node)
+
+# Initialize tests
+# ----------------
+if args.mode == 'run':
+# Set default options for compilation and execution
+ config_command = ''
+ config_command += 'module unload darshan;'
+ config_command += 'module load craype-hugepages4M;'
+ if args.architecture == 'knl':
+ if args.compiler == 'intel':
+ config_command += 'module unload PrgEnv-gnu;'
+ config_command += 'module load PrgEnv-intel;'
+ elif args.compiler == 'gnu':
+ config_command += 'module unload PrgEnv-intel;'
+ config_command += 'module load PrgEnv-gnu;'
+ config_command += 'module unload craype-haswell;'
+ config_command += 'module load craype-mic-knl;'
+ elif args.architecture == 'cpu':
+ if args.compiler == 'intel':
+ config_command += 'module unload PrgEnv-gnu;'
+ config_command += 'module load PrgEnv-intel;'
+ elif args.compiler == 'gnu':
+ config_command += 'module unload PrgEnv-intel;'
+ config_command += 'module load PrgEnv-gnu;'
+ config_command += 'module unload craype-mic-knl;'
+ config_command += 'module load craype-haswell;'
+ # Create the main result directory if it does not exist
+ if not os.path.exists(res_dir_base):
+ os.mkdir(res_dir_base)
+
+# Recompile if requested
+if args.recompile == True:
+ with open(cwd + 'GNUmakefile_perftest') as makefile_handler:
+ makefile_text = makefile_handler.read()
+ makefile_text = re.sub('\nCOMP.*', '\nCOMP=%s' %compiler_name[args.compiler], makefile_text)
+ with open(cwd + 'GNUmakefile_perftest', 'w') as makefile_handler:
+ makefile_handler.write( makefile_text )
+ os.system(config_command + " make -f GNUmakefile_perftest realclean ; " + " rm -r tmp_build_dir *.mod; make -j 8 -f GNUmakefile_perftest")
+
+# This function runs a batch script with dependencies to perform the analysis
+# when performance runs are done.
+def process_analysis():
+ dependencies = ''
+ f_log = open(cwd + 'log_jobids_tmp.txt','r')
+ line = f_log.readline()
+ print(line)
+ dependencies += line.split()[3] + ':'
+ batch_string = ''
+ batch_string += '#!/bin/bash\n'
+ batch_string += '#SBATCH --job-name=warpx_1node_read\n'
+ batch_string += '#SBATCH --time=00:05:00\n'
+ batch_string += '#SBATCH -C haswell\n'
+ batch_string += '#SBATCH -N 1\n'
+ batch_string += '#SBATCH -S 4\n'
+ batch_string += '#SBATCH -q regular\n'
+ batch_string += '#SBATCH -e read_error.txt\n'
+ batch_string += '#SBATCH -o read_output.txt\n'
+ batch_string += '#SBATCH --mail-type=end\n'
+ batch_string += '#SBATCH --account=m2852\n'
+ batch_string += 'python ' + __file__ + ' --no-recompile --compiler=' + \
+ args.compiler + ' --architecture=' + args.architecture + \
+ ' --mode=read' + ' --log_file=' + log_file + \
+ ' --input_file=' + args.input_file
+ if do_commit == True:
+ batch_string += ' --commit'
+ if args.automated == True:
+ batch_string += ' --automated'
+ batch_string += '\n'
+ batch_file = 'slurm_perfread'
+ f_exe = open(batch_file,'w')
+ f_exe.write(batch_string)
+ f_exe.close()
+ os.system('chmod 700 ' + batch_file)
+ os.system('sbatch --dependency afterok:' + dependencies[0:-1] + ' ' + batch_file)
+ return 0
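+
+# Sketch of the resulting job chain (jobid hypothetical): the perf job submitted
+# by run_batch_nnode logs a line 'Submitted batch job 1001' in log_jobids_tmp.txt;
+# process_analysis() then runs 'sbatch --dependency afterok:1001 slurm_perfread',
+# which re-executes this script in read mode once the run has completed.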
+
+# Loop over the tests and return run time + details
+# -------------------------------------------------
+if args.mode == 'run':
+ # Remove file log_jobids_tmp.txt if exists.
+ # This file contains the jobid of every perf test
+ # It is used to manage the analysis script dependencies
+ if os.path.isfile(cwd + 'log_jobids_tmp.txt'):
+ os.remove(cwd + 'log_jobids_tmp.txt')
+ res_dir = res_dir_base
+ res_dir += '_'.join([run_name, args.compiler,\
+ args.architecture, str(n_node)]) + '/'
+ # Run the simulation.
+ run_batch_nnode(test_list, res_dir, bin_name, config_command,\
+ architecture=args.architecture, Cname=module_Cname[args.architecture], \
+ n_node=n_node)
+ os.chdir(cwd)
+ process_analysis()
+
+if args.mode == 'read':
+ # Create the log_file for performance tests if it does not exist
+ if not os.path.isfile(log_dir + log_file):
+ log_line = '## year month day input_file compiler architecture n_node n_mpi ' +\
+ 'n_omp time_initialization time_one_iteration Redistribute '+\
+ 'FillBoundary ParallelCopy CurrentDeposition FieldGather '+\
+ 'ParticlePush Copy EvolveEM Checkpoint '+\
+ 'WriteParticles Write_FabArray '+\
+ 'WriteMultiLevelPlotfile(unit: second) '+\
+ 'RedistributeMPI\n'
+ f_log = open(log_dir + log_file, 'a')
+ f_log.write(log_line)
+ f_log.close()
+ for count, current_run in enumerate(test_list):
+ # Results folder
+ print('read ' + str(current_run))
+ input_file = current_run[0]
+ # current_run[1] (n_node) is not read here: it is a global parameter
+ n_mpi = current_run[2]
+ n_omp = current_run[3]
+ n_steps = get_nsteps(cwd + input_file)
+ print('n_steps = ' + str(n_steps))
+ res_dir = res_dir_base
+ res_dir += '_'.join([run_name, args.compiler,\
+ args.architecture, str(n_node)]) + '/'
+ # Read performance data from the output file
+ output_filename = 'out_' + '_'.join([input_file, str(n_node), str(n_mpi), str(n_omp), str(count)]) + '.txt'
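+ # e.g. 'out_automated_test_1_uniform_rest_32ppc_1_16_8_0.txt'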
+ timing_list = read_run_perf(res_dir + output_filename, n_steps)
+ # Write performance data to the performance log file
+ log_line = ' '.join([year, month, day, input_file, args.compiler,\
+ args.architecture, str(n_node), str(n_mpi),\
+ str(n_omp)] + timing_list + ['\n'])
+ write_perf_logfile(log_dir + log_file, log_line)
+
+ # Store test parameters for the record
+ dir_record_base = './perf_warpx_record/'
+ if not os.path.exists(dir_record_base):
+ os.mkdir(dir_record_base)
+ count = 0
+ dir_record = dir_record_base + '_'.join([year, month, day]) + '_0'
+ while os.path.exists(dir_record):
+ count += 1
+ # rebuild the full name so that counts >= 10 are not mangled
+ dir_record = dir_record_base + '_'.join([year, month, day]) + '_' + str(count)
+ os.mkdir(dir_record)
+ shutil.copy(__file__, dir_record)
+ shutil.copy(log_dir + log_file, dir_record)
+ for count, current_run in enumerate(test_list):
+ shutil.copy(current_run[0], dir_record)
+
+ # Rename the result directory with the precise date, for archival purposes
+ res_dir_arch = res_dir_base
+ res_dir_arch += '_'.join([year, month, day, run_name, args.compiler,\
+ args.architecture, str(n_node)]) + '/'
+ os.rename(res_dir, res_dir_arch)
+
+ # Commit results to the Repo
+ if do_commit == True:
+ os.system('git add ' + log_dir + log_file + ';'\
+ 'git commit -m "performance tests";'\
+ 'git push -u origin master')
+
+ # Plot the performance history from the log file
+ import numpy as np
+ import matplotlib
+ matplotlib.use('Agg')
+ import matplotlib.pyplot as plt
+ filename0 = 'performance_log'
+ filename = filename0 + '.txt'
+ fontsize = 14
+ matplotlib.rcParams.update({'font.size': fontsize})
+ nsteps = 100.
+ nrepeat = 4
+ legends = [ 'n_node', 'n_mpi', 'n_omp', 'time_initialization', 'time_one_iteration', \
+ 'Redistribute', 'FillBoundary', 'ParallelCopy', 'CurrentDeposition', \
+ 'FieldGather', 'ParticlePush', 'Copy', 'EvolveEM', 'Checkpoint', \
+ 'WriteParticles', 'Write_FabArray', 'WriteMultiLevelPlotfile', \
+ 'RedistributeMPI']
+ date = np.loadtxt( filename, usecols = np.arange(0, 3 ))
+ data = np.loadtxt( filename, usecols = np.arange(6, 6+len(legends)) )
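+ # In the log file, columns 0-2 hold the date and column 3 the run name;
+ # columns 6 onwards line up one-to-one with the entries of 'legends'.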
+ # Read run name
+ with open(filename) as f:
+ namelist_tmp = zip(*[line.split() for line in f])[3]
+ # Remove first line = comments
+ namelist = list(namelist_tmp[1:])
+ selector_list = ['automated_test_1_uniform_rest_32ppc',\
+ 'automated_test_2_uniform_rest_1ppc',\
+ 'automated_test_3_uniform_drift_4ppc',\
+ 'automated_test_4_labdiags_2ppc',\
+ 'automated_test_5_loadimbalance',\
+ 'automated_test_6_output_2ppc']
+ lin_date = date[:,0]+date[:,1]/12.+date[:,2]/366.
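+ # e.g. 2018/01/31 maps to 2018 + 1/12. + 31/366. (about 2018.17)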
+ unique_lin_date = np.unique(lin_date)
+ my_xticks = unique_lin_date
+# cmap = plt.get_cmap("tab20")
+ cycle = plt.rcParams['axes.prop_cycle'].by_key()['color']
+ for selector_string in selector_list:
+ selector = [idx for idx in range(len(namelist)) if selector_string in namelist[idx]]
+ plt.figure(num=0, figsize=(8,4))
+ plt.clf()
+ plt.title('warpx ' + selector_string)
+ for i in np.arange(data.shape[1]):
+ icolors = i-3
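+ # plot only the timers that take more than 5% of the time per iteration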
+ if i>3 and (data[selector,i] > 5./100*data[selector,4]).any():
+ plt.plot(lin_date[selector], data[selector,i],'+', ms=6, \
+ mew=2, label=legends[i] )
+ # plt.plot(lin_date[selector], data[selector,i],'+', ms=6, \
+ # mew=2, label=legends[i], color=cmap(i) )
+ plt.xlabel('date')
+ plt.ylabel('time/step (s)')
+ plt.grid()
+ plt.legend(bbox_to_anchor=(1.1, 1.05))
+ plt.savefig( selector_string + '.pdf', bbox_inches='tight')
+ plt.savefig( selector_string + '.png', bbox_inches='tight')
diff --git a/Tools/read_raw_data.py b/Tools/read_raw_data.py
index e9745f08b..deb1f36a2 100644
--- a/Tools/read_raw_data.py
+++ b/Tools/read_raw_data.py
@@ -9,18 +9,18 @@ _component_names = ['Ex', 'Ey', 'Ez', 'Bx', 'By', 'Bz', 'jx', 'jy', 'jz', 'rho']
def read_data(plt_file):
'''
- This function reads the raw (i.e. not averaged to cell centers) data
+ This function reads the raw (i.e. not averaged to cell centers) data
from a WarpX plt file. The plt file must have been written with the
- plot_raw_fields option turned on, so that it contains a raw_data
+ plot_raw_fields option turned on, so that it contains a raw_data
sub-directory. This is only really useful for single-level data.
Arguments:
plt_file : An AMReX plt_file file. Must contain a raw_data directory.
-
+
Returns:
- A list of dictionaries where the keys are field name strings and the values
+ A list of dictionaries where the keys are field name strings and the values
are numpy arrays. Each entry in the list corresponds to a different level.
Example:
@@ -34,7 +34,7 @@ def read_data(plt_file):
raw_files = glob(plt_file + "/raw_fields/Level_*/")
for raw_file in raw_files:
field_names = _get_field_names(raw_file)
-
+
data = {}
for field in field_names:
data[field] = _read_field(raw_file, field)
@@ -48,19 +48,19 @@ def read_lab_snapshot(snapshot):
'''
This reads the data from one of the lab frame snapshots generated when
- WarpX is run with boosted frame diagnostics turned on. It returns a
+ WarpX is run with boosted frame diagnostics turned on. It returns a
dictionary of numpy arrays, where each key corresponds to one of the
data fields ("Ex", "By,", etc... ). These values are cell-centered.
'''
- hdrs = glob(snapshot + "/Level_0/buffer?????_H")
+ hdrs = glob(snapshot + "/Level_0/buffer??????_H")
hdrs.sort()
boxes, file_names, offsets, header = _read_header(hdrs[0])
dom_lo, dom_hi = _combine_boxes(boxes)
domain_size = dom_hi - dom_lo + 1
-
+
space_dim = len(dom_lo)
if space_dim == 2:
direction = 1
@@ -69,14 +69,14 @@ def read_lab_snapshot(snapshot):
buffer_data = _read_buffer(snapshot, hdrs[0])
buffer_size = buffer_data['Bx'].shape[direction]
-
+
data = {}
for i in range(header.ncomp):
if space_dim == 3:
data[_component_names[i]] = np.zeros((domain_size[0], domain_size[1], buffer_size*len(hdrs)))
elif space_dim == 2:
data[_component_names[i]] = np.zeros((domain_size[0], buffer_size*len(hdrs)))
-
+
for i, hdr in enumerate(hdrs):
buffer_data = _read_buffer(snapshot, hdr)
if data is None:
@@ -87,7 +87,7 @@ def read_lab_snapshot(snapshot):
data[k][:,:,buffer_size*i:buffer_size*(i+1)] = v[:,:,:]
elif space_dim == 2:
data[k][:,buffer_size*i:buffer_size*(i+1)] = v[:,:]
-
+
return data
@@ -103,7 +103,7 @@ def _string_to_numpy_array(s):
def _line_to_numpy_arrays(line):
lo_corner = _string_to_numpy_array(line[0][1:])
hi_corner = _string_to_numpy_array(line[1][:])
- node_type = _string_to_numpy_array(line[2][:-1])
+ node_type = _string_to_numpy_array(line[2][:-1])
return lo_corner, hi_corner, node_type
@@ -152,39 +152,39 @@ def _combine_boxes(boxes):
def _read_field(raw_file, field_name):
- header_file = raw_file + field + "_H"
+ header_file = raw_file + field_name + "_H"
boxes, file_names, offsets, header = _read_header(header_file)
ng = header.nghost
- lo, hi = _combine_boxes(boxes)
- data = np.zeros(hi - lo + 1)
+ dom_lo, dom_hi = _combine_boxes(boxes)
+ data = np.zeros(dom_hi - dom_lo + 1)
for box, fn, offset in zip(boxes, file_names, offsets):
- lo = box[0]
- hi = box[1]
+ lo = box[0] - dom_lo
+ hi = box[1] - dom_lo
shape = hi - lo + 1
with open(raw_file + fn, "rb") as f:
f.seek(offset)
f.readline() # always skip the first line
arr = np.fromfile(f, 'float64', np.product(shape))
arr = arr.reshape(shape, order='F')
- data[[slice(l,h+1) for l, h in zip(lo+ng, hi+ng)]] = arr
+ data[[slice(l,h+1) for l, h in zip(lo, hi)]] = arr
return data
def _read_buffer(snapshot, header_fn):
-
+
boxes, file_names, offsets, header = _read_header(header_fn)
ng = header.nghost
dom_lo, dom_hi = _combine_boxes(boxes)
-
+
all_data = {}
for i in range(header.ncomp):
all_data[_component_names[i]] = np.zeros(dom_hi - dom_lo + 1)
-
+
for box, fn, offset in zip(boxes, file_names, offsets):
lo = box[0] - dom_lo
hi = box[1] - dom_lo
@@ -213,7 +213,7 @@ if __name__ == "__main__":
for level in range(2):
for name, vals in data[level].items():
print(level, name, vals.shape, vals.min(), vals.max())
-
+
# make a projection along the z-axis of the 'jx' field for level 0
level = 0
plt.pcolormesh(data[level]['jx'].sum(axis=2))
@@ -223,4 +223,3 @@ if __name__ == "__main__":
level = 1
plt.pcolormesh(data[level]['Bx_cp'].sum(axis=0))
plt.savefig('Bx_cp')
-