Diffstat (limited to 'Tools/performance_tests')
-rw-r--r--  Tools/performance_tests/automated_test_1_uniform_rest_32ppc    58
-rw-r--r--  Tools/performance_tests/automated_test_2_uniform_rest_1ppc     48
-rw-r--r--  Tools/performance_tests/automated_test_3_uniform_drift_4ppc    58
-rw-r--r--  Tools/performance_tests/automated_test_4_labdiags_2ppc        100
-rw-r--r--  Tools/performance_tests/automated_test_5_loadimbalance         76
-rw-r--r--  Tools/performance_tests/automated_test_6_output_2ppc           58
-rw-r--r--  Tools/performance_tests/functions_perftest.py                 189
-rw-r--r--  Tools/performance_tests/performance_log.txt                    70
-rw-r--r--  Tools/performance_tests/run_alltests.py                       244
-rw-r--r--  Tools/performance_tests/run_alltests_1node.py                 333
10 files changed, 1038 insertions, 196 deletions
diff --git a/Tools/performance_tests/automated_test_1_uniform_rest_32ppc b/Tools/performance_tests/automated_test_1_uniform_rest_32ppc
new file mode 100644
index 000000000..0f2f5e036
--- /dev/null
+++ b/Tools/performance_tests/automated_test_1_uniform_rest_32ppc
@@ -0,0 +1,58 @@
+# Maximum number of time steps
+max_step = 100
+
+# number of grid points
+amr.n_cell = 128 128 128
+
+amr.plot_int = -1 # How often to write plotfiles.
+
+# Maximum allowable size of each subdomain in the problem domain;
+# this is used to decompose the domain for parallel calculations.
+amr.max_grid_size = 32
+
+# Maximum level in hierarchy (for now must be 0, i.e., one level in total)
+amr.max_level = 0
+
+# Geometry
+geometry.coord_sys = 0 # 0: Cartesian
+geometry.is_periodic = 1 1 1 # Is periodic?
+geometry.prob_lo = -20.e-6 -20.e-6 -20.e-6 # physical domain
+geometry.prob_hi = 20.e-6 20.e-6 20.e-6
+
+# Verbosity
+warpx.verbose = 1
+
+# Algorithms
+algo.current_deposition = 2
+algo.charge_deposition = 0
+algo.field_gathering = 0
+algo.particle_pusher = 0
+interpolation.nox = 1
+interpolation.noy = 1
+interpolation.noz = 1
+
+# CFL
+warpx.cfl = 1.0
+
+particles.nspecies = 2
+particles.species_names = electrons ions
+
+electrons.charge = -q_e
+electrons.mass = m_e
+electrons.injection_style = "NUniformPerCell"
+electrons.num_particles_per_cell_each_dim = 2 2 4
+electrons.profile = constant
+electrons.density = 1.e20 # number of electrons per m^3
+electrons.momentum_distribution_type = "gaussian"
+electrons.u_th = 0.01 # uth the std of the (unitless) momentum
+electrons.uz_m = 0. # Mean momentum along z (unitless)
+
+ions.charge = q_e
+ions.mass = m_p
+ions.injection_style = "NUniformPerCell"
+ions.num_particles_per_cell_each_dim = 2 2 4
+ions.profile = constant
+ions.density = 1.e20 # number of ions per m^3
+ions.momentum_distribution_type = "gaussian"
+ions.u_th = 0.01 # uth the std of the (unitless) momentum
+ions.uz_m = 0. # Mean momentum along z (unitless)
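As a quick orientation on the problem size this input implies, here is a small back-of-the-envelope sketch (plain Python using only the values listed above; illustrative, not part of the commit): each species loads 2 x 2 x 4 = 16 particles per cell, so the two species together give the 32 particles per cell in the test name.

n_cell = (128, 128, 128)            # amr.n_cell
ppc_per_species = 2 * 2 * 4         # num_particles_per_cell_each_dim
n_species = 2                       # electrons + ions
cells = n_cell[0] * n_cell[1] * n_cell[2]
particles = cells * ppc_per_species * n_species
print(cells, 'cells,', ppc_per_species * n_species, 'particles per cell')
print('total macro-particles: %.3e' % particles)   # about 6.7e7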
diff --git a/Tools/performance_tests/automated_test_2_uniform_rest_1ppc b/Tools/performance_tests/automated_test_2_uniform_rest_1ppc
new file mode 100644
index 000000000..603d29a6d
--- /dev/null
+++ b/Tools/performance_tests/automated_test_2_uniform_rest_1ppc
@@ -0,0 +1,48 @@
+# Maximum number of time steps
+max_step = 100
+
+# number of grid points
+amr.n_cell = 256 256 256
+
+amr.plot_int = -1 # How often to write plotfiles.
+
+# Maximum allowable size of each subdomain in the problem domain;
+# this is used to decompose the domain for parallel calculations.
+amr.max_grid_size = 32
+
+# Maximum level in hierarchy (for now must be 0, i.e., one level in total)
+amr.max_level = 0
+
+# Geometry
+geometry.coord_sys = 0 # 0: Cartesian
+geometry.is_periodic = 1 1 1 # Is periodic?
+geometry.prob_lo = -20.e-6 -20.e-6 -20.e-6 # physical domain
+geometry.prob_hi = 20.e-6 20.e-6 20.e-6
+
+# Verbosity
+warpx.verbose = 1
+
+# Algorithms
+algo.current_deposition = 0
+algo.charge_deposition = 0
+algo.field_gathering = 0
+algo.particle_pusher = 0
+interpolation.nox = 1
+interpolation.noy = 1
+interpolation.noz = 1
+
+# CFL
+warpx.cfl = 1.0
+
+particles.nspecies = 1
+particles.species_names = electrons
+
+electrons.charge = -q_e
+electrons.mass = m_e
+electrons.injection_style = "NUniformPerCell"
+electrons.num_particles_per_cell_each_dim = 1 1 1
+electrons.profile = constant
+electrons.density = 1.e20 # number of electrons per m^3
+electrons.momentum_distribution_type = "gaussian"
+electrons.u_th = 0.01 # uth the std of the (unitless) momentum
+electrons.uz_m = 0. # Mean momentum along z (unitless)
diff --git a/Tools/performance_tests/automated_test_3_uniform_drift_4ppc b/Tools/performance_tests/automated_test_3_uniform_drift_4ppc
new file mode 100644
index 000000000..d8a257d96
--- /dev/null
+++ b/Tools/performance_tests/automated_test_3_uniform_drift_4ppc
@@ -0,0 +1,58 @@
+# Maximum number of time steps
+max_step = 100
+
+# number of grid points
+amr.n_cell = 128 128 128
+
+amr.plot_int = -1 # How often to write plotfiles.
+
+# Maximum allowable size of each subdomain in the problem domain;
+# this is used to decompose the domain for parallel calculations.
+amr.max_grid_size = 32
+
+# Maximum level in hierarchy (for now must be 0, i.e., one level in total)
+amr.max_level = 0
+
+# Geometry
+geometry.coord_sys = 0 # 0: Cartesian
+geometry.is_periodic = 1 1 1 # Is periodic?
+geometry.prob_lo = -20.e-6 -20.e-6 -20.e-6 # physical domain
+geometry.prob_hi = 20.e-6 20.e-6 20.e-6
+
+# Verbosity
+warpx.verbose = 1
+
+# Algorithms
+algo.current_deposition = 2
+algo.charge_deposition = 0
+algo.field_gathering = 0
+algo.particle_pusher = 0
+interpolation.nox = 1
+interpolation.noy = 1
+interpolation.noz = 1
+
+# CFL
+warpx.cfl = 1.0
+
+particles.nspecies = 2
+particles.species_names = electrons ions
+
+electrons.charge = -q_e
+electrons.mass = m_e
+electrons.injection_style = "NUniformPerCell"
+electrons.num_particles_per_cell_each_dim = 2 1 1
+electrons.profile = constant
+electrons.density = 1.e20 # number of electrons per m^3
+electrons.momentum_distribution_type = "gaussian"
+electrons.u_th = 0.01 # uth the std of the (unitless) momentum
+electrons.uz_m = 100. # Mean momentum along z (unitless)
+
+ions.charge = q_e
+ions.mass = m_p
+ions.injection_style = "NUniformPerCell"
+ions.num_particles_per_cell_each_dim = 2 1 1
+ions.profile = constant
+ions.density = 1.e20 # number of ions per m^3
+ions.momentum_distribution_type = "gaussian"
+ions.u_th = 0.01 # uth the std of the (unitless) momentum
+ions.uz_m = 100. # Mean momentum along z (unitless)
diff --git a/Tools/performance_tests/automated_test_4_labdiags_2ppc b/Tools/performance_tests/automated_test_4_labdiags_2ppc
new file mode 100644
index 000000000..54512001c
--- /dev/null
+++ b/Tools/performance_tests/automated_test_4_labdiags_2ppc
@@ -0,0 +1,100 @@
+# Maximum number of time steps
+max_step = 100
+
+# number of grid points
+amr.n_cell = 64 64 512
+
+# Maximum allowable size of each subdomain in the problem domain;
+# this is used to decompose the domain for parallel calculations.
+
+amr.max_grid_size = 32
+
+# Maximum level in hierarchy (for now must be 0, i.e., one level in total)
+amr.max_level = 0
+amr.plot_int = 10 # How often to write plotfiles. "<= 0" means no plotfiles.
+amr.check_int = 10
+
+# Geometry
+geometry.coord_sys = 0 # 0: Cartesian
+geometry.is_periodic = 1 1 0 # Is periodic?
+geometry.prob_lo = -150.e-6 -150.e-6 -0.6e-3 # physical domain
+geometry.prob_hi = 150.e-6 150.e-6 0.
+
+# Verbosity
+warpx.verbose = 1
+
+# Algorithms
+algo.current_deposition = 2
+algo.charge_deposition = 0
+algo.field_gathering = 0
+algo.particle_pusher = 0
+
+# Numerics
+interpolation.nox = 3
+interpolation.noy = 3
+interpolation.noz = 3
+warpx.use_filter = 1
+warpx.cfl = 1.0
+warpx.do_pml = 0
+
+# Moving window
+warpx.do_moving_window = 1
+warpx.moving_window_dir = z
+warpx.moving_window_v = 1.0 # in units of the speed of light
+
+# Boosted frame
+warpx.gamma_boost = 15.
+warpx.boost_direction = z
+
+# Diagnostics
+warpx.do_boosted_frame_diagnostic = 1
+warpx.num_snapshots_lab = 20
+warpx.dt_snapshots_lab = 7.0e-14
+
+# Particle Injection
+warpx.do_plasma_injection = 1
+warpx.num_injected_species = 2
+warpx.injected_plasma_species = 0 1
+
+# Species
+particles.nspecies = 2
+particles.species_names = electrons ions
+
+electrons.charge = -q_e
+electrons.mass = m_e
+electrons.injection_style = "NUniformPerCell"
+electrons.xmin = -150.e-6
+electrons.xmax = 150.e-6
+electrons.ymin = -150.e-6
+electrons.ymax = 150.e-6
+electrons.zmin = 0.e-6
+electrons.num_particles_per_cell_each_dim = 1 1 1
+electrons.profile = constant
+electrons.density = 1.
+electrons.momentum_distribution_type = "constant"
+
+ions.charge = q_e
+ions.mass = m_p
+ions.injection_style = "NUniformPerCell"
+ions.xmin = -150.e-6
+ions.xmax = 150.e-6
+ions.ymin = -150.e-6
+ions.ymax = 150.e-6
+ions.zmin = 0.e-6
+ions.num_particles_per_cell_each_dim = 1 1 1
+ions.profile = constant
+ions.density = 1.
+ions.momentum_distribution_type = "constant"
+
+# Laser
+warpx.use_laser = 1
+laser.profile = Gaussian
+laser.position = 0. 0. -1.e-6 # This point is on the laser plane
+laser.direction = 0. 0. 1. # The plane normal direction
+laser.polarization = 1. 0. 0. # The main polarization vector
+laser.e_max = 8.e12 # Maximum amplitude of the laser field (in V/m)
+laser.profile_waist = 5.e-5 # The waist of the laser (in meters)
+laser.profile_duration = 16.7e-15 # The duration of the laser (in seconds)
+laser.profile_t_peak = 33.4e-15 # The time at which the laser reaches its peak (in seconds)
+laser.profile_focal_distance = 0.e-6 # Focal distance from the antenna (in meters)
+laser.wavelength = 0.8e-6 # The wavelength of the laser (in meters)
diff --git a/Tools/performance_tests/automated_test_5_loadimbalance b/Tools/performance_tests/automated_test_5_loadimbalance
new file mode 100644
index 000000000..6546f6804
--- /dev/null
+++ b/Tools/performance_tests/automated_test_5_loadimbalance
@@ -0,0 +1,76 @@
+# Maximum number of time steps
+max_step = 100
+
+# number of grid points
+amr.n_cell = 256 256 256
+
+amr.plot_int = -1 # How often to write plotfiles.
+
+# Maximum allowable size of each subdomain in the problem domain;
+# this is used to decompose the domain for parallel calculations.
+amr.max_grid_size = 32
+
+# Maximum level in hierarchy (for now must be 0, i.e., one level in total)
+amr.max_level = 0
+
+# Geometry
+geometry.coord_sys = 0 # 0: Cartesian
+geometry.is_periodic = 1 1 1 # Is periodic?
+geometry.prob_lo = -20.e-6 -20.e-6 -20.e-6 # physical domain
+geometry.prob_hi = 20.e-6 20.e-6 20.e-6
+
+# Verbosity
+warpx.verbose = 1
+
+# Algorithms
+algo.current_deposition = 2
+algo.charge_deposition = 0
+algo.field_gathering = 0
+algo.particle_pusher = 0
+interpolation.nox = 1
+interpolation.noy = 1
+interpolation.noz = 1
+
+# CFL
+warpx.cfl = 1.0
+
+particles.nspecies = 2
+particles.species_names = electrons ions
+
+electrons.charge = -q_e
+electrons.mass = m_e
+electrons.injection_style = "gaussian_beam"
+electrons.x_rms = 2.e-6
+electrons.y_rms = 2.e-6
+electrons.z_rms = 5.e-6
+electrons.x_m = 0.
+electrons.y_m = 0.
+electrons.z_m = 0.
+electrons.npart = 500000
+electrons.q_tot = -1.602e-9
+electrons.profile = "constant"
+electrons.density = 1.e25
+electrons.momentum_distribution_type = "gaussian"
+electrons.ux_m = 0.0
+electrons.uy_m = 0.0
+electrons.uz_m = 0.0
+electrons.u_th = 0.01
+
+ions.charge = q_e
+ions.mass = m_p
+ions.injection_style = "gaussian_beam"
+ions.x_rms = 2.e-6
+ions.y_rms = 2.e-6
+ions.z_rms = 5.e-6
+ions.x_m = 0.
+ions.y_m = 0.
+ions.z_m = 0.
+ions.npart = 500000
+ions.q_tot = -1.602e-9
+ions.profile = "constant"
+ions.density = 1.e25
+ions.momentum_distribution_type = "gaussian"
+ions.ux_m = 0.0
+ions.uy_m = 0.0
+ions.uz_m = 0.0
+ions.u_th = 0.01
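The load imbalance in this test comes from the particle layout rather than the fields: with amr.max_grid_size = 32, the 256^3 domain is split into (256/32)^3 = 512 boxes, but both species are injected as narrow Gaussian beams at the center of the box, so only the few central boxes carry particles. A rough sketch of the numbers involved (plain Python from the values above, for illustration only):

q_e = 1.602e-19                     # elementary charge (C)
q_tot, npart = -1.602e-9, 500000    # per species, from the input above
weight = abs(q_tot) / q_e / npart   # physical particles per macro-particle
box = 40.e-6                        # prob_hi - prob_lo along each axis
rms = (2.e-6, 2.e-6, 5.e-6)         # beam rms sizes
print('macro-particle weight:', weight)                 # about 2e4
print('beam rms / box size:', [r / box for r in rms])   # ~0.05, 0.05, 0.125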
diff --git a/Tools/performance_tests/automated_test_6_output_2ppc b/Tools/performance_tests/automated_test_6_output_2ppc
new file mode 100644
index 000000000..a1c4172fe
--- /dev/null
+++ b/Tools/performance_tests/automated_test_6_output_2ppc
@@ -0,0 +1,58 @@
+# Maximum number of time steps
+max_step = 10
+
+# number of grid points
+amr.n_cell = 128 128 128
+
+amr.plot_int = 2 # How often to write plotfiles.
+
+# Maximum allowable size of each subdomain in the problem domain;
+# this is used to decompose the domain for parallel calculations.
+amr.max_grid_size = 32
+
+# Maximum level in hierarchy (for now must be 0, i.e., one level in total)
+amr.max_level = 0
+
+# Geometry
+geometry.coord_sys = 0 # 0: Cartesian
+geometry.is_periodic = 1 1 1 # Is periodic?
+geometry.prob_lo = -20.e-6 -20.e-6 -20.e-6 # physical domain
+geometry.prob_hi = 20.e-6 20.e-6 20.e-6
+
+# Verbosity
+warpx.verbose = 1
+
+# Algorithms
+algo.current_deposition = 2
+algo.charge_deposition = 0
+algo.field_gathering = 0
+algo.particle_pusher = 0
+interpolation.nox = 1
+interpolation.noy = 1
+interpolation.noz = 1
+
+# CFL
+warpx.cfl = 1.0
+
+particles.nspecies = 2
+particles.species_names = electrons ions
+
+electrons.charge = -q_e
+electrons.mass = m_e
+electrons.injection_style = "NUniformPerCell"
+electrons.num_particles_per_cell_each_dim = 1 1 1
+electrons.profile = constant
+electrons.density = 1.e20 # number of electrons per m^3
+electrons.momentum_distribution_type = "gaussian"
+electrons.u_th = 0.01 # uth the std of the (unitless) momentum
+electrons.uz_m = 0. # Mean momentum along z (unitless)
+
+ions.charge = q_e
+ions.mass = m_p
+ions.injection_style = "NUniformPerCell"
+ions.num_particles_per_cell_each_dim = 1 1 1
+ions.profile = constant
+ions.density = 1.e20 # number of ions per m^3
+ions.momentum_distribution_type = "gaussian"
+ions.u_th = 0.01 # uth the std of the (unitless) momentum
+ions.uz_m = 0. # Mean momentum along z (unitless)
diff --git a/Tools/performance_tests/functions_perftest.py b/Tools/performance_tests/functions_perftest.py
new file mode 100644
index 000000000..2085367c7
--- /dev/null
+++ b/Tools/performance_tests/functions_perftest.py
@@ -0,0 +1,189 @@
+import os, shutil, re
+
+def run_batch_nnode(test_list, res_dir, bin_name, config_command, architecture='knl', Cname='knl', n_node=1):
+ # Clean res_dir
+ if os.path.exists(res_dir):
+ shutil.rmtree(res_dir)
+ os.makedirs(res_dir)
+ # Copy files to res_dir
+ cwd = os.environ['WARPX'] + '/Tools/performance_tests/'
+ bin_dir = cwd + 'Bin/'
+ shutil.copy(bin_dir + bin_name, res_dir)
+ os.chdir(res_dir)
+ # Calculate simulation time. Allow 5 min + 5 min per simulation
+ job_time_min = 5. + len(test_list)*5.
+ job_time_str = str(int(job_time_min/60)) + ':' + str(int(job_time_min%60)) + ':00'
+ batch_string = ''
+ batch_string += '#!/bin/bash\n'
+ batch_string += '#SBATCH --job-name=' + test_list[0][0] + '\n'
+ batch_string += '#SBATCH --time=' + job_time_str + '\n'
+ batch_string += '#SBATCH -C ' + Cname + '\n'
+ batch_string += '#SBATCH -N ' + str(n_node) + '\n'
+ batch_string += '#SBATCH -q regular\n'
+ batch_string += '#SBATCH -e error.txt\n'
+ batch_string += '#SBATCH --account=m2852\n'
+
+ for count, test_item in enumerate(test_list):
+ # test_item reads [input_file, int n_node, int n_mpi, int n_omp]
+ input_file = test_item[0];
+ shutil.copy(cwd + input_file, res_dir)
+ # test_item[1] is not read since it contains the number of nodes, which is a
+ # global parameter. However, we keep it for compatibility with run_alltests.py
+ n_mpi = test_item[2]
+ n_omp = test_item[3]
+ srun_string = ''
+ srun_string += 'export OMP_NUM_THREADS=' + str(n_omp) + '\n'
+ # number of logical cores per MPI process
+ if architecture == 'cpu':
+ cflag_value = max(1, int(32/n_mpi) * 2) # Follow NERSC directives
+ elif architecture == 'knl':
+ cflag_value = max(1, int(64/n_mpi) * 4) # Follow NERSC directives
+ output_filename = 'out_' + '_'.join([input_file, str(n_node), str(n_mpi), str(n_omp), str(count)]) + '.txt'
+ srun_string += 'srun --cpu_bind=cores '+ \
+ ' -n ' + str(n_node*n_mpi) + \
+ ' -c ' + str(cflag_value) + \
+ ' ./' + bin_name + \
+ ' ' + input_file + \
+ ' > ' + output_filename + '\n'
+ batch_string += srun_string
+ batch_string += 'rm -rf plt*\n'
+ batch_string += 'rm -rf chk*\n'
+ batch_string += 'rm -rf lab_frame_data\n'
+ batch_file = 'slurm'
+ f_exe = open(batch_file,'w')
+ f_exe.write(batch_string)
+ f_exe.close()
+ os.system('chmod 700 ' + bin_name)
+ os.system(config_command + 'sbatch ' + batch_file + ' >> ' + cwd + 'log_jobids_tmp.txt')
+ return 0
+
+def run_batch(run_name, res_dir, bin_name, config_command, architecture='knl',\
+ Cname='knl', n_node=1, n_mpi=1, n_omp=1):
+ # Clean res_dir
+ if os.path.exists(res_dir):
+ shutil.rmtree(res_dir)
+ os.makedirs(res_dir)
+ # Copy files to res_dir
+ cwd = os.environ['WARPX'] + '/Tools/performance_tests/'
+ bin_dir = cwd + 'Bin/'
+ shutil.copy(bin_dir + bin_name, res_dir)
+ shutil.copyfile(cwd + run_name, res_dir + 'inputs')
+ os.chdir(res_dir)
+ batch_string = ''
+ batch_string += '#!/bin/bash\n'
+ batch_string += '#SBATCH --job-name=' + run_name + str(n_node) + str(n_mpi) + str(n_omp) + '\n'
+ batch_string += '#SBATCH --time=00:20:00\n'
+ batch_string += '#SBATCH -C ' + Cname + '\n'
+ batch_string += '#SBATCH -N ' + str(n_node) + '\n'
+ batch_string += '#SBATCH -q regular\n'
+ batch_string += '#SBATCH -e error.txt\n'
+ batch_string += '#SBATCH --account=m2852\n'
+ batch_string += 'export OMP_NUM_THREADS=' + str(n_omp) + '\n'
+ if architecture == 'cpu':
+ cflag_value = max(1, int(32/n_mpi) * 2) # Follow NERSC directives
+ batch_string += 'srun --cpu_bind=cores '+ \
+ ' -n ' + str(n_node*n_mpi) + \
+ ' -c ' + str(cflag_value) + \
+ ' ./' + bin_name + ' inputs > perf_output.txt'
+ elif architecture == 'knl':
+ # number of logical cores per MPI process
+ cflag_value = max(1, int(64/n_mpi) * 4) # Follow NERSC directives
+ batch_string += 'srun --cpu_bind=cores ' + \
+ ' -n ' + str(n_node*n_mpi) + \
+ ' -c ' + str(cflag_value) + \
+ ' ./' + bin_name + ' inputs > perf_output.txt\n'
+ batch_file = 'slurm'
+ f_exe = open(batch_file,'w')
+ f_exe.write(batch_string)
+ f_exe.close()
+ os.system('chmod 700 ' + bin_name)
+ os.system(config_command + 'sbatch ' + batch_file + ' >> ' + cwd + 'log_jobids_tmp.txt')
+ return 0
+
+# Read output file and return init time and 1-step time
+def read_run_perf(filename, n_steps):
+ timing_list = []
+ # Search inclusive time to get simulation step time
+ partition_limit = 'NCalls Incl. Min Incl. Avg Incl. Max Max %'
+ with open(filename) as file_handler:
+ output_text = file_handler.read()
+ # Get total simulation time
+ line_match_totaltime = re.search('TinyProfiler total time across processes.*', output_text)
+ total_time = float(line_match_totaltime.group(0).split()[8])
+ search_area = output_text.partition(partition_limit)[2]
+ line_match_looptime = re.search('\nWarpX::Evolve().*', search_area)
+ time_wo_initialization = float(line_match_looptime.group(0).split()[3])
+ timing_list += [str(total_time - time_wo_initialization)]
+ timing_list += [str(time_wo_initialization/n_steps)]
+ partition_limit1 = 'NCalls Excl. Min Excl. Avg Excl. Max Max %'
+ partition_limit2 = 'NCalls Incl. Min Incl. Avg Incl. Max Max %'
+ file_handler.close()
+ with open(filename) as file_handler:
+ output_text = file_handler.read()
+ # Search EXCLUSIVE routine timings
+ search_area = output_text.partition(partition_limit1)[2].partition(partition_limit2)[0]
+ pattern_list = ['\nParticleContainer::Redistribute().*',\
+ '\nFabArray::FillBoundary().*',\
+ '\nFabArray::ParallelCopy().*',\
+ '\nPICSAR::CurrentDeposition.*',\
+ '\nPICSAR::FieldGather.*',\
+ '\nPICSAR::ParticlePush.*',\
+ '\nPPC::Evolve::Copy.*',\
+ '\nWarpX::EvolveEM().*',\
+ 'Checkpoint().*',\
+ 'WriteParticles().*',\
+ '\nVisMF::Write(FabArray).*',\
+ '\nWriteMultiLevelPlotfile().*',\
+ '\nParticleContainer::RedistributeMPI().*']
+ for pattern in pattern_list:
+ timing = '0'
+ line_match = re.search(pattern, search_area)
+ if line_match is not None:
+ timing = [str(float(line_match.group(0).split()[3])/n_steps)]
+ timing_list += timing
+ return timing_list
+
+# Write time into logfile
+def write_perf_logfile(log_file, log_line):
+ f_log = open(log_file, 'a')
+ f_log.write(log_line)
+ f_log.close()
+ return 0
+
+def get_nsteps(run_name):
+ with open(run_name) as file_handler:
+ run_name_text = file_handler.read()
+ line_match_nsteps = re.search('\nmax_step.*', run_name_text)
+ nsteps = float(line_match_nsteps.group(0).split()[2])
+ return nsteps
+
+
+# Run a performance test in an interactive allocation
+# def run_interactive(run_name, res_dir, n_node=1, n_mpi=1, n_omp=1):
+# # Clean res_dir #
+# if os.path.exists(res_dir):
+# shutil.rmtree(res_dir)
+# os.makedirs(res_dir)
+# # Copy files to res_dir #
+# shutil.copyfile(bin_dir + bin_name, res_dir + bin_name)
+# shutil.copyfile(cwd + run_name, res_dir + 'inputs')
+# os.chdir(res_dir)
+# if args.architecture == 'cpu':
+# cflag_value = max(1, int(32/n_mpi) * 2) # Follow NERSC directives #
+# exec_command = 'export OMP_NUM_THREADS=' + str(n_omp) + ';' +\
+# 'srun --cpu_bind=cores ' + \
+# ' -n ' + str(n_node*n_mpi) + \
+# ' -c ' + str(cflag_value) + \
+# ' ./' + bin_name + ' inputs > perf_output.txt'
+# elif args.architecture == 'knl':
+# # number of logical cores per MPI process #
+# cflag_value = max(1,int(68/n_mpi) * 4) # Follow NERSC directives #
+# exec_command = 'export OMP_NUM_THREADS=' + str(n_omp) + ';' +\
+# 'srun --cpu_bind=cores ' + \
+# ' -n ' + str(n_node*n_mpi) + \
+# ' -c ' + str(cflag_value) + \
+# ' ./' + bin_name + ' inputs > perf_output.txt'
+# os.system('chmod 700 ' + bin_name)
+# os.system(config_command + exec_command)
+# return 0
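For context, a minimal sketch of how these helpers are chained in 'read' mode (the file names and dates are hypothetical examples following the naming convention used by run_batch_nnode above; the actual driver is run_alltests_1node.py below):

from functions_perftest import read_run_perf, write_perf_logfile, get_nsteps

input_file = 'automated_test_1_uniform_rest_32ppc'
n_steps = get_nsteps(input_file)      # parses the 'max_step' line of the input
# output name follows 'out_<input>_<n_node>_<n_mpi>_<n_omp>_<count>.txt'
timings = read_run_perf('out_' + input_file + '_1_16_8_0.txt', n_steps)
log_line = ' '.join(['2018', '01', '31', input_file, 'intel', 'knl',
                     '1', '16', '8'] + timings + ['\n'])
write_perf_logfile('performance_log.txt', log_line)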
diff --git a/Tools/performance_tests/performance_log.txt b/Tools/performance_tests/performance_log.txt
index cb38025d9..543d257a0 100644
--- a/Tools/performance_tests/performance_log.txt
+++ b/Tools/performance_tests/performance_log.txt
@@ -1,37 +1,33 @@
-## year month day run_name compiler architecture n_node n_mpi n_omp time_initialization(s) time_one_iteration(s)
-2017 10 13 uniform_plasma intel knl 1 1 1 1.88 0.8257
-2017 10 13 uniform_plasma intel knl 1 1 1 1.87 0.8229
-2017 10 13 uniform_plasma intel knl 1 1 1 1.87 0.8244
-2017 10 13 uniform_plasma intel knl 1 1 2 1.87 0.4372
-2017 10 13 uniform_plasma intel knl 1 1 2 1.89 0.4374
-2017 10 13 uniform_plasma intel knl 1 1 2 1.9 0.4366
-2017 10 13 uniform_plasma intel knl 2 2 1 0.54 0.2152
-2017 10 13 uniform_plasma intel knl 2 2 1 0.6 0.2163
-2017 10 13 uniform_plasma intel knl 2 2 1 0.6 0.217
-2017 10 13 uniform_plasma gnu knl 1 1 1 1.17 0.8062
-2017 10 13 uniform_plasma gnu knl 1 1 1 1.39 0.8062
-2017 10 13 uniform_plasma gnu knl 1 1 1 1.4 0.8067
-2017 10 13 uniform_plasma gnu knl 1 1 2 1.07 0.4271
-2017 10 13 uniform_plasma gnu knl 1 1 2 1.02 0.4249
-2017 10 13 uniform_plasma gnu knl 1 1 2 1.06 0.425
-2017 10 13 uniform_plasma gnu knl 2 2 1 0.75 0.2119
-2017 10 13 uniform_plasma gnu knl 2 2 1 0.86 0.2111
-2017 10 13 uniform_plasma gnu knl 2 2 1 0.71 0.2121
-2017 10 13 uniform_plasma gnu cpu 1 1 1 0.66 0.1667
-2017 10 13 uniform_plasma gnu cpu 1 1 1 0.71 0.1653
-2017 10 13 uniform_plasma gnu cpu 1 1 1 0.8 0.1667
-2017 10 13 uniform_plasma gnu cpu 1 1 2 0.579 0.09701
-2017 10 13 uniform_plasma gnu cpu 1 1 2 0.629 0.09651
-2017 10 13 uniform_plasma gnu cpu 1 1 2 0.892 0.09718
-2017 10 13 uniform_plasma gnu cpu 2 2 1 0.466 0.04317
-2017 10 13 uniform_plasma gnu cpu 2 2 1 0.535 0.04414
-2017 10 13 uniform_plasma gnu cpu 2 2 1 0.542 0.04404
-2017 10 15 uniform_plasma gnu knl 1 8 16 0.94 0.1971
-2017 10 15 uniform_plasma gnu knl 1 8 16 0.76 0.1795
-2017 10 15 uniform_plasma gnu knl 1 8 16 1.07 0.1799
-2017 10 15 uniform_plasma gnu knl 1 4 32 1.17 0.2019
-2017 10 15 uniform_plasma gnu knl 1 4 32 1.09 0.2055
-2017 10 15 uniform_plasma gnu knl 1 4 32 1.4 0.1926
-2017 10 15 uniform_plasma gnu knl 2 4 32 0.97 0.1313
-2017 10 15 uniform_plasma gnu knl 2 4 32 1.05 0.1402
-2017 10 15 uniform_plasma gnu knl 2 4 32 1.07 0.1429
+## year month day run_name compiler architecture n_node n_mpi n_omp time_initialization time_one_iteration Redistribute FillBoundary ParallelCopy CurrentDeposition FieldGather ParthiclePush Copy EvolveEM Checkpoint WriteParticles Write_FabArray WriteMultiLevelPlotfile(unit: second) RedistributeMPI
+2018 01 31 automated_test_1_uniform_rest_32ppc intel knl 1 16 8 3.14 0.3986 0.1713 0.01719 0.01615 0.06987 0.03636 0.01901 0.01999 0.003602 0 0 0 0 0.007262
+2018 01 31 automated_test_1_uniform_rest_32ppc intel knl 1 16 8 3.39 0.4009 0.1712 0.01676 0.01583 0.07061 0.03684 0.01926 0.02011 0.003687 0 0 0 0 0.007841
+2018 01 31 automated_test_1_uniform_rest_32ppc intel knl 1 16 8 2.91 0.4024 0.1716 0.01826 0.01918 0.0703 0.0363 0.01912 0.01989 0.003017 0 0 0 0 0.007256
+2018 01 31 automated_test_1_uniform_rest_32ppc intel knl 1 16 8 3.21 0.3997 0.1717 0.01706 0.0162 0.07026 0.03655 0.01928 0.01999 0.003687 0 0 0 0 0.006799
+2018 01 31 automated_test_2_uniform_rest_1ppc intel knl 1 16 8 0.89 0.4779 0.04441 0.1143 0.09117 0.1072 0.01254 0.003702 0.004217 0.01247 0 0 0 0 0.003441
+2018 01 31 automated_test_2_uniform_rest_1ppc intel knl 1 16 8 1.58 0.4626 0.04424 0.1048 0.0851 0.1073 0.01259 0.003767 0.004282 0.01311 0 0 0 0 0.002798
+2018 01 31 automated_test_2_uniform_rest_1ppc intel knl 1 16 8 1.63 0.4616 0.04441 0.1033 0.08398 0.1079 0.01312 0.003802 0.004224 0.01278 0 0 0 0 0.003188
+2018 01 31 automated_test_2_uniform_rest_1ppc intel knl 1 16 8 1.72 0.461 0.04419 0.1038 0.08424 0.1074 0.01257 0.003799 0.0043 0.01318 0 0 0 0 0.002816
+2018 01 31 automated_test_1_uniform_rest_32ppc intel knl 1 16 8 3.32 0.3986 0.1712 0.01804 0.01697 0.06999 0.03615 0.01842 0.01896 0.003445 0 0 0 0 0.00738
+2018 01 31 automated_test_1_uniform_rest_32ppc intel knl 1 16 8 3.17 0.3974 0.1711 0.01722 0.01587 0.07016 0.03642 0.01844 0.01902 0.003431 0 0 0 0 0.007332
+2018 01 31 automated_test_1_uniform_rest_32ppc intel knl 1 16 8 2.88 0.3946 0.1709 0.01686 0.01562 0.06972 0.03595 0.01848 0.01916 0.003269 0 0 0 0 0.006887
+2018 01 31 automated_test_1_uniform_rest_32ppc intel knl 1 16 8 2.95 0.4094 0.1708 0.01761 0.01632 0.07001 0.03651 0.01863 0.01906 0.003314 0 0 0 0 0.01898
+2018 01 31 automated_test_2_uniform_rest_1ppc intel knl 1 16 8 1.3 0.4787 0.04447 0.1139 0.09124 0.108 0.01287 0.003811 0.004205 0.01249 0 0 0 0 0.003045
+2018 01 31 automated_test_2_uniform_rest_1ppc intel knl 1 16 8 3.16 0.4578 0.04412 0.1015 0.08339 0.1078 0.01301 0.003919 0.004182 0.0125 0 0 0 0 0.002701
+2018 01 31 automated_test_2_uniform_rest_1ppc intel knl 1 16 8 2.78 0.4679 0.04418 0.1035 0.08456 0.1079 0.01303 0.003902 0.004214 0.0127 0 0 0 0 0.009118
+2018 01 31 automated_test_2_uniform_rest_1ppc intel knl 1 16 8 1.12 0.4613 0.04425 0.1043 0.08517 0.1073 0.01242 0.003797 0.004221 0.01239 0 0 0 0 0.003665
+2018 01 31 automated_test_3_uniform_drift_4ppc intel knl 1 16 8 0.48 0.1237 0.03056 0.01622 0.01468 0.02039 0.005016 0.003737 0.002632 0.00326 0 0 0 0 0.006871
+2018 01 31 automated_test_3_uniform_drift_4ppc intel knl 1 16 8 0.79 0.1287 0.0308 0.01706 0.01715 0.02042 0.005452 0.003636 0.002797 0.003143 0 0 0 0 0.007324
+2018 01 31 automated_test_3_uniform_drift_4ppc intel knl 1 16 8 0.9 0.1296 0.03084 0.01711 0.01731 0.02053 0.005379 0.003641 0.002843 0.003137 0 0 0 0 0.008151
+2018 01 31 automated_test_3_uniform_drift_4ppc intel knl 1 16 8 0.9 0.1323 0.03081 0.01703 0.01736 0.02065 0.005339 0.003638 0.002751 0.004008 0 0 0 0 0.01015
+2018 01 31 automated_test_4_labdiags_2ppc intel knl 1 16 8 0.85 0.2896 0.03832 0.06449 0.07493 0.003507 0.002987 0.0001515 0.0001762 0.007921 0.0371 0.001537 0 0.0004387 0.03832
+2018 01 31 automated_test_4_labdiags_2ppc intel knl 1 16 8 1.12 0.2895 0.03845 0.06423 0.07481 0.003489 0.002994 0.000152 0.0001779 0.00834 0.0357 0.001545 0 0.0005249 0.03845
+2018 01 31 automated_test_4_labdiags_2ppc intel knl 1 16 8 0.76 0.3243 0.03804 0.0646 0.07462 0.003483 0.002991 0.0001508 0.0001769 0.008051 0.05983 0.001565 0 0.005392 0.03804
+2018 01 31 automated_test_4_labdiags_2ppc intel knl 1 16 8 0.74 0.3143 0.03941 0.06478 0.07547 0.003486 0.003007 0.0001518 0.0001808 0.007845 0.05079 0.001543 0 0.0007033 0.03941
+2018 01 31 automated_test_5_loadimbalance intel knl 1 16 8 9.2 0.3845 0.08558 0.1042 0.1332 0 0 0 0 0.01226 0 0 0 0 0.08558
+2018 01 31 automated_test_5_loadimbalance intel knl 1 16 8 9.19 0.3864 0.085 0.1051 0.134 0 0 0 0 0.01202 0 0 0 0 0.085
+2018 01 31 automated_test_5_loadimbalance intel knl 1 16 8 8.98 0.3912 0.08665 0.1061 0.1356 0 0 0 0 0.01193 0 0 0 0 0.08665
+2018 01 31 automated_test_5_loadimbalance intel knl 1 16 8 9.03 0.3826 0.08484 0.1031 0.1329 0 0 0 0 0.01205 0 0 0 0 0.08484
+2018 01 31 automated_test_6_output_2ppc intel knl 1 16 8 3.6 1.086 0.0898 0.1311 0.09441 0.1345 0.027 0.008783 0.009792 0.02151 0.08454 0.04962 0 0.0008218 0.005303
+2018 01 31 automated_test_6_output_2ppc intel knl 1 16 8 4.7 1.136 0.09059 0.1437 0.09535 0.1358 0.02915 0.009238 0.01002 0.02315 0.09088 0.05006 0 0.01081 0.005381
+2018 01 31 automated_test_6_output_2ppc intel knl 1 16 8 4.0 1.132 0.09145 0.1377 0.09592 0.1365 0.02817 0.009353 0.0103 0.02447 0.066 0.05309 0 0.02047 0.009196
+2018 01 31 automated_test_6_output_2ppc intel knl 1 16 8 3.8 1.135 0.09088 0.1308 0.09623 0.135 0.02762 0.008839 0.009758 0.02561 0.1144 0.04874 0 0.0008693 0.008112
diff --git a/Tools/performance_tests/run_alltests.py b/Tools/performance_tests/run_alltests.py
index 7c02481fb..440da363d 100644
--- a/Tools/performance_tests/run_alltests.py
+++ b/Tools/performance_tests/run_alltests.py
@@ -1,5 +1,6 @@
import os, sys, shutil
import argparse, re, time
+from functions_perftest import *
# This script runs automated performance tests for WarpX.
# It runs tests in list test_list defined below, and writes
@@ -9,7 +10,7 @@ import argparse, re, time
# Before running performance tests, make sure you have the latest version
# of performance_log.txt
# A typical execution reads:
-# > python run_alltests.py --no-recompile --compiler=gnu --architecture=cpu --mode=run --no-commit --log_file='my_performance_log.txt'
+# > python run_alltests.py --no-recompile --compiler=gnu --architecture=cpu --mode=run --log_file='my_performance_log.txt'
# These are default values, and will give the same result as
# > python run_alltests.py
# To add a new test item, extend the test_list with a line like
@@ -33,9 +34,53 @@ import argparse, re, time
# write data into the performance log file
# push file performance_log.txt on the repo
+# Define the list of tests to run
+# -------------------------------
+# each element of test_list contains
+# [str runname, int n_node, int n_mpi PER NODE, int n_omp]
+test_list = []
+n_repeat = 3
+basename1 = 'uniform_t0.01_'
+
+test_list.extend([[basename1 + '128', 1, 16, 8]]*n_repeat)
+test_list.extend([[basename1 + '128', 1, 32, 16]]*n_repeat)
+
+# test_list.extend([[basename1 + '128', 1, 16, 8]]*n_repeat)
+# test_list.extend([[basename1 + '256', 8, 16, 8]]*n_repeat)
+# test_list.extend([[basename1 + '512', 64, 16, 8]]*n_repeat)
+# test_list.extend([[basename1 + '1024', 512, 16, 8]]*n_repeat)
+# test_list.extend([[basename1 + '2048', 4096, 16, 8]]*n_repeat)
+
+
+# test_list.extend([['uniform_t0.01_direct1_1ppc_128', 1, 16, 8]]*n_repeat)
+# test_list.extend([['uniform_t0.01_direct3_1ppc_128', 1, 16, 8]]*n_repeat)
+# test_list.extend([['uniform_t0.01_esirk1_1ppc_128', 1, 16, 8]]*n_repeat)
+# test_list.extend([['uniform_t0.01_esirk3_1ppc_128', 1, 16, 8]]*n_repeat)
+
+# test_list.extend([['uniform_t0.01_direct1_1ppc_256', 8, 16, 8]]*n_repeat)
+# test_list.extend([['uniform_t0.01_direct3_1ppc_256', 8, 16, 8]]*n_repeat)
+# test_list.extend([['uniform_t0.01_esirk1_1ppc_256', 8, 16, 8]]*n_repeat)
+# test_list.extend([['uniform_t0.01_esirk3_1ppc_256', 8, 16, 8]]*n_repeat)
+
+# test_list.extend([['uniform_t0.01_direct1_1ppc_512', 64, 16, 8]]*n_repeat)
+# test_list.extend([['uniform_t0.01_direct3_1ppc_512', 64, 16, 8]]*n_repeat)
+# test_list.extend([['uniform_t0.01_esirk1_1ppc_512', 64, 16, 8]]*n_repeat)
+# test_list.extend([['uniform_t0.01_esirk3_1ppc_512', 64, 16, 8]]*n_repeat)
+
+# test_list.extend([['uniform_t0.01_direct1_1ppc_1024', 512, 16, 8]]*n_repeat)
+# test_list.extend([['uniform_t0.01_direct3_1ppc_1024', 512, 16, 8]]*n_repeat)
+# test_list.extend([['uniform_t0.01_esirk1_1ppc_1024', 512, 16, 8]]*n_repeat)
+# test_list.extend([['uniform_t0.01_esirk3_1ppc_1024', 512, 16, 8]]*n_repeat)
+
+# test_list.extend([['uniform_t0.01_direct1_1ppc_2048', 4096, 16, 8]]*n_repeat)
+# test_list.extend([['uniform_t0.01_direct3_1ppc_2048', 4096, 16, 8]]*n_repeat)
+# test_list.extend([['uniform_t0.01_esirk1_1ppc_2048', 4096, 16, 8]]*n_repeat)
+# test_list.extend([['uniform_t0.01_esirk3_1ppc_2048', 4096, 16, 8]]*n_repeat)
+
+n_tests = len(test_list)
+
# Read command-line arguments
# ---------------------------
-
# Create parser and read arguments
parser = argparse.ArgumentParser(
description='Run performance tests and write results in files')
@@ -115,136 +160,6 @@ if args.recompile == True:
# Define functions to run a test and analyse results
# --------------------------------------------------
-
-# Run a performance test in an interactive allocation
-def run_interactive(run_name, res_dir, n_node=1, n_mpi=1, n_omp=1):
- # Clean res_dir
- if os.path.exists(res_dir):
- shutil.rmtree(res_dir)
- os.makedirs(res_dir)
- # Copy files to res_dir
- shutil.copyfile(bin_dir + bin_name, res_dir + bin_name)
- shutil.copyfile(cwd + run_name, res_dir + 'inputs')
- os.chdir(res_dir)
- if args.architecture == 'cpu':
- cflag_value = max(1, int(32/n_mpi) * 2) # Follow NERSC directives
- exec_command = 'export OMP_NUM_THREADS=' + str(n_omp) + ';' +\
- 'srun --cpu_bind=cores ' + \
- ' -n ' + str(n_node*n_mpi) + \
- ' -c ' + str(cflag_value) + \
- ' ./' + bin_name + ' inputs > perf_output.txt'
- elif args.architecture == 'knl':
- # number of logical cores per MPI process
- cflag_value = max(1,int(68/n_mpi) * 4) # Follow NERSC directives
- exec_command = 'export OMP_NUM_THREADS=' + str(n_omp) + ';' +\
- 'srun --cpu_bind=cores ' + \
- ' -n ' + str(n_node*n_mpi) + \
- ' -c ' + str(cflag_value) + \
- ' ./' + bin_name + ' inputs > perf_output.txt'
- os.system('chmod 700 ' + bin_name)
- os.system(config_command + exec_command)
- return 0
-
-def run_batch(run_name, res_dir, n_node=1, n_mpi=1, n_omp=1):
- # Clean res_dir
- if os.path.exists(res_dir):
- shutil.rmtree(res_dir)
- os.makedirs(res_dir)
- # Copy files to res_dir
- shutil.copyfile(bin_dir + bin_name, res_dir + bin_name)
- shutil.copyfile(cwd + run_name, res_dir + 'inputs')
- os.chdir(res_dir)
- batch_string = ''
- batch_string += '#!/bin/bash\n'
- batch_string += '#SBATCH --job-name=' + run_name + str(n_node) + str(n_mpi) + str(n_omp) + '\n'
- batch_string += '#SBATCH --time=00:30:00\n'
- batch_string += '#SBATCH -C ' + module_Cname[args.architecture] + '\n'
- batch_string += '#SBATCH -N ' + str(n_node) + '\n'
- batch_string += '#SBATCH --partition=regular\n'
- batch_string += '#SBATCH --qos=normal\n'
- batch_string += '#SBATCH -e error.txt\n'
- batch_string += '#SBATCH --account=m2852\n'
- batch_string += 'export OMP_NUM_THREADS=' + str(n_omp) + '\n'
- if args.architecture == 'cpu':
- cflag_value = max(1, int(32/n_mpi) * 2) # Follow NERSC directives
- batch_string += 'srun --cpu_bind=cores '+ \
- ' -n ' + str(n_node*n_mpi) + \
- ' -c ' + str(cflag_value) + \
- ' ./' + bin_name + ' inputs > perf_output.txt'
- elif args.architecture == 'knl':
- # number of logical cores per MPI process
- cflag_value = max(1, int(64/n_mpi) * 4) # Follow NERSC directives
- batch_string += 'srun --cpu_bind=cores ' + \
- ' -n ' + str(n_node*n_mpi) + \
- ' -c ' + str(cflag_value) + \
- ' ./' + bin_name + ' inputs > perf_output.txt\n'
- batch_file = 'slurm'
- f_exe = open(batch_file,'w')
- f_exe.write(batch_string)
- f_exe.close()
- os.system('chmod 700 ' + bin_name)
- os.system(config_command + 'sbatch ' + batch_file + ' >> ' + cwd + 'log_jobids_tmp.txt')
- return 0
-
-# Read output file and return init time and 1-step time
-def read_run_perf(filename):
- timing_list = []
- # Search inclusive time to get simulation step time
- partition_limit = 'NCalls Incl. Min Incl. Avg Incl. Max Max %'
- with open(filename) as file_handler:
- output_text = file_handler.read()
- # Get total simulation time
- line_match_totaltime = re.search('TinyProfiler total time across processes.*', output_text)
- total_time = float(line_match_totaltime.group(0).split()[8])
- search_area = output_text.partition(partition_limit)[2]
- line_match_looptime = re.search('\nWarpX::Evolve().*', search_area)
- time_wo_initialization = float(line_match_looptime.group(0).split()[3])
- timing_list += [str(total_time - time_wo_initialization)]
- timing_list += [str(time_wo_initialization/n_steps)]
- # Search exclusive time to get routines timing
- partition_limit1 = 'NCalls Excl. Min Excl. Avg Excl. Max Max %'
- partition_limit2 = 'NCalls Incl. Min Incl. Avg Incl. Max Max %'
- file_handler.close()
- with open(filename) as file_handler:
- output_text = file_handler.read()
- search_area = output_text.partition(partition_limit1)[2].partition(partition_limit2)[0]
- pattern_list = ['\nParticleContainer::Redistribute().*',\
- '\nFabArray::FillBoundary().*',\
- '\nFabArray::ParallelCopy().*',\
- '\nPICSAR::CurrentDeposition.*',\
- '\nPICSAR::FieldGather.*',\
- '\nPICSAR::ParticlePush.*',\
- '\nPPC::Evolve::Copy.*',\
- '\nWarpX::EvolveEM().*',\
- 'NArrayInt>::Checkpoint().*',\
- 'NArrayInt>::WriteParticles().*',\
- '\nVisMF::Write_FabArray.*',\
- '\nWriteMultiLevelPlotfile().*']
- for pattern in pattern_list:
- timing = '0'
- line_match = re.search(pattern, search_area)
- if line_match is not None:
- timing = [str(float(line_match.group(0).split()[3])/n_steps)]
- timing_list += timing
- return timing_list
-
-# Write time into logfile
-def write_perf_logfile(log_file):
- log_line = ' '.join([year, month, day, run_name, args.compiler,\
- args.architecture, str(n_node), str(n_mpi),\
- str(n_omp)] + timing_list + ['\n'])
- f_log = open(log_file, 'a')
- f_log.write(log_line)
- f_log.close()
- return 0
-
-def get_nsteps(runname):
- with open(runname) as file_handler:
- runname_text = file_handler.read()
- line_match_nsteps = re.search('\nmax_step.*', runname_text)
- nsteps = float(line_match_nsteps.group(0).split()[2])
- return nsteps
-
def process_analysis():
dependencies = ''
f_log = open(cwd + 'log_jobids_tmp.txt','r')
@@ -254,18 +169,19 @@ def process_analysis():
dependencies += line.split()[3] + ':'
batch_string = ''
batch_string += '#!/bin/bash\n'
- batch_string += '#SBATCH --job-name=perftests_read\n'
+ batch_string += '#SBATCH --job-name=warpx_read\n'
batch_string += '#SBATCH --time=00:05:00\n'
batch_string += '#SBATCH -C ' + module_Cname[args.architecture] + '\n'
batch_string += '#SBATCH -N 1\n'
batch_string += '#SBATCH -S 4\n'
- batch_string += '#SBATCH --partition=regular\n'
- batch_string += '#SBATCH --qos=normal\n'
+ batch_string += '#SBATCH -q regular\n'
batch_string += '#SBATCH -e read_error.txt\n'
batch_string += '#SBATCH -o read_output.txt\n'
batch_string += '#SBATCH --mail-type=end\n'
batch_string += '#SBATCH --account=m2852\n'
- batch_string += 'python ' + __file__ + ' --no-recompile --compiler=' + args.compiler + ' --architecture=' + args.architecture + ' --mode=read' + ' --log_file=' + log_file
+ batch_string += 'python ' + __file__ + ' --no-recompile --compiler=' + \
+ args.compiler + ' --architecture=' + args.architecture + \
+ ' --mode=read' + ' --log_file=' + log_file
if args.commit == True:
batch_string += ' --commit'
batch_string += '\n'
@@ -279,20 +195,6 @@ def process_analysis():
# Loop over the tests and return run time + details
# -------------------------------------------------
-
-# each element of test_list contains
-# [str runname, int n_node, int n_mpi PER NODE, int n_omp]
-
-test_list = []
-n_repeat = 1
-filename1 = 'uniform_plasma'
-
-test_list.extend([[filename1, 1, 8, 16]]*3)
-test_list.extend([[filename1, 1, 4, 32]]*3)
-test_list.extend([[filename1, 2, 4, 32]]*3)
-
-n_tests = len(test_list)
-
if args.mode == 'run':
# Remove file log_jobids_tmp.txt if exists.
# This file contains the jobid of every perf test
@@ -308,13 +210,14 @@ if args.mode == 'run':
n_omp = current_run[3]
n_steps = get_nsteps(cwd + run_name)
res_dir = res_dir_base
- res_dir += '_'.join([year, month, day, run_name, args.compiler,\
+ res_dir += '_'.join([run_name, args.compiler,\
args.architecture, str(n_node), str(n_mpi),\
- str(n_omp)]) + '/'
+ str(n_omp), str(count)]) + '/'
# Run the simulation.
# If you are currently in an interactive session and want to run interactive,
# just replace run_batch with run_interactive
- run_batch(run_name, res_dir, n_node=n_node, n_mpi=n_mpi, n_omp=n_omp)
+ run_batch(run_name, res_dir, bin_name, config_command, architecture=args.architecture, \
+ Cname=module_Cname[args.architecture], n_node=n_node, n_mpi=n_mpi, n_omp=n_omp)
os.chdir(cwd)
process_analysis()
@@ -326,7 +229,8 @@ if args.mode == 'read':
'FillBoundary ParallelCopy CurrentDeposition FieldGather '+\
'ParthiclePush Copy EvolveEM Checkpoint '+\
'WriteParticles Write_FabArray '+\
- 'WriteMultiLevelPlotfile(unit: second)\n'
+ 'WriteMultiLevelPlotfile '+\
+ 'RedistributeMPI(unit: second)\n'
f_log = open(log_dir + log_file, 'a')
f_log.write(log_line)
f_log.close()
@@ -340,13 +244,20 @@ if args.mode == 'read':
n_steps = get_nsteps(cwd + run_name)
print('n_steps = ' + str(n_steps))
res_dir = res_dir_base
- res_dir += '_'.join([year, month, day, run_name, args.compiler,\
+ res_dir += '_'.join([run_name, args.compiler,\
args.architecture, str(n_node), str(n_mpi),\
- str(n_omp)]) + '/'
- # Read performance data from the output file
- timing_list = read_run_perf(res_dir + 'perf_output.txt')
+ str(n_omp), str(count)]) + '/'
+# res_dir += '_'.join([year, month, '25', run_name, args.compiler,\
+# args.architecture, str(n_node), str(n_mpi), \
+# str(n_omp)]) + '/'
+ # Read performance data from the output file
+ output_filename = 'perf_output.txt'
+ timing_list = read_run_perf(res_dir + output_filename, n_steps)
# Write performance data to the performance log file
- write_perf_logfile(log_dir + log_file)
+ log_line = ' '.join([year, month, day, run_name, args.compiler,\
+ args.architecture, str(n_node), str(n_mpi),\
+ str(n_omp)] + timing_list + ['\n'])
+ write_perf_logfile(log_dir + log_file, log_line)
# Store test parameters for record
dir_record_base = './perf_warpx_record/'
@@ -363,6 +274,21 @@ if args.mode == 'read':
for count, current_run in enumerate(test_list):
shutil.copy(current_run[0], dir_record)
+ for count, current_run in enumerate(test_list):
+ run_name = current_run[0]
+ n_node = current_run[1]
+ n_mpi = current_run[2]
+ n_omp = current_run[3]
+ res_dir = res_dir_base
+ res_dir += '_'.join([run_name, args.compiler,\
+ args.architecture, str(n_node), str(n_mpi),\
+ str(n_omp), str(count)]) + '/'
+ res_dir_arch = res_dir_base
+ res_dir_arch += '_'.join([year, month, day, run_name, args.compiler,\
+ args.architecture, str(n_node), str(n_mpi), \
+ str(n_omp), str(count)]) + '/'
+ os.rename(res_dir, res_dir_arch)
+
# Commit results to the Repo
if args.commit == True:
os.system('git add ' + log_dir + log_file + ';'\
diff --git a/Tools/performance_tests/run_alltests_1node.py b/Tools/performance_tests/run_alltests_1node.py
new file mode 100644
index 000000000..4c6849c3b
--- /dev/null
+++ b/Tools/performance_tests/run_alltests_1node.py
@@ -0,0 +1,333 @@
+import os, sys, shutil
+import argparse, re, time
+from functions_perftest import *
+
+# This script runs automated performance tests for WarpX.
+# It runs tests in list test_list defined below, and writes
+# results in file performance_log.txt in warpx/performance_tests/
+
+# ---- User's manual ----
+# Before running performance tests, make sure you have the latest version
+# of performance_log.txt
+
+# ---- Running a custom set of performance tests ----
+# > python run_alltests_1node.py --no-recompile --compiler=intel
+# > --architecture=knl --mode=run --input_file=uniform_plasma
+# > --n_node=1 --log_file='my_performance_log.txt'
+
+# ---- Running the pre-defined automated tests ----
+# Compile and run:
+# > python run_alltests_1node.py --automated --recompile
+# Just run:
+# > python run_alltests_1node.py --automated
+
+# To add a new test item, extend the test_list with a line like
+# test_list.extend([['my_input_file', n_node, n_mpi, n_omp]]*n_repeat)
+# - my_input_file must be in warpx/performance_tests
+
+# ---- Developer's manual ----
+# This script can run in two modes:
+# - 'run' mode: for each test item, a batch job is executed.
+# create folder '$SCRATCH/performance_warpx/'
+# recompile the code if option --recompile is used
+# loop over test_list and submit one batch script per item
+# Submit a batch job that executes the script in read mode
+# This last job runs once all others are completed
+# - 'read' mode: Get performance data from all test items
+# create performance log file if does not exist
+# loop over test_file
+# read initialization time and step time
+# write data into the performance log file
+# push file performance_log.txt on the repo
+
+# Read command-line arguments
+# ---------------------------
+# Create parser and read arguments
+parser = argparse.ArgumentParser(
+ description='Run performance tests and write results in files')
+parser.add_argument('--recompile', dest='recompile', action='store_true', default=False)
+parser.add_argument('--no-recompile', dest='recompile', action='store_false', default=False)
+parser.add_argument('--commit', dest='commit', action='store_true', default=False)
+parser.add_argument( '--compiler', choices=['gnu', 'intel'], default='intel',
+ help='which compiler to use')
+parser.add_argument( '--architecture', choices=['cpu', 'knl'], default='knl',
+ help='which architecture to cross-compile for NERSC machines')
+parser.add_argument( '--mode', choices=['run', 'read'], default='run',
+ help='whether to run perftests or read their perf output. run calls read')
+parser.add_argument( '--log_file', dest = 'log_file', default='my_performance_log.txt',
+ help='name of log file where data will be written. ignored if option --commit is used')
+parser.add_argument('--n_node', dest='n_node', default=1, help='number of nodes for the runs')
+parser.add_argument('--input_file', dest='input_file', default='input_file.pixr',
+ type=str, help='input file to run')
+parser.add_argument('--automated', dest='automated', action='store_true', default=False,
+ help='Use to run the automated test list')
+
+args = parser.parse_args()
+log_file = args.log_file
+do_commit = args.commit
+run_name = args.input_file
+
+# list of tests to run and analyse.
+# Note: this is overwritten if the --automated flag is used
+# each element of test_list contains
+# [str input_file, int n_node, int n_mpi PER NODE, int n_omp]
+test_list = []
+n_repeat = 2
+filename1 = args.input_file
+test_list.extend([[filename1, 1, 128, 1]]*n_repeat)
+test_list.extend([[filename1, 1, 64, 2]]*n_repeat)
+# test_list.extend([[filename1, 1, 32, 4]]*n_repeat)
+# test_list.extend([[filename1, 1, 16, 8]]*n_repeat)
+# test_list.extend([[filename1, 1, 8, 16]]*n_repeat)
+# test_list.extend([[filename1, 1, 4, 32]]*n_repeat)
+# test_list.extend([[filename1, 1, 2, 64]]*n_repeat)
+# test_list.extend([[filename1, 1, 1, 128]]*n_repeat)
+
+# Nothing should be changed after this line
+# if flag --automated is used, test_list and do_commit are
+# overwritten
+
+if args.automated == True:
+ test_list = []
+ n_repeat = 4
+ test_list.extend([['automated_test_1_uniform_rest_32ppc', 1, 16, 8]]*n_repeat)
+ test_list.extend([['automated_test_2_uniform_rest_1ppc', 1, 16, 8]]*n_repeat)
+ test_list.extend([['automated_test_3_uniform_drift_4ppc', 1, 16, 8]]*n_repeat)
+ test_list.extend([['automated_test_4_labdiags_2ppc', 1, 16, 8]]*n_repeat)
+ test_list.extend([['automated_test_5_loadimbalance', 1, 16, 8]]*n_repeat)
+ test_list.extend([['automated_test_6_output_2ppc', 1, 16, 8]]*n_repeat)
+ do_commit = False
+ run_name = 'automated_tests'
+
+n_tests = len(test_list)
+if do_commit == True:
+ log_file = 'performance_log.txt'
+
+# Dictionaries
+# compiler names. Used for WarpX executable name
+compiler_name = {'intel': 'intel', 'gnu': 'gcc'}
+# architecture. Used for WarpX executable name
+module_name = {'cpu': 'haswell', 'knl': 'mic-knl'}
+# architecture. Used in batch scripts
+module_Cname = {'cpu': 'haswell', 'knl': 'knl,quad,cache'}
+# Define environment variables
+cwd = os.getcwd() + '/'
+res_dir_base = os.environ['SCRATCH'] + '/performance_warpx/'
+bin_dir = cwd + 'Bin/'
+bin_name = 'perf_tests3d.' + args.compiler + '.' + module_name[args.architecture] + '.TPROF.MPI.OMP.ex'
+log_dir = cwd
+
+day = time.strftime('%d')
+month = time.strftime('%m')
+year = time.strftime('%Y')
+n_node = int(args.n_node)
+
+# Initialize tests
+# ----------------
+if args.mode == 'run':
+# Set default options for compilation and execution
+ config_command = ''
+ config_command += 'module unload darshan;'
+ config_command += 'module load craype-hugepages4M;'
+ if args.architecture == 'knl':
+ if args.compiler == 'intel':
+ config_command += 'module unload PrgEnv-gnu;'
+ config_command += 'module load PrgEnv-intel;'
+ elif args.compiler == 'gnu':
+ config_command += 'module unload PrgEnv-intel;'
+ config_command += 'module load PrgEnv-gnu;'
+ config_command += 'module unload craype-haswell;'
+ config_command += 'module load craype-mic-knl;'
+ elif args.architecture == 'cpu':
+ if args.compiler == 'intel':
+ config_command += 'module unload PrgEnv-gnu;'
+ config_command += 'module load PrgEnv-intel;'
+ elif args.compiler == 'gnu':
+ config_command += 'module unload PrgEnv-intel;'
+ config_command += 'module load PrgEnv-gnu;'
+ config_command += 'module unload craype-mic-knl;'
+ config_command += 'module load craype-haswell;'
+ # Create main result directory if it does not exist
+ if not os.path.exists(res_dir_base):
+ os.mkdir(res_dir_base)
+
+# Recompile if requested
+if args.recompile == True:
+ with open(cwd + 'GNUmakefile_perftest') as makefile_handler:
+ makefile_text = makefile_handler.read()
+ makefile_text = re.sub('\nCOMP.*', '\nCOMP=%s' %compiler_name[args.compiler], makefile_text)
+ with open(cwd + 'GNUmakefile_perftest', 'w') as makefile_handler:
+ makefile_handler.write( makefile_text )
+ os.system(config_command + " make -f GNUmakefile_perftest realclean ; " + " rm -r tmp_build_dir *.mod; make -j 8 -f GNUmakefile_perftest")
+
+# This function runs a batch script with dependencies to perform the analysis
+# when performance runs are done.
+def process_analysis():
+ dependencies = ''
+ f_log = open(cwd + 'log_jobids_tmp.txt','r')
+ line = f_log.readline()
+ print(line)
+ dependencies += line.split()[3] + ':'
+ batch_string = ''
+ batch_string += '#!/bin/bash\n'
+ batch_string += '#SBATCH --job-name=warpx_1node_read\n'
+ batch_string += '#SBATCH --time=00:05:00\n'
+ batch_string += '#SBATCH -C haswell\n'
+ batch_string += '#SBATCH -N 1\n'
+ batch_string += '#SBATCH -S 4\n'
+ batch_string += '#SBATCH -q regular\n'
+ batch_string += '#SBATCH -e read_error.txt\n'
+ batch_string += '#SBATCH -o read_output.txt\n'
+ batch_string += '#SBATCH --mail-type=end\n'
+ batch_string += '#SBATCH --account=m2852\n'
+ batch_string += 'python ' + __file__ + ' --no-recompile --compiler=' + \
+ args.compiler + ' --architecture=' + args.architecture + \
+ ' --mode=read' + ' --log_file=' + log_file + \
+ ' --input_file=' + args.input_file
+ if do_commit == True:
+ batch_string += ' --commit'
+ if args.automated == True:
+ batch_string += ' --automated'
+ batch_string += '\n'
+ batch_file = 'slurm_perfread'
+ f_exe = open(batch_file,'w')
+ f_exe.write(batch_string)
+ f_exe.close()
+ os.system('chmod 700 ' + batch_file)
+ os.system('sbatch --dependency afterok:' + dependencies[0:-1] + ' ' + batch_file)
+ return 0
+
+# Loop over the tests and return run time + details
+# -------------------------------------------------
+if args.mode == 'run':
+ # Remove file log_jobids_tmp.txt if exists.
+ # This file contains the jobid of every perf test
+ # It is used to manage the analysis script dependencies
+ if os.path.isfile(cwd + 'log_jobids_tmp.txt'):
+ os.remove(cwd + 'log_jobids_tmp.txt')
+ res_dir = res_dir_base
+ res_dir += '_'.join([run_name, args.compiler,\
+ args.architecture, str(n_node)]) + '/'
+ # Run the simulation.
+ run_batch_nnode(test_list, res_dir, bin_name, config_command,\
+ architecture=args.architecture, Cname=module_Cname[args.architecture], \
+ n_node=n_node)
+ os.chdir(cwd)
+ process_analysis()
+
+if args.mode == 'read':
+ # Create log_file for performance tests if it does not exist
+ if not os.path.isfile(log_dir + log_file):
+ log_line = '## year month day input_file compiler architecture n_node n_mpi ' +\
+ 'n_omp time_initialization time_one_iteration Redistribute '+\
+ 'FillBoundary ParallelCopy CurrentDeposition FieldGather '+\
+ 'ParthiclePush Copy EvolveEM Checkpoint '+\
+ 'WriteParticles Write_FabArray '+\
+ 'WriteMultiLevelPlotfile(unit: second) '+\
+ 'RedistributeMPI\n'
+ f_log = open(log_dir + log_file, 'a')
+ f_log.write(log_line)
+ f_log.close()
+ for count, current_run in enumerate(test_list):
+ # Results folder
+ print('read ' + str(current_run))
+ input_file = current_run[0]
+ # Do not read n_node = current_run[1], it is an external parameter
+ n_mpi = current_run[2]
+ n_omp = current_run[3]
+ n_steps = get_nsteps(cwd + input_file)
+ print('n_steps = ' + str(n_steps))
+ res_dir = res_dir_base
+ res_dir += '_'.join([run_name, args.compiler,\
+ args.architecture, str(n_node)]) + '/'
+ # Read performance data from the output file
+ output_filename = 'out_' + '_'.join([input_file, str(n_node), str(n_mpi), str(n_omp), str(count)]) + '.txt'
+ timing_list = read_run_perf(res_dir + output_filename, n_steps)
+ # Write performance data to the performance log file
+ log_line = ' '.join([year, month, day, input_file, args.compiler,\
+ args.architecture, str(n_node), str(n_mpi),\
+ str(n_omp)] + timing_list + ['\n'])
+ write_perf_logfile(log_dir + log_file, log_line)
+
+ # Store test parameters for record
+ dir_record_base = './perf_warpx_record/'
+ if not os.path.exists(dir_record_base):
+ os.mkdir(dir_record_base)
+ count = 0
+ dir_record = dir_record_base + '_'.join([year, month, day]) + '_0'
+ while os.path.exists(dir_record):
+ count += 1
+ dir_record = dir_record[:-1] + str(count)
+ os.mkdir(dir_record)
+ shutil.copy(__file__, dir_record)
+ shutil.copy(log_dir + log_file, dir_record)
+ for count, current_run in enumerate(test_list):
+ shutil.copy(current_run[0], dir_record)
+
+ # Rename directory with precise date for archive purpose
+ res_dir_arch = res_dir_base
+ res_dir_arch += '_'.join([year, month, day, run_name, args.compiler,\
+ args.architecture, str(n_node)]) + '/'
+ os.rename(res_dir, res_dir_arch)
+
+ # Commit results to the Repo
+ if do_commit == True:
+ os.system('git add ' + log_dir + log_file + ';'\
+ 'git commit -m "performance tests";'\
+ 'git push -u origin master')
+
+ # Plot file
+ import numpy as np
+ import matplotlib
+ matplotlib.use('Agg')
+ import matplotlib.pyplot as plt
+ filename0 = 'performance_log'
+ filename = filename0 + '.txt'
+ fontsize = 14
+ matplotlib.rcParams.update({'font.size': fontsize})
+ nsteps = 100.
+ nrepeat = 4
+ legends = [ 'n_node', 'n_mpi', 'n_omp', 'time_initialization', 'time_one_iteration', \
+ 'Redistribute', 'FillBoundary', 'ParallelCopy', 'CurrentDeposition', \
+ 'FieldGather', 'ParthiclePush', 'Copy', 'EvolveEM', 'Checkpoint', \
+ 'WriteParticles', 'Write_FabArray', 'WriteMultiLevelPlotfile', \
+ 'RedistributeMPI']
+ date = np.loadtxt( filename, usecols = np.arange(0, 3 ))
+ data = np.loadtxt( filename, usecols = np.arange(6, 6+len(legends)) )
+ # Read run name
+ with open(filename) as f:
+ namelist_tmp = list(zip(*[line.split() for line in f]))[3]
+ # Remove first line = comments
+ namelist = list(namelist_tmp[1:])
+ selector_list = ['automated_test_1_uniform_rest_32ppc',\
+ 'automated_test_2_uniform_rest_1ppc',\
+ 'automated_test_3_uniform_drift_4ppc',\
+ 'automated_test_4_labdiags_2ppc',\
+ 'automated_test_5_loadimbalance',\
+ 'automated_test_6_output_2ppc']
+ selector_string = selector_list[0]
+ selector = [idx for idx in range(len(namelist)) if selector_string in namelist[idx]]
+ lin_date = date[:,0]+date[:,1]/12.+date[:,2]/366.
+ unique_lin_date = np.unique(lin_date)
+ my_xticks = unique_lin_date
+# cmap = plt.get_cmap("tab20")
+ cycle = plt.rcParams['axes.prop_cycle'].by_key()['color']
+ for selector_string in selector_list:
+ selector = [idx for idx in range(len(namelist)) if selector_string in namelist[idx]]
+ plt.figure(num=0, figsize=(8,4))
+ plt.clf()
+ plt.title('warpx ' + selector_string)
+ for i in np.arange(data.shape[1]):
+ icolors = i-3
+ if i>3 and (data[selector,i] > 5./100*data[selector,4]).any():
+ plt.plot(lin_date[selector], data[selector,i],'+', ms=6, \
+ mew=2, label=legends[i] )
+ # plt.plot(lin_date[selector], data[selector,i],'+', ms=6, \
+ # mew=2, label=legends[i], color=cmap(i) )
+ plt.xlabel('date')
+ plt.ylabel('time/step (s)')
+ plt.grid()
+ plt.legend(loc='best')
+ plt.legend(bbox_to_anchor=(1.1, 1.05))
+ plt.savefig( selector_string + '.pdf', bbox_inches='tight')
+ plt.savefig( selector_string + '.png', bbox_inches='tight')
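Beyond the plots above, the same performance_log.txt can be queried directly; here is a minimal sketch (assuming the log file sits in the current directory, with the column layout written by the scripts above):

import numpy as np

filename = 'performance_log.txt'
# columns 6-10: n_node n_mpi n_omp time_initialization time_one_iteration
data = np.loadtxt(filename, usecols=np.arange(6, 11))
with open(filename) as f:
    names = [line.split()[3] for line in f if not line.startswith('#')]
test = 'automated_test_1_uniform_rest_32ppc'
sel = [i for i, name in enumerate(names) if name == test]
print(test, 'mean time per iteration (s):', data[sel, 4].mean())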