Diffstat (limited to 'Tools/performance_tests/run_alltests.py')
-rw-r--r-- | Tools/performance_tests/run_alltests.py | 244 |
1 file changed, 85 insertions, 159 deletions
diff --git a/Tools/performance_tests/run_alltests.py b/Tools/performance_tests/run_alltests.py
index 7c02481fb..440da363d 100644
--- a/Tools/performance_tests/run_alltests.py
+++ b/Tools/performance_tests/run_alltests.py
@@ -1,5 +1,6 @@
 import os, sys, shutil
 import argparse, re, time
+from functions_perftest import *
 
 # This script runs automated performance tests for WarpX.
 # It runs tests in list test_list defined below, and write
@@ -9,7 +10,7 @@ import argparse, re, time
 # Before running performance tests, make sure you have the latest version
 # of performance_log.txt
 # A typical execution reads:
-# > python run_alltests.py --no-recompile --compiler=gnu --architecture=cpu --mode=run --no-commit --log_file='my_performance_log.txt'
+# > python run_alltests.py --no-recompile --compiler=gnu --architecture=cpu --mode=run --log_file='my_performance_log.txt'
 # These are default values, and will give the same result as
 # > python run_alltests.py
 # To add a new test item, extent the test_list with a line like
@@ -33,9 +34,53 @@ import argparse, re, time
 # write data into the performance log file
 # push file performance_log.txt on the repo
 
+# Define the list of tests to run
+# -------------------------------
+# each element of test_list contains
+# [str runname, int n_node, int n_mpi PER NODE, int n_omp]
+test_list = []
+n_repeat = 3
+basename1 = 'uniform_t0.01_'
+
+test_list.extend([[basename1 + '128', 1, 16, 8]]*n_repeat)
+test_list.extend([[basename1 + '128', 1, 32, 16]]*n_repeat)
+
+# test_list.extend([[basename1 + '128', 1, 16, 8]]*n_repeat)
+# test_list.extend([[basename1 + '256', 8, 16, 8]]*n_repeat)
+# test_list.extend([[basename1 + '512', 64, 16, 8]]*n_repeat)
+# test_list.extend([[basename1 + '1024', 512, 16, 8]]*n_repeat)
+# test_list.extend([[basename1 + '2048', 4096, 16, 8]]*n_repeat)
+
+
+# test_list.extend([['uniform_t0.01_direct1_1ppc_128', 1, 16, 8]]*n_repeat)
+# test_list.extend([['uniform_t0.01_direct3_1ppc_128', 1, 16, 8]]*n_repeat)
+# test_list.extend([['uniform_t0.01_esirk1_1ppc_128', 1, 16, 8]]*n_repeat)
+# test_list.extend([['uniform_t0.01_esirk3_1ppc_128', 1, 16, 8]]*n_repeat)
+
+# test_list.extend([['uniform_t0.01_direct1_1ppc_256', 8, 16, 8]]*n_repeat)
+# test_list.extend([['uniform_t0.01_direct3_1ppc_256', 8, 16, 8]]*n_repeat)
+# test_list.extend([['uniform_t0.01_esirk1_1ppc_256', 8, 16, 8]]*n_repeat)
+# test_list.extend([['uniform_t0.01_esirk3_1ppc_256', 8, 16, 8]]*n_repeat)
+
+# test_list.extend([['uniform_t0.01_direct1_1ppc_512', 64, 16, 8]]*n_repeat)
+# test_list.extend([['uniform_t0.01_direct3_1ppc_512', 64, 16, 8]]*n_repeat)
+# test_list.extend([['uniform_t0.01_esirk1_1ppc_512', 64, 16, 8]]*n_repeat)
+# test_list.extend([['uniform_t0.01_esirk3_1ppc_512', 64, 16, 8]]*n_repeat)
+
+# test_list.extend([['uniform_t0.01_direct1_1ppc_1024', 512, 16, 8]]*n_repeat)
+# test_list.extend([['uniform_t0.01_direct3_1ppc_1024', 512, 16, 8]]*n_repeat)
+# test_list.extend([['uniform_t0.01_esirk1_1ppc_1024', 512, 16, 8]]*n_repeat)
+# test_list.extend([['uniform_t0.01_esirk3_1ppc_1024', 512, 16, 8]]*n_repeat)
+
+# test_list.extend([['uniform_t0.01_direct1_1ppc_2048', 4096, 16, 8]]*n_repeat)
+# test_list.extend([['uniform_t0.01_direct3_1ppc_2048', 4096, 16, 8]]*n_repeat)
+# test_list.extend([['uniform_t0.01_esirk1_1ppc_2048', 4096, 16, 8]]*n_repeat)
+# test_list.extend([['uniform_t0.01_esirk3_1ppc_2048', 4096, 16, 8]]*n_repeat)
+
+n_tests = len(test_list)
+
 # Read command-line arguments
 # ---------------------------
-
 # Create parser and read arguments
 parser = argparse.ArgumentParser( description='Run performance tests and write results in files')
@@ -115,136 +160,6 @@ if args.recompile == True:
 
 # Define functions to run a test and analyse results
 # --------------------------------------------------
-
-# Run a performance test in an interactive allocation
-def run_interactive(run_name, res_dir, n_node=1, n_mpi=1, n_omp=1):
-    # Clean res_dir
-    if os.path.exists(res_dir):
-        shutil.rmtree(res_dir)
-    os.makedirs(res_dir)
-    # Copy files to res_dir
-    shutil.copyfile(bin_dir + bin_name, res_dir + bin_name)
-    shutil.copyfile(cwd + run_name, res_dir + 'inputs')
-    os.chdir(res_dir)
-    if args.architecture == 'cpu':
-        cflag_value = max(1, int(32/n_mpi) * 2) # Follow NERSC directives
-        exec_command = 'export OMP_NUM_THREADS=' + str(n_omp) + ';' +\
-                       'srun --cpu_bind=cores ' + \
-                       ' -n ' + str(n_node*n_mpi) + \
-                       ' -c ' + str(cflag_value) + \
-                       ' ./' + bin_name + ' inputs > perf_output.txt'
-    elif args.architecture == 'knl':
-        # number of logical cores per MPI process
-        cflag_value = max(1,int(68/n_mpi) * 4) # Follow NERSC directives
-        exec_command = 'export OMP_NUM_THREADS=' + str(n_omp) + ';' +\
-                       'srun --cpu_bind=cores ' + \
-                       ' -n ' + str(n_node*n_mpi) + \
-                       ' -c ' + str(cflag_value) + \
-                       ' ./' + bin_name + ' inputs > perf_output.txt'
-    os.system('chmod 700 ' + bin_name)
-    os.system(config_command + exec_command)
-    return 0
-
-def run_batch(run_name, res_dir, n_node=1, n_mpi=1, n_omp=1):
-    # Clean res_dir
-    if os.path.exists(res_dir):
-        shutil.rmtree(res_dir)
-    os.makedirs(res_dir)
-    # Copy files to res_dir
-    shutil.copyfile(bin_dir + bin_name, res_dir + bin_name)
-    shutil.copyfile(cwd + run_name, res_dir + 'inputs')
-    os.chdir(res_dir)
-    batch_string = ''
-    batch_string += '#!/bin/bash\n'
-    batch_string += '#SBATCH --job-name=' + run_name + str(n_node) + str(n_mpi) + str(n_omp) + '\n'
-    batch_string += '#SBATCH --time=00:30:00\n'
-    batch_string += '#SBATCH -C ' + module_Cname[args.architecture] + '\n'
-    batch_string += '#SBATCH -N ' + str(n_node) + '\n'
-    batch_string += '#SBATCH --partition=regular\n'
-    batch_string += '#SBATCH --qos=normal\n'
-    batch_string += '#SBATCH -e error.txt\n'
-    batch_string += '#SBATCH --account=m2852\n'
-    batch_string += 'export OMP_NUM_THREADS=' + str(n_omp) + '\n'
-    if args.architecture == 'cpu':
-        cflag_value = max(1, int(32/n_mpi) * 2) # Follow NERSC directives
-        batch_string += 'srun --cpu_bind=cores '+ \
-                        ' -n ' + str(n_node*n_mpi) + \
-                        ' -c ' + str(cflag_value) + \
-                        ' ./' + bin_name + ' inputs > perf_output.txt'
-    elif args.architecture == 'knl':
-        # number of logical cores per MPI process
-        cflag_value = max(1, int(64/n_mpi) * 4) # Follow NERSC directives
-        batch_string += 'srun --cpu_bind=cores ' + \
-                        ' -n ' + str(n_node*n_mpi) + \
-                        ' -c ' + str(cflag_value) + \
-                        ' ./' + bin_name + ' inputs > perf_output.txt\n'
-    batch_file = 'slurm'
-    f_exe = open(batch_file,'w')
-    f_exe.write(batch_string)
-    f_exe.close()
-    os.system('chmod 700 ' + bin_name)
-    os.system(config_command + 'sbatch ' + batch_file + ' >> ' + cwd + 'log_jobids_tmp.txt')
-    return 0
-
-# Read output file and return init time and 1-step time
-def read_run_perf(filename):
-    timing_list = []
-    # Search inclusive time to get simulation step time
-    partition_limit = 'NCalls Incl. Min Incl. Avg Incl. Max Max %'
-    with open(filename) as file_handler:
-        output_text = file_handler.read()
-    # Get total simulation time
-    line_match_totaltime = re.search('TinyProfiler total time across processes.*', output_text)
-    total_time = float(line_match_totaltime.group(0).split()[8])
-    search_area = output_text.partition(partition_limit)[2]
-    line_match_looptime = re.search('\nWarpX::Evolve().*', search_area)
-    time_wo_initialization = float(line_match_looptime.group(0).split()[3])
-    timing_list += [str(total_time - time_wo_initialization)]
-    timing_list += [str(time_wo_initialization/n_steps)]
-    # Search exclusive time to get routines timing
-    partition_limit1 = 'NCalls Excl. Min Excl. Avg Excl. Max Max %'
-    partition_limit2 = 'NCalls Incl. Min Incl. Avg Incl. Max Max %'
-    file_handler.close()
-    with open(filename) as file_handler:
-        output_text = file_handler.read()
-    search_area = output_text.partition(partition_limit1)[2].partition(partition_limit2)[0]
-    pattern_list = ['\nParticleContainer::Redistribute().*',\
-                    '\nFabArray::FillBoundary().*',\
-                    '\nFabArray::ParallelCopy().*',\
-                    '\nPICSAR::CurrentDeposition.*',\
-                    '\nPICSAR::FieldGather.*',\
-                    '\nPICSAR::ParticlePush.*',\
-                    '\nPPC::Evolve::Copy.*',\
-                    '\nWarpX::EvolveEM().*',\
-                    'NArrayInt>::Checkpoint().*',\
-                    'NArrayInt>::WriteParticles().*',\
-                    '\nVisMF::Write_FabArray.*',\
-                    '\nWriteMultiLevelPlotfile().*']
-    for pattern in pattern_list:
-        timing = '0'
-        line_match = re.search(pattern, search_area)
-        if line_match is not None:
-            timing = [str(float(line_match.group(0).split()[3])/n_steps)]
-        timing_list += timing
-    return timing_list
-
-# Write time into logfile
-def write_perf_logfile(log_file):
-    log_line = ' '.join([year, month, day, run_name, args.compiler,\
-                         args.architecture, str(n_node), str(n_mpi),\
-                         str(n_omp)] + timing_list + ['\n'])
-    f_log = open(log_file, 'a')
-    f_log.write(log_line)
-    f_log.close()
-    return 0
-
-def get_nsteps(runname):
-    with open(runname) as file_handler:
-        runname_text = file_handler.read()
-    line_match_nsteps = re.search('\nmax_step.*', runname_text)
-    nsteps = float(line_match_nsteps.group(0).split()[2])
-    return nsteps
-
 def process_analysis():
     dependencies = ''
     f_log = open(cwd + 'log_jobids_tmp.txt','r')
@@ -254,18 +169,19 @@ def process_analysis():
         dependencies += line.split()[3] + ':'
     batch_string = ''
    batch_string += '#!/bin/bash\n'
-    batch_string += '#SBATCH --job-name=perftests_read\n'
+    batch_string += '#SBATCH --job-name=warpx_read\n'
     batch_string += '#SBATCH --time=00:05:00\n'
     batch_string += '#SBATCH -C ' + module_Cname[args.architecture] + '\n'
     batch_string += '#SBATCH -N 1\n'
     batch_string += '#SBATCH -S 4\n'
-    batch_string += '#SBATCH --partition=regular\n'
-    batch_string += '#SBATCH --qos=normal\n'
+    batch_string += '#SBATCH -q regular\n'
     batch_string += '#SBATCH -e read_error.txt\n'
     batch_string += '#SBATCH -o read_output.txt\n'
     batch_string += '#SBATCH --mail-type=end\n'
     batch_string += '#SBATCH --account=m2852\n'
-    batch_string += 'python ' + __file__ + ' --no-recompile --compiler=' + args.compiler + ' --architecture=' + args.architecture + ' --mode=read' + ' --log_file=' + log_file
+    batch_string += 'python ' + __file__ + ' --no-recompile --compiler=' + \
+                    args.compiler + ' --architecture=' + args.architecture + \
+                    ' --mode=read' + ' --log_file=' + log_file
     if args.commit == True:
         batch_string += ' --commit'
     batch_string += '\n'
@@ -279,20 +195,6 @@ def process_analysis():
 
 # Loop over the tests and return run time + details
 # -------------------------------------------------
-
-# each element of test_list contains
-# [str runname, int n_node, int n_mpi PER NODE, int n_omp]
-
-test_list = []
-n_repeat = 1
-filename1 = 'uniform_plasma'
-
-test_list.extend([[filename1, 1, 8, 16]]*3)
-test_list.extend([[filename1, 1, 4, 32]]*3)
-test_list.extend([[filename1, 2, 4, 32]]*3)
-
-n_tests = len(test_list)
-
 if args.mode == 'run':
     # Remove file log_jobids_tmp.txt if exists.
     # This file contains the jobid of every perf test
@@ -308,13 +210,14 @@ if args.mode == 'run':
         n_omp = current_run[3]
         n_steps = get_nsteps(cwd + run_name)
         res_dir = res_dir_base
-        res_dir += '_'.join([year, month, day, run_name, args.compiler,\
+        res_dir += '_'.join([run_name, args.compiler,\
                              args.architecture, str(n_node), str(n_mpi),\
-                             str(n_omp)]) + '/'
+                             str(n_omp), str(count)]) + '/'
         # Run the simulation.
         # If you are currently in an interactive session and want to run interactive,
         # just replace run_batch with run_interactive
-        run_batch(run_name, res_dir, n_node=n_node, n_mpi=n_mpi, n_omp=n_omp)
+        run_batch(run_name, res_dir, bin_name, config_command, architecture=args.architecture, \
+                  Cname=module_Cname[args.architecture], n_node=n_node, n_mpi=n_mpi, n_omp=n_omp)
         os.chdir(cwd)
     process_analysis()
@@ -326,7 +229,8 @@ if args.mode == 'read':
                    'FillBoundary ParallelCopy CurrentDeposition FieldGather '+\
                    'ParthiclePush Copy EvolveEM Checkpoint '+\
                    'WriteParticles Write_FabArray '+\
-                   'WriteMultiLevelPlotfile(unit: second)\n'
+                   'WriteMultiLevelPlotfile '+\
+                   'RedistributeMPI(unit: second)\n'
     f_log = open(log_dir + log_file, 'a')
     f_log.write(log_line)
     f_log.close()
@@ -340,13 +244,20 @@ if args.mode == 'read':
         n_steps = get_nsteps(cwd + run_name)
         print('n_steps = ' + str(n_steps))
         res_dir = res_dir_base
-        res_dir += '_'.join([year, month, day, run_name, args.compiler,\
+        res_dir += '_'.join([run_name, args.compiler,\
                              args.architecture, str(n_node), str(n_mpi),\
-                             str(n_omp)]) + '/'
-        # Read performance data from the output file
-        timing_list = read_run_perf(res_dir + 'perf_output.txt')
+                             str(n_omp), str(count)]) + '/'
+#        res_dir += '_'.join([year, month, '25', run_name, args.compiler,\
+#                             args.architecture, str(n_node), str(n_mpi), \
+#                             str(n_omp)]) + '/'
+        # Read performance data from the output file
+        output_filename = 'perf_output.txt'
+        timing_list = read_run_perf(res_dir + output_filename, n_steps)
         # Write performance data to the performance log file
-        write_perf_logfile(log_dir + log_file)
+        log_line = ' '.join([year, month, day, run_name, args.compiler,\
+                             args.architecture, str(n_node), str(n_mpi),\
+                             str(n_omp)] + timing_list + ['\n'])
+        write_perf_logfile(log_dir + log_file, log_line)
         # Store test parameters fot record
         dir_record_base = './perf_warpx_record/'
@@ -363,6 +274,21 @@ if args.mode == 'read':
         for count, current_run in enumerate(test_list):
             shutil.copy(current_run[0], dir_record)
 
+    for count, current_run in enumerate(test_list):
+        run_name = current_run[0]
+        n_node = current_run[1]
+        n_mpi = current_run[2]
+        n_omp = current_run[3]
+        res_dir = res_dir_base
+        res_dir += '_'.join([run_name, args.compiler,\
+                             args.architecture, str(n_node), str(n_mpi),\
+                             str(n_omp), str(count)]) + '/'
+        res_dir_arch = res_dir_base
+        res_dir_arch += '_'.join([year, month, day, run_name, args.compiler,\
+                                  args.architecture, str(n_node), str(n_mpi), \
+                                  str(n_omp), str(count)]) + '/'
+        os.rename(res_dir, res_dir_arch)
+
     # Commit results to the Repo
     if args.commit == True:
        os.system('git add ' + log_dir + log_file + ';'\
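The helpers removed above are now pulled in through the new `from functions_perftest import *` line at the top of the script, but functions_perftest.py itself is not part of this diff. As a rough sketch only, inferred from the new call sites (run_batch with bin_name/config_command/architecture/Cname arguments, read_run_perf taking n_steps, write_perf_logfile taking a pre-built log_line, get_nsteps unchanged), that module would expose an interface along these lines; the actual names, defaults and bodies in the repository may differ:

# Hypothetical sketch of functions_perftest.py, inferred from the call sites
# in this diff; not the actual module shipped with WarpX.
import re

def get_nsteps(run_name):
    # Read max_step from the input file, as the removed in-file helper did.
    with open(run_name) as f:
        text = f.read()
    return float(re.search('\nmax_step.*', text).group(0).split()[2])

def read_run_perf(filename, n_steps):
    # Parse the TinyProfiler output in perf_output.txt; n_steps is now an
    # explicit argument instead of a module-level global.
    # Returns a list of timing strings.
    ...

def write_perf_logfile(log_file, log_line):
    # The caller now assembles log_line itself; this helper only appends it.
    with open(log_file, 'a') as f:
        f.write(log_line)

def run_batch(run_name, res_dir, bin_name, config_command,
              architecture='cpu', Cname='haswell',
              n_node=1, n_mpi=1, n_omp=1):
    # Write and submit the SLURM script, as the removed in-file run_batch did,
    # with the former globals (bin_name, config_command, architecture and the
    # SLURM constraint name) passed in explicitly.
    ...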