Diffstat (limited to 'Tools/performance_tests/run_alltests.py')
-rw-r--r-- Tools/performance_tests/run_alltests.py | 244
1 file changed, 85 insertions(+), 159 deletions(-)
diff --git a/Tools/performance_tests/run_alltests.py b/Tools/performance_tests/run_alltests.py
index 7c02481fb..440da363d 100644
--- a/Tools/performance_tests/run_alltests.py
+++ b/Tools/performance_tests/run_alltests.py
@@ -1,5 +1,6 @@
import os, sys, shutil
import argparse, re, time
+from functions_perftest import *
# This script runs automated performance tests for WarpX.
# It runs the tests in the list test_list defined below, and writes
@@ -9,7 +10,7 @@ import argparse, re, time
# Before running performance tests, make sure you have the latest version
# of performance_log.txt
# A typical execution reads:
-# > python run_alltests.py --no-recompile --compiler=gnu --architecture=cpu --mode=run --no-commit --log_file='my_performance_log.txt'
+# > python run_alltests.py --no-recompile --compiler=gnu --architecture=cpu --mode=run --log_file='my_performance_log.txt'
# These are default values, and will give the same result as
# > python run_alltests.py
# To add a new test item, extend the test_list with a line like
@@ -33,9 +34,53 @@ import argparse, re, time
# write data into the performance log file
# push the file performance_log.txt to the repo
+# Define the list of tests to run
+# -------------------------------
+# each element of test_list contains
+# [str runname, int n_node, int n_mpi PER NODE, int n_omp]
+test_list = []
+n_repeat = 3
+basename1 = 'uniform_t0.01_'
+
+test_list.extend([[basename1 + '128', 1, 16, 8]]*n_repeat)
+test_list.extend([[basename1 + '128', 1, 32, 16]]*n_repeat)
+
+# test_list.extend([[basename1 + '128', 1, 16, 8]]*n_repeat)
+# test_list.extend([[basename1 + '256', 8, 16, 8]]*n_repeat)
+# test_list.extend([[basename1 + '512', 64, 16, 8]]*n_repeat)
+# test_list.extend([[basename1 + '1024', 512, 16, 8]]*n_repeat)
+# test_list.extend([[basename1 + '2048', 4096, 16, 8]]*n_repeat)
+
+
+# test_list.extend([['uniform_t0.01_direct1_1ppc_128', 1, 16, 8]]*n_repeat)
+# test_list.extend([['uniform_t0.01_direct3_1ppc_128', 1, 16, 8]]*n_repeat)
+# test_list.extend([['uniform_t0.01_esirk1_1ppc_128', 1, 16, 8]]*n_repeat)
+# test_list.extend([['uniform_t0.01_esirk3_1ppc_128', 1, 16, 8]]*n_repeat)
+
+# test_list.extend([['uniform_t0.01_direct1_1ppc_256', 8, 16, 8]]*n_repeat)
+# test_list.extend([['uniform_t0.01_direct3_1ppc_256', 8, 16, 8]]*n_repeat)
+# test_list.extend([['uniform_t0.01_esirk1_1ppc_256', 8, 16, 8]]*n_repeat)
+# test_list.extend([['uniform_t0.01_esirk3_1ppc_256', 8, 16, 8]]*n_repeat)
+
+# test_list.extend([['uniform_t0.01_direct1_1ppc_512', 64, 16, 8]]*n_repeat)
+# test_list.extend([['uniform_t0.01_direct3_1ppc_512', 64, 16, 8]]*n_repeat)
+# test_list.extend([['uniform_t0.01_esirk1_1ppc_512', 64, 16, 8]]*n_repeat)
+# test_list.extend([['uniform_t0.01_esirk3_1ppc_512', 64, 16, 8]]*n_repeat)
+
+# test_list.extend([['uniform_t0.01_direct1_1ppc_1024', 512, 16, 8]]*n_repeat)
+# test_list.extend([['uniform_t0.01_direct3_1ppc_1024', 512, 16, 8]]*n_repeat)
+# test_list.extend([['uniform_t0.01_esirk1_1ppc_1024', 512, 16, 8]]*n_repeat)
+# test_list.extend([['uniform_t0.01_esirk3_1ppc_1024', 512, 16, 8]]*n_repeat)
+
+# test_list.extend([['uniform_t0.01_direct1_1ppc_2048', 4096, 16, 8]]*n_repeat)
+# test_list.extend([['uniform_t0.01_direct3_1ppc_2048', 4096, 16, 8]]*n_repeat)
+# test_list.extend([['uniform_t0.01_esirk1_1ppc_2048', 4096, 16, 8]]*n_repeat)
+# test_list.extend([['uniform_t0.01_esirk3_1ppc_2048', 4096, 16, 8]]*n_repeat)
+
+n_tests = len(test_list)
+
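Each entry fixes the geometry of one Slurm job. A quick worked example for the first entry above, using only values already in the list (the srun -n count comes from the n_node*n_mpi product used in the launch commands below):

    # ['uniform_t0.01_128', 1, 16, 8]: one node, 16 MPI ranks, 8 OpenMP
    # threads per rank.
    run_name, n_node, n_mpi, n_omp = test_list[0]
    total_ranks   = n_node * n_mpi       # 16 ranks, passed to srun -n
    total_threads = total_ranks * n_omp  # 128 threads on the node
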
# Read command-line arguments
# ---------------------------
-
# Create parser and read arguments
parser = argparse.ArgumentParser(
description='Run performance tests and write results in files')
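The individual parser.add_argument calls are elided from this hunk. A minimal sketch of what they presumably register, inferred from the usage comment above and the args.* accesses visible elsewhere in the diff (exact option strings and defaults are assumptions):

    # Hypothetical reconstruction -- the real definitions are not in this hunk.
    parser.add_argument('--recompile',    dest='recompile', action='store_true')
    parser.add_argument('--no-recompile', dest='recompile', action='store_false')
    parser.add_argument('--commit',       dest='commit',    action='store_true')
    parser.add_argument('--compiler',     dest='compiler',  default='gnu')
    parser.add_argument('--architecture', dest='architecture',
                        choices=['cpu', 'knl'], default='cpu')
    parser.add_argument('--mode',         dest='mode',
                        choices=['run', 'read'], default='run')
    parser.add_argument('--log_file',     dest='log_file',
                        default='performance_log.txt')
    parser.set_defaults(recompile=False, commit=False)  # defaults assumed
    args = parser.parse_args()
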
@@ -115,136 +160,6 @@ if args.recompile == True:
# Define functions to run a test and analyse results
# --------------------------------------------------
-
-# Run a performance test in an interactive allocation
-def run_interactive(run_name, res_dir, n_node=1, n_mpi=1, n_omp=1):
- # Clean res_dir
- if os.path.exists(res_dir):
- shutil.rmtree(res_dir)
- os.makedirs(res_dir)
- # Copy files to res_dir
- shutil.copyfile(bin_dir + bin_name, res_dir + bin_name)
- shutil.copyfile(cwd + run_name, res_dir + 'inputs')
- os.chdir(res_dir)
- if args.architecture == 'cpu':
- cflag_value = max(1, int(32/n_mpi) * 2) # Follow NERSC directives
- exec_command = 'export OMP_NUM_THREADS=' + str(n_omp) + ';' +\
- 'srun --cpu_bind=cores ' + \
- ' -n ' + str(n_node*n_mpi) + \
- ' -c ' + str(cflag_value) + \
- ' ./' + bin_name + ' inputs > perf_output.txt'
- elif args.architecture == 'knl':
- # number of logical cores per MPI process
- cflag_value = max(1,int(68/n_mpi) * 4) # Follow NERSC directives
- exec_command = 'export OMP_NUM_THREADS=' + str(n_omp) + ';' +\
- 'srun --cpu_bind=cores ' + \
- ' -n ' + str(n_node*n_mpi) + \
- ' -c ' + str(cflag_value) + \
- ' ./' + bin_name + ' inputs > perf_output.txt'
- os.system('chmod 700 ' + bin_name)
- os.system(config_command + exec_command)
- return 0
-
-def run_batch(run_name, res_dir, n_node=1, n_mpi=1, n_omp=1):
- # Clean res_dir
- if os.path.exists(res_dir):
- shutil.rmtree(res_dir)
- os.makedirs(res_dir)
- # Copy files to res_dir
- shutil.copyfile(bin_dir + bin_name, res_dir + bin_name)
- shutil.copyfile(cwd + run_name, res_dir + 'inputs')
- os.chdir(res_dir)
- batch_string = ''
- batch_string += '#!/bin/bash\n'
- batch_string += '#SBATCH --job-name=' + run_name + str(n_node) + str(n_mpi) + str(n_omp) + '\n'
- batch_string += '#SBATCH --time=00:30:00\n'
- batch_string += '#SBATCH -C ' + module_Cname[args.architecture] + '\n'
- batch_string += '#SBATCH -N ' + str(n_node) + '\n'
- batch_string += '#SBATCH --partition=regular\n'
- batch_string += '#SBATCH --qos=normal\n'
- batch_string += '#SBATCH -e error.txt\n'
- batch_string += '#SBATCH --account=m2852\n'
- batch_string += 'export OMP_NUM_THREADS=' + str(n_omp) + '\n'
- if args.architecture == 'cpu':
- cflag_value = max(1, int(32/n_mpi) * 2) # Follow NERSC directives
- batch_string += 'srun --cpu_bind=cores '+ \
- ' -n ' + str(n_node*n_mpi) + \
- ' -c ' + str(cflag_value) + \
- ' ./' + bin_name + ' inputs > perf_output.txt'
- elif args.architecture == 'knl':
- # number of logical cores per MPI process
- cflag_value = max(1, int(64/n_mpi) * 4) # Follow NERSC directives
- batch_string += 'srun --cpu_bind=cores ' + \
- ' -n ' + str(n_node*n_mpi) + \
- ' -c ' + str(cflag_value) + \
- ' ./' + bin_name + ' inputs > perf_output.txt\n'
- batch_file = 'slurm'
- f_exe = open(batch_file,'w')
- f_exe.write(batch_string)
- f_exe.close()
- os.system('chmod 700 ' + bin_name)
- os.system(config_command + 'sbatch ' + batch_file + ' >> ' + cwd + 'log_jobids_tmp.txt')
- return 0
-
-# Read output file and return init time and 1-step time
-def read_run_perf(filename):
- timing_list = []
- # Search inclusive time to get simulation step time
- partition_limit = 'NCalls Incl. Min Incl. Avg Incl. Max Max %'
- with open(filename) as file_handler:
- output_text = file_handler.read()
- # Get total simulation time
- line_match_totaltime = re.search('TinyProfiler total time across processes.*', output_text)
- total_time = float(line_match_totaltime.group(0).split()[8])
- search_area = output_text.partition(partition_limit)[2]
- line_match_looptime = re.search('\nWarpX::Evolve().*', search_area)
- time_wo_initialization = float(line_match_looptime.group(0).split()[3])
- timing_list += [str(total_time - time_wo_initialization)]
- timing_list += [str(time_wo_initialization/n_steps)]
- # Search exclusive time to get routines timing
- partition_limit1 = 'NCalls Excl. Min Excl. Avg Excl. Max Max %'
- partition_limit2 = 'NCalls Incl. Min Incl. Avg Incl. Max Max %'
- file_handler.close()
- with open(filename) as file_handler:
- output_text = file_handler.read()
- search_area = output_text.partition(partition_limit1)[2].partition(partition_limit2)[0]
- pattern_list = ['\nParticleContainer::Redistribute().*',\
- '\nFabArray::FillBoundary().*',\
- '\nFabArray::ParallelCopy().*',\
- '\nPICSAR::CurrentDeposition.*',\
- '\nPICSAR::FieldGather.*',\
- '\nPICSAR::ParticlePush.*',\
- '\nPPC::Evolve::Copy.*',\
- '\nWarpX::EvolveEM().*',\
- 'NArrayInt>::Checkpoint().*',\
- 'NArrayInt>::WriteParticles().*',\
- '\nVisMF::Write_FabArray.*',\
- '\nWriteMultiLevelPlotfile().*']
- for pattern in pattern_list:
- timing = '0'
- line_match = re.search(pattern, search_area)
- if line_match is not None:
- timing = [str(float(line_match.group(0).split()[3])/n_steps)]
- timing_list += timing
- return timing_list
-
-# Write time into logfile
-def write_perf_logfile(log_file):
- log_line = ' '.join([year, month, day, run_name, args.compiler,\
- args.architecture, str(n_node), str(n_mpi),\
- str(n_omp)] + timing_list + ['\n'])
- f_log = open(log_file, 'a')
- f_log.write(log_line)
- f_log.close()
- return 0
-
-def get_nsteps(runname):
- with open(runname) as file_handler:
- runname_text = file_handler.read()
- line_match_nsteps = re.search('\nmax_step.*', runname_text)
- nsteps = float(line_match_nsteps.group(0).split()[2])
- return nsteps
-
def process_analysis():
dependencies = ''
f_log = open(cwd + 'log_jobids_tmp.txt','r')
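The five helpers deleted above (run_interactive, run_batch, read_run_perf, write_perf_logfile, get_nsteps) now live in the functions_perftest module imported at the top of the file. Their new bodies are not part of this diff; a sketch of the signatures implied by the call sites further down, for orientation only:

    # functions_perftest -- signatures inferred from the call sites in this
    # diff, not the actual module. Default values are assumptions.
    def run_batch(run_name, res_dir, bin_name, config_command,
                  architecture='cpu', Cname='haswell',
                  n_node=1, n_mpi=1, n_omp=1):
        """Write a Slurm script for one test, submit it with sbatch, and
        append the job id to log_jobids_tmp.txt."""

    def read_run_perf(filename, n_steps):
        """Parse a TinyProfiler output file and return the timing_list,
        normalizing per-step timings by n_steps."""

    def get_nsteps(run_name):
        """Return max_step parsed from the WarpX input file run_name."""

    def write_perf_logfile(log_file, log_line):
        """Append the pre-formatted log_line to log_file."""
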
@@ -254,18 +169,19 @@ def process_analysis():
dependencies += line.split()[3] + ':'
batch_string = ''
batch_string += '#!/bin/bash\n'
- batch_string += '#SBATCH --job-name=perftests_read\n'
+ batch_string += '#SBATCH --job-name=warpx_read\n'
batch_string += '#SBATCH --time=00:05:00\n'
batch_string += '#SBATCH -C ' + module_Cname[args.architecture] + '\n'
batch_string += '#SBATCH -N 1\n'
batch_string += '#SBATCH -S 4\n'
- batch_string += '#SBATCH --partition=regular\n'
- batch_string += '#SBATCH --qos=normal\n'
+ batch_string += '#SBATCH -q regular\n'
batch_string += '#SBATCH -e read_error.txt\n'
batch_string += '#SBATCH -o read_output.txt\n'
batch_string += '#SBATCH --mail-type=end\n'
batch_string += '#SBATCH --account=m2852\n'
- batch_string += 'python ' + __file__ + ' --no-recompile --compiler=' + args.compiler + ' --architecture=' + args.architecture + ' --mode=read' + ' --log_file=' + log_file
+ batch_string += 'python ' + __file__ + ' --no-recompile --compiler=' + \
+ args.compiler + ' --architecture=' + args.architecture + \
+ ' --mode=read' + ' --log_file=' + log_file
if args.commit == True:
batch_string += ' --commit'
batch_string += '\n'
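The tail of process_analysis falls outside this hunk. Judging from the dependencies string assembled above, it presumably writes batch_string to a script and submits it so that the read job only starts once every test job has completed, along these lines (a hypothetical reconstruction; the script name is assumed):

    batch_file = 'slurm_perfread'   # name assumed
    with open(batch_file, 'w') as f:
        f.write(batch_string)
    # dependencies looks like 'jobid1:jobid2:...:' -- drop the trailing colon
    os.system('sbatch --dependency afterok:' + dependencies[:-1] +
              ' ' + batch_file)
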
@@ -279,20 +195,6 @@ def process_analysis():
# Loop over the tests and return run time + details
# -------------------------------------------------
-
-# each element of test_list contains
-# [str runname, int n_node, int n_mpi PER NODE, int n_omp]
-
-test_list = []
-n_repeat = 1
-filename1 = 'uniform_plasma'
-
-test_list.extend([[filename1, 1, 8, 16]]*3)
-test_list.extend([[filename1, 1, 4, 32]]*3)
-test_list.extend([[filename1, 2, 4, 32]]*3)
-
-n_tests = len(test_list)
-
if args.mode == 'run':
# Remove file log_jobids_tmp.txt if exists.
# This file contains the jobid of every perf test
@@ -308,13 +210,14 @@ if args.mode == 'run':
n_omp = current_run[3]
n_steps = get_nsteps(cwd + run_name)
res_dir = res_dir_base
- res_dir += '_'.join([year, month, day, run_name, args.compiler,\
+ res_dir += '_'.join([run_name, args.compiler,\
args.architecture, str(n_node), str(n_mpi),\
- str(n_omp)]) + '/'
+ str(n_omp), str(count)]) + '/'
# Run the simulation.
# If you are currently in an interactive session and want to run interactive,
# just replace run_batch with run_interactive
- run_batch(run_name, res_dir, n_node=n_node, n_mpi=n_mpi, n_omp=n_omp)
+ run_batch(run_name, res_dir, bin_name, config_command, architecture=args.architecture, \
+ Cname=module_Cname[args.architecture], n_node=n_node, n_mpi=n_mpi, n_omp=n_omp)
os.chdir(cwd)
process_analysis()
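As the comment above notes, an interactive session would call run_interactive instead of run_batch. Assuming run_interactive in functions_perftest takes the same extra arguments as the updated run_batch (only its old standalone version appears in this diff), the replacement call would read:

    run_interactive(run_name, res_dir, bin_name, config_command,
                    architecture=args.architecture,
                    Cname=module_Cname[args.architecture],
                    n_node=n_node, n_mpi=n_mpi, n_omp=n_omp)
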
@@ -326,7 +229,8 @@ if args.mode == 'read':
'FillBoundary ParallelCopy CurrentDeposition FieldGather '+\
'ParthiclePush Copy EvolveEM Checkpoint '+\
'WriteParticles Write_FabArray '+\
- 'WriteMultiLevelPlotfile(unit: second)\n'
+ 'WriteMultiLevelPlotfile '+\
+ 'RedistributeMPI(unit: second)\n'
f_log = open(log_dir + log_file, 'a')
f_log.write(log_line)
f_log.close()
@@ -340,13 +244,20 @@ if args.mode == 'read':
n_steps = get_nsteps(cwd + run_name)
print('n_steps = ' + str(n_steps))
res_dir = res_dir_base
- res_dir += '_'.join([year, month, day, run_name, args.compiler,\
+ res_dir += '_'.join([run_name, args.compiler,\
args.architecture, str(n_node), str(n_mpi),\
- str(n_omp)]) + '/'
- # Read performance data from the output file
- timing_list = read_run_perf(res_dir + 'perf_output.txt')
+ str(n_omp), str(count)]) + '/'
+# res_dir += '_'.join([year, month, '25', run_name, args.compiler,\
+# args.architecture, str(n_node), str(n_mpi), \
+# str(n_omp)]) + '/'
+ # Read performance data from the output file
+ output_filename = 'perf_output.txt'
+ timing_list = read_run_perf(res_dir + output_filename, n_steps)
# Write performance data to the performance log file
- write_perf_logfile(log_dir + log_file)
+ log_line = ' '.join([year, month, day, run_name, args.compiler,\
+ args.architecture, str(n_node), str(n_mpi),\
+ str(n_omp)] + timing_list + ['\n'])
+ write_perf_logfile(log_dir + log_file, log_line)
# Store test parameters for the record
dir_record_base = './perf_warpx_record/'
@@ -363,6 +274,21 @@ if args.mode == 'read':
for count, current_run in enumerate(test_list):
shutil.copy(current_run[0], dir_record)
+ for count, current_run in enumerate(test_list):
+ run_name = current_run[0]
+ n_node = current_run[1]
+ n_mpi = current_run[2]
+ n_omp = current_run[3]
+ res_dir = res_dir_base
+ res_dir += '_'.join([run_name, args.compiler,\
+ args.architecture, str(n_node), str(n_mpi),\
+ str(n_omp), str(count)]) + '/'
+ res_dir_arch = res_dir_base
+ res_dir_arch += '_'.join([year, month, day, run_name, args.compiler,\
+ args.architecture, str(n_node), str(n_mpi), \
+ str(n_omp), str(count)]) + '/'
+ os.rename(res_dir, res_dir_arch)
+
# Commit results to the Repo
if args.commit == True:
os.system('git add ' + log_dir + log_file + ';'\