Diffstat (limited to 'Tools/performance_tests/run_alltests.py')
-rw-r--r-- | Tools/performance_tests/run_alltests.py | 244 |
1 file changed, 85 insertions, 159 deletions
diff --git a/Tools/performance_tests/run_alltests.py b/Tools/performance_tests/run_alltests.py
index 7c02481fb..440da363d 100644
--- a/Tools/performance_tests/run_alltests.py
+++ b/Tools/performance_tests/run_alltests.py
@@ -1,5 +1,6 @@
 import os, sys, shutil
 import argparse, re, time
+from functions_perftest import *
 
 # This script runs automated performance tests for WarpX.
 # It runs tests in list test_list defined below, and write
@@ -9,7 +10,7 @@ import argparse, re, time
 # Before running performance tests, make sure you have the latest version
 # of performance_log.txt
 # A typical execution reads:
-# > python run_alltests.py --no-recompile --compiler=gnu --architecture=cpu --mode=run --no-commit --log_file='my_performance_log.txt'
+# > python run_alltests.py --no-recompile --compiler=gnu --architecture=cpu --mode=run --log_file='my_performance_log.txt'
 # These are default values, and will give the same result as
 # > python run_alltests.py
 # To add a new test item, extent the test_list with a line like
@@ -33,9 +34,53 @@ import argparse, re, time
 # write data into the performance log file
 # push file performance_log.txt on the repo
 
+# Define the list of tests to run
+# -------------------------------
+# each element of test_list contains
+# [str runname, int n_node, int n_mpi PER NODE, int n_omp]
+test_list = []
+n_repeat = 3
+basename1 = 'uniform_t0.01_'
+
+test_list.extend([[basename1 + '128', 1, 16, 8]]*n_repeat)
+test_list.extend([[basename1 + '128', 1, 32, 16]]*n_repeat)
+
+# test_list.extend([[basename1 + '128', 1, 16, 8]]*n_repeat)
+# test_list.extend([[basename1 + '256', 8, 16, 8]]*n_repeat)
+# test_list.extend([[basename1 + '512', 64, 16, 8]]*n_repeat)
+# test_list.extend([[basename1 + '1024', 512, 16, 8]]*n_repeat)
+# test_list.extend([[basename1 + '2048', 4096, 16, 8]]*n_repeat)
+
+
+# test_list.extend([['uniform_t0.01_direct1_1ppc_128', 1, 16, 8]]*n_repeat)
+# test_list.extend([['uniform_t0.01_direct3_1ppc_128', 1, 16, 8]]*n_repeat)
+# test_list.extend([['uniform_t0.01_esirk1_1ppc_128', 1, 16, 8]]*n_repeat)
+# test_list.extend([['uniform_t0.01_esirk3_1ppc_128', 1, 16, 8]]*n_repeat)
+
+# test_list.extend([['uniform_t0.01_direct1_1ppc_256', 8, 16, 8]]*n_repeat)
+# test_list.extend([['uniform_t0.01_direct3_1ppc_256', 8, 16, 8]]*n_repeat)
+# test_list.extend([['uniform_t0.01_esirk1_1ppc_256', 8, 16, 8]]*n_repeat)
+# test_list.extend([['uniform_t0.01_esirk3_1ppc_256', 8, 16, 8]]*n_repeat)
+
+# test_list.extend([['uniform_t0.01_direct1_1ppc_512', 64, 16, 8]]*n_repeat)
+# test_list.extend([['uniform_t0.01_direct3_1ppc_512', 64, 16, 8]]*n_repeat)
+# test_list.extend([['uniform_t0.01_esirk1_1ppc_512', 64, 16, 8]]*n_repeat)
+# test_list.extend([['uniform_t0.01_esirk3_1ppc_512', 64, 16, 8]]*n_repeat)
+
+# test_list.extend([['uniform_t0.01_direct1_1ppc_1024', 512, 16, 8]]*n_repeat)
+# test_list.extend([['uniform_t0.01_direct3_1ppc_1024', 512, 16, 8]]*n_repeat)
+# test_list.extend([['uniform_t0.01_esirk1_1ppc_1024', 512, 16, 8]]*n_repeat)
+# test_list.extend([['uniform_t0.01_esirk3_1ppc_1024', 512, 16, 8]]*n_repeat)
+
+# test_list.extend([['uniform_t0.01_direct1_1ppc_2048', 4096, 16, 8]]*n_repeat)
+# test_list.extend([['uniform_t0.01_direct3_1ppc_2048', 4096, 16, 8]]*n_repeat)
+# test_list.extend([['uniform_t0.01_esirk1_1ppc_2048', 4096, 16, 8]]*n_repeat)
+# test_list.extend([['uniform_t0.01_esirk3_1ppc_2048', 4096, 16, 8]]*n_repeat)
+
+n_tests = len(test_list)
+
 # Read command-line arguments
 # ---------------------------
-
 # Create parser and read arguments
 parser = argparse.ArgumentParser( description='Run performance tests and write results in files')
@@ -115,136 +160,6 @@ if args.recompile == True:
 
 # Define functions to run a test and analyse results
 # --------------------------------------------------
-
-# Run a performance test in an interactive allocation
-def run_interactive(run_name, res_dir, n_node=1, n_mpi=1, n_omp=1):
-    # Clean res_dir
-    if os.path.exists(res_dir):
-        shutil.rmtree(res_dir)
-    os.makedirs(res_dir)
-    # Copy files to res_dir
-    shutil.copyfile(bin_dir + bin_name, res_dir + bin_name)
-    shutil.copyfile(cwd + run_name, res_dir + 'inputs')
-    os.chdir(res_dir)
-    if args.architecture == 'cpu':
-        cflag_value = max(1, int(32/n_mpi) * 2) # Follow NERSC directives
-        exec_command = 'export OMP_NUM_THREADS=' + str(n_omp) + ';' +\
-                       'srun --cpu_bind=cores ' + \
-                       ' -n ' + str(n_node*n_mpi) + \
-                       ' -c ' + str(cflag_value) + \
-                       ' ./' + bin_name + ' inputs > perf_output.txt'
-    elif args.architecture == 'knl':
-        # number of logical cores per MPI process
-        cflag_value = max(1,int(68/n_mpi) * 4) # Follow NERSC directives
-        exec_command = 'export OMP_NUM_THREADS=' + str(n_omp) + ';' +\
-                       'srun --cpu_bind=cores ' + \
-                       ' -n ' + str(n_node*n_mpi) + \
-                       ' -c ' + str(cflag_value) + \
-                       ' ./' + bin_name + ' inputs > perf_output.txt'
-    os.system('chmod 700 ' + bin_name)
-    os.system(config_command + exec_command)
-    return 0
-
-def run_batch(run_name, res_dir, n_node=1, n_mpi=1, n_omp=1):
-    # Clean res_dir
-    if os.path.exists(res_dir):
-        shutil.rmtree(res_dir)
-    os.makedirs(res_dir)
-    # Copy files to res_dir
-    shutil.copyfile(bin_dir + bin_name, res_dir + bin_name)
-    shutil.copyfile(cwd + run_name, res_dir + 'inputs')
-    os.chdir(res_dir)
-    batch_string = ''
-    batch_string += '#!/bin/bash\n'
-    batch_string += '#SBATCH --job-name=' + run_name + str(n_node) + str(n_mpi) + str(n_omp) + '\n'
-    batch_string += '#SBATCH --time=00:30:00\n'
-    batch_string += '#SBATCH -C ' + module_Cname[args.architecture] + '\n'
-    batch_string += '#SBATCH -N ' + str(n_node) + '\n'
-    batch_string += '#SBATCH --partition=regular\n'
-    batch_string += '#SBATCH --qos=normal\n'
-    batch_string += '#SBATCH -e error.txt\n'
-    batch_string += '#SBATCH --account=m2852\n'
-    batch_string += 'export OMP_NUM_THREADS=' + str(n_omp) + '\n'
-    if args.architecture == 'cpu':
-        cflag_value = max(1, int(32/n_mpi) * 2) # Follow NERSC directives
-        batch_string += 'srun --cpu_bind=cores '+ \
-                        ' -n ' + str(n_node*n_mpi) + \
-                        ' -c ' + str(cflag_value) + \
-                        ' ./' + bin_name + ' inputs > perf_output.txt'
-    elif args.architecture == 'knl':
-        # number of logical cores per MPI process
-        cflag_value = max(1, int(64/n_mpi) * 4) # Follow NERSC directives
-        batch_string += 'srun --cpu_bind=cores ' + \
-                        ' -n ' + str(n_node*n_mpi) + \
-                        ' -c ' + str(cflag_value) + \
-                        ' ./' + bin_name + ' inputs > perf_output.txt\n'
-    batch_file = 'slurm'
-    f_exe = open(batch_file,'w')
-    f_exe.write(batch_string)
-    f_exe.close()
-    os.system('chmod 700 ' + bin_name)
-    os.system(config_command + 'sbatch ' + batch_file + ' >> ' + cwd + 'log_jobids_tmp.txt')
-    return 0
-
-# Read output file and return init time and 1-step time
-def read_run_perf(filename):
-    timing_list = []
-    # Search inclusive time to get simulation step time
-    partition_limit = 'NCalls Incl. Min Incl. Avg Incl. Max Max %'
-    with open(filename) as file_handler:
-        output_text = file_handler.read()
-    # Get total simulation time
-    line_match_totaltime = re.search('TinyProfiler total time across processes.*', output_text)
-    total_time = float(line_match_totaltime.group(0).split()[8])
-    search_area = output_text.partition(partition_limit)[2]
-    line_match_looptime = re.search('\nWarpX::Evolve().*', search_area)
-    time_wo_initialization = float(line_match_looptime.group(0).split()[3])
-    timing_list += [str(total_time - time_wo_initialization)]
-    timing_list += [str(time_wo_initialization/n_steps)]
-    # Search exclusive time to get routines timing
-    partition_limit1 = 'NCalls Excl. Min Excl. Avg Excl. Max Max %'
-    partition_limit2 = 'NCalls Incl. Min Incl. Avg Incl. Max Max %'
-    file_handler.close()
-    with open(filename) as file_handler:
-        output_text = file_handler.read()
-    search_area = output_text.partition(partition_limit1)[2].partition(partition_limit2)[0]
-    pattern_list = ['\nParticleContainer::Redistribute().*',\
-                    '\nFabArray::FillBoundary().*',\
-                    '\nFabArray::ParallelCopy().*',\
-                    '\nPICSAR::CurrentDeposition.*',\
-                    '\nPICSAR::FieldGather.*',\
-                    '\nPICSAR::ParticlePush.*',\
-                    '\nPPC::Evolve::Copy.*',\
-                    '\nWarpX::EvolveEM().*',\
-                    'NArrayInt>::Checkpoint().*',\
-                    'NArrayInt>::WriteParticles().*',\
-                    '\nVisMF::Write_FabArray.*',\
-                    '\nWriteMultiLevelPlotfile().*']
-    for pattern in pattern_list:
-        timing = '0'
-        line_match = re.search(pattern, search_area)
-        if line_match is not None:
-            timing = [str(float(line_match.group(0).split()[3])/n_steps)]
-        timing_list += timing
-    return timing_list
-
-# Write time into logfile
-def write_perf_logfile(log_file):
-    log_line = ' '.join([year, month, day, run_name, args.compiler,\
-                         args.architecture, str(n_node), str(n_mpi),\
-                         str(n_omp)] + timing_list + ['\n'])
-    f_log = open(log_file, 'a')
-    f_log.write(log_line)
-    f_log.close()
-    return 0
-
-def get_nsteps(runname):
-    with open(runname) as file_handler:
-        runname_text = file_handler.read()
-    line_match_nsteps = re.search('\nmax_step.*', runname_text)
-    nsteps = float(line_match_nsteps.group(0).split()[2])
-    return nsteps
-
 def process_analysis():
     dependencies = ''
     f_log = open(cwd + 'log_jobids_tmp.txt','r')
@@ -254,18 +169,19 @@ def process_analysis():
         dependencies += line.split()[3] + ':'
     batch_string = ''
    batch_string += '#!/bin/bash\n'
-    batch_string += '#SBATCH --job-name=perftests_read\n'
+    batch_string += '#SBATCH --job-name=warpx_read\n'
     batch_string += '#SBATCH --time=00:05:00\n'
     batch_string += '#SBATCH -C ' + module_Cname[args.architecture] + '\n'
     batch_string += '#SBATCH -N 1\n'
     batch_string += '#SBATCH -S 4\n'
-    batch_string += '#SBATCH --partition=regular\n'
-    batch_string += '#SBATCH --qos=normal\n'
+    batch_string += '#SBATCH -q regular\n'
     batch_string += '#SBATCH -e read_error.txt\n'
     batch_string += '#SBATCH -o read_output.txt\n'
     batch_string += '#SBATCH --mail-type=end\n'
     batch_string += '#SBATCH --account=m2852\n'
-    batch_string += 'python ' + __file__ + ' --no-recompile --compiler=' + args.compiler + ' --architecture=' + args.architecture + ' --mode=read' + ' --log_file=' + log_file
+    batch_string += 'python ' + __file__ + ' --no-recompile --compiler=' + \
+                    args.compiler + ' --architecture=' + args.architecture + \
+                    ' --mode=read' + ' --log_file=' + log_file
     if args.commit == True:
         batch_string += ' --commit'
     batch_string += '\n'
@@ -279,20 +195,6 @@ def process_analysis():
 
 # Loop over the tests and return run time + details
 # -------------------------------------------------
-
-# each element of test_list contains
-# [str runname, int n_node, int n_mpi PER NODE, int n_omp]
-
-test_list = []
-n_repeat = 1
-filename1 = 'uniform_plasma'
-
-test_list.extend([[filename1, 1, 8, 16]]*3)
-test_list.extend([[filename1, 1, 4, 32]]*3)
-test_list.extend([[filename1, 2, 4, 32]]*3)
-
-n_tests = len(test_list)
-
 if args.mode == 'run':
     # Remove file log_jobids_tmp.txt if exists.
     # This file contains the jobid of every perf test
@@ -308,13 +210,14 @@ if args.mode == 'run':
         n_omp = current_run[3]
         n_steps = get_nsteps(cwd + run_name)
         res_dir = res_dir_base
-        res_dir += '_'.join([year, month, day, run_name, args.compiler,\
+        res_dir += '_'.join([run_name, args.compiler,\
                              args.architecture, str(n_node), str(n_mpi),\
-                             str(n_omp)]) + '/'
+                             str(n_omp), str(count)]) + '/'
         # Run the simulation.
         # If you are currently in an interactive session and want to run interactive,
         # just replace run_batch with run_interactive
-        run_batch(run_name, res_dir, n_node=n_node, n_mpi=n_mpi, n_omp=n_omp)
+        run_batch(run_name, res_dir, bin_name, config_command, architecture=args.architecture, \
+                  Cname=module_Cname[args.architecture], n_node=n_node, n_mpi=n_mpi, n_omp=n_omp)
         os.chdir(cwd)
     process_analysis()
@@ -326,7 +229,8 @@ if args.mode == 'read':
                    'FillBoundary ParallelCopy CurrentDeposition FieldGather '+\
                    'ParthiclePush Copy EvolveEM Checkpoint '+\
                    'WriteParticles Write_FabArray '+\
-                   'WriteMultiLevelPlotfile(unit: second)\n'
+                   'WriteMultiLevelPlotfile '+\
+                   'RedistributeMPI(unit: second)\n'
     f_log = open(log_dir + log_file, 'a')
     f_log.write(log_line)
     f_log.close()
@@ -340,13 +244,20 @@ if args.mode == 'read':
         n_steps = get_nsteps(cwd + run_name)
         print('n_steps = ' + str(n_steps))
         res_dir = res_dir_base
-        res_dir += '_'.join([year, month, day, run_name, args.compiler,\
+        res_dir += '_'.join([run_name, args.compiler,\
                              args.architecture, str(n_node), str(n_mpi),\
-                             str(n_omp)]) + '/'
-        # Read performance data from the output file
-        timing_list = read_run_perf(res_dir + 'perf_output.txt')
+                             str(n_omp), str(count)]) + '/'
+#        res_dir += '_'.join([year, month, '25', run_name, args.compiler,\
+#                             args.architecture, str(n_node), str(n_mpi), \
+#                             str(n_omp)]) + '/'
+        # Read performance data from the output file
+        output_filename = 'perf_output.txt'
+        timing_list = read_run_perf(res_dir + output_filename, n_steps)
         # Write performance data to the performance log file
-        write_perf_logfile(log_dir + log_file)
+        log_line = ' '.join([year, month, day, run_name, args.compiler,\
+                             args.architecture, str(n_node), str(n_mpi),\
+                             str(n_omp)] + timing_list + ['\n'])
+        write_perf_logfile(log_dir + log_file, log_line)
         # Store test parameters fot record
         dir_record_base = './perf_warpx_record/'
@@ -363,6 +274,21 @@ if args.mode == 'read':
         for count, current_run in enumerate(test_list):
             shutil.copy(current_run[0], dir_record)
 
+    for count, current_run in enumerate(test_list):
+        run_name = current_run[0]
+        n_node = current_run[1]
+        n_mpi = current_run[2]
+        n_omp = current_run[3]
+        res_dir = res_dir_base
+        res_dir += '_'.join([run_name, args.compiler,\
+                             args.architecture, str(n_node), str(n_mpi),\
+                             str(n_omp), str(count)]) + '/'
+        res_dir_arch = res_dir_base
+        res_dir_arch += '_'.join([year, month, day, run_name, args.compiler,\
+                                  args.architecture, str(n_node), str(n_mpi), \
+                                  str(n_omp), str(count)]) + '/'
+        os.rename(res_dir, res_dir_arch)
+
     # Commit results to the Repo
     if args.commit == True:
        os.system('git add ' + log_dir + log_file + ';'\
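The helpers removed above are now pulled in through the new `from functions_perftest import *` line at the top of the script, but functions_perftest.py itself is not part of this diff. As a rough sketch only, inferred from the new call sites (run_batch with bin_name/config_command/architecture/Cname arguments, read_run_perf taking n_steps, write_perf_logfile taking a pre-built log_line, get_nsteps unchanged), that module would expose an interface along these lines; the actual names, defaults and bodies in the repository may differ:

# Hypothetical sketch of functions_perftest.py, inferred from the call sites
# in this diff; not the actual module shipped with WarpX.
import re

def get_nsteps(run_name):
    # Read max_step from the input file, as the removed in-file helper did.
    with open(run_name) as f:
        text = f.read()
    return float(re.search('\nmax_step.*', text).group(0).split()[2])

def read_run_perf(filename, n_steps):
    # Parse the TinyProfiler output in perf_output.txt; n_steps is now an
    # explicit argument instead of a module-level global.
    # Returns a list of timing strings.
    ...

def write_perf_logfile(log_file, log_line):
    # The caller now assembles log_line itself; this helper only appends it.
    with open(log_file, 'a') as f:
        f.write(log_line)

def run_batch(run_name, res_dir, bin_name, config_command,
              architecture='cpu', Cname='haswell',
              n_node=1, n_mpi=1, n_omp=1):
    # Write and submit the SLURM script, as the removed in-file run_batch did,
    # with the former globals (bin_name, config_command, architecture and the
    # SLURM constraint name) passed in explicitly.
    ...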