diff options
Diffstat (limited to 'Tools/performance_tests')
-rw-r--r-- | Tools/performance_tests/GNUmakefile_perftest | 16 | ||||
-rw-r--r-- | Tools/performance_tests/automated_test_1_uniform_rest_32ppc | 1 | ||||
-rw-r--r-- | Tools/performance_tests/automated_test_2_uniform_rest_1ppc | 1 | ||||
-rw-r--r-- | Tools/performance_tests/automated_test_3_uniform_drift_4ppc | 1 | ||||
-rw-r--r-- | Tools/performance_tests/automated_test_5_loadimbalance | 1 | ||||
-rw-r--r-- | Tools/performance_tests/automated_test_6_output_2ppc | 1 | ||||
-rw-r--r-- | Tools/performance_tests/cori.py | 159 | ||||
-rw-r--r-- | Tools/performance_tests/functions_perftest.py | 99 | ||||
-rw-r--r-- | Tools/performance_tests/run_automated.py | 270 | ||||
-rw-r--r-- | Tools/performance_tests/summit.py | 135 |
10 files changed, 459 insertions, 225 deletions
diff --git a/Tools/performance_tests/GNUmakefile_perftest b/Tools/performance_tests/GNUmakefile_perftest deleted file mode 100644 index 38275332d..000000000 --- a/Tools/performance_tests/GNUmakefile_perftest +++ /dev/null @@ -1,16 +0,0 @@ -WARPX_HOME := ../.. -AMREX_HOME ?= $(WARPX_HOME)/../amrex -PICSAR_HOME ?= $(WARPX_HOME)/../picsar -OPENBC_HOME ?= $(WARPX_HOME)/../openbc_poisson -DEBUG = FALSE -DIM = 3 -COMP=intel -TINY_PROFILE = TRUE -USE_OMP = TRUE -USE_CUDA = FALSE -USE_ACC = FALSE -USE_SENSEI_INSITU = FALSE -EBASE = perf_tests -USE_PYTHON_MAIN = FALSE -WarpxBinDir = Bin -include $(WARPX_HOME)/Source/Make.WarpX diff --git a/Tools/performance_tests/automated_test_1_uniform_rest_32ppc b/Tools/performance_tests/automated_test_1_uniform_rest_32ppc index 55c1a6061..500e10859 100644 --- a/Tools/performance_tests/automated_test_1_uniform_rest_32ppc +++ b/Tools/performance_tests/automated_test_1_uniform_rest_32ppc @@ -22,6 +22,7 @@ warpx.verbose = 1 interpolation.nox = 3 interpolation.noy = 3 interpolation.noz = 3 +warpx.do_pml = 0 # CFL warpx.cfl = 1.0 diff --git a/Tools/performance_tests/automated_test_2_uniform_rest_1ppc b/Tools/performance_tests/automated_test_2_uniform_rest_1ppc index 8e17042c9..2af282db8 100644 --- a/Tools/performance_tests/automated_test_2_uniform_rest_1ppc +++ b/Tools/performance_tests/automated_test_2_uniform_rest_1ppc @@ -22,6 +22,7 @@ warpx.verbose = 1 interpolation.nox = 3 interpolation.noy = 3 interpolation.noz = 3 +warpx.do_pml = 1 # CFL warpx.cfl = 1.0 diff --git a/Tools/performance_tests/automated_test_3_uniform_drift_4ppc b/Tools/performance_tests/automated_test_3_uniform_drift_4ppc index 13af8aaff..93d224061 100644 --- a/Tools/performance_tests/automated_test_3_uniform_drift_4ppc +++ b/Tools/performance_tests/automated_test_3_uniform_drift_4ppc @@ -23,6 +23,7 @@ warpx.verbose = 1 interpolation.nox = 3 interpolation.noy = 3 interpolation.noz = 3 +warpx.do_pml = 0 # CFL warpx.cfl = 1.0 diff --git 
a/Tools/performance_tests/automated_test_5_loadimbalance b/Tools/performance_tests/automated_test_5_loadimbalance index 22c9ec4b6..d2e03372b 100644 --- a/Tools/performance_tests/automated_test_5_loadimbalance +++ b/Tools/performance_tests/automated_test_5_loadimbalance @@ -21,6 +21,7 @@ warpx.load_balance_int = 5 interpolation.nox = 3 interpolation.noy = 3 interpolation.noz = 3 +warpx.do_pml = 0 # CFL warpx.cfl = 1.0 diff --git a/Tools/performance_tests/automated_test_6_output_2ppc b/Tools/performance_tests/automated_test_6_output_2ppc index f4498c410..9e8a839cc 100644 --- a/Tools/performance_tests/automated_test_6_output_2ppc +++ b/Tools/performance_tests/automated_test_6_output_2ppc @@ -22,6 +22,7 @@ warpx.verbose = 1 interpolation.nox = 3 interpolation.noy = 3 interpolation.noz = 3 +warpx.do_pml = 0 # CFL warpx.cfl = 1.0 diff --git a/Tools/performance_tests/cori.py b/Tools/performance_tests/cori.py new file mode 100644 index 000000000..dbe3a1e2a --- /dev/null +++ b/Tools/performance_tests/cori.py @@ -0,0 +1,159 @@ +import os, copy + +from functions_perftest import test_element + +module_name = {'cpu': 'haswell.', 'knl': 'mic-knl.', 'gpu':'.'} + +def executable_name(compiler, architecture): + return 'perf_tests3d.' + compiler + \ + '.' 
+ module_name[architecture] + 'TPROF.MPI.OMP.ex' + +def get_config_command(compiler, architecture): + config_command = '' + config_command += 'module unload darshan;' + if architecture == 'knl': + if compiler == 'intel': + config_command += 'module unload PrgEnv-gnu;' + config_command += 'module load PrgEnv-intel;' + elif compiler == 'gnu': + config_command += 'module unload PrgEnv-intel;' + config_command += 'module load PrgEnv-gnu;' + config_command += 'module unload craype-haswell;' + config_command += 'module load craype-mic-knl;' + elif architecture == 'cpu': + if compiler == 'intel': + config_command += 'module unload PrgEnv-gnu;' + config_command += 'module load PrgEnv-intel;' + elif compiler == 'gnu': + config_command += 'module unload PrgEnv-intel;' + config_command += 'module load PrgEnv-gnu;' + config_command += 'module unload craype-mic-knl;' + config_command += 'module load craype-haswell;' + return config_command + +# This function runs a batch script with +# dependencies to perform the analysis +# after all performance tests are done. 
+def process_analysis(automated, cwd, compiler, architecture, n_node_list, start_date): + dependencies = '' + f_log = open(cwd + 'log_jobids_tmp.txt' ,'r') + for line in f_log.readlines(): + dependencies += line.split()[3] + ':' + + batch_string = '''#!/bin/bash +#SBATCH --job-name=warpx_1node_read +#SBATCH --time=00:07:00 +#SBATCH -C knl +#SBATCH -N 1 +#SBATCH -S 4 +#SBATCH -q regular +#SBATCH -e read_error.txt +#SBATCH -o read_output.txt +#SBATCH --mail-type=end +#SBATCH --account=m2852 +module load h5py-parallel +''' + batch_string += 'python run_automated.py --compiler=' + \ + compiler + ' --architecture=' + architecture + \ + ' --mode=read' + \ + ' --n_node_list=' + '"' + n_node_list + '"' + \ + ' --start_date=' + start_date + if automated == True: + batch_string += ' --automated' + batch_string += '\n' + batch_file = 'slurm_perfread' + f_exe = open(batch_file,'w') + f_exe.write(batch_string) + f_exe.close() + os.system('chmod 700 ' + batch_file) + print( 'process_analysis line: ' + 'sbatch --dependency afterok:' + dependencies[0:-1] + ' ' + batch_file) + os.system('sbatch --dependency afterok:' + dependencies[0:-1] + ' ' + batch_file) + +# Calculate simulation time. Take 5 min + 5 min / simulation +def time_min(nb_simulations): + return 5. + nb_simulations*5. 
+ +def get_submit_job_command(): + return ' sbatch ' + +def get_batch_string(test_list, job_time_min, Cname, n_node): + + job_time_str = str(int(job_time_min/60)) + ':' + str(int(job_time_min%60)) + ':00' + + batch_string = '' + batch_string += '#!/bin/bash\n' + batch_string += '#SBATCH --job-name=' + test_list[0].input_file + '\n' + batch_string += '#SBATCH --time=' + job_time_str + '\n' + batch_string += '#SBATCH -C ' + Cname + '\n' + batch_string += '#SBATCH -N ' + str(n_node) + '\n' + batch_string += '#SBATCH -q regular\n' + batch_string += '#SBATCH -e error.txt\n' + batch_string += '#SBATCH --account=m2852\n' + return batch_string + +def get_run_string(current_test, architecture, n_node, count, bin_name, runtime_param_string): + srun_string = '' + srun_string += 'export OMP_NUM_THREADS=' + str(current_test.n_omp) + '\n' + # number of logical cores per MPI process + if architecture == 'cpu': + cflag_value = max(1, int(32/current_test.n_mpi_per_node) * 2) # Follow NERSC directives + elif architecture == 'knl': + cflag_value = max(1, int(64/current_test.n_mpi_per_node) * 4) # Follow NERSC directives + output_filename = 'out_' + '_'.join([current_test.input_file, str(n_node), str(current_test.n_mpi_per_node), str(current_test.n_omp), str(count)]) + '.txt' + srun_string += 'srun --cpu_bind=cores '+ \ + ' -n ' + str(n_node*current_test.n_mpi_per_node) + \ + ' -c ' + str(cflag_value) + \ + ' ./' + bin_name + \ + ' ' + current_test.input_file + \ + runtime_param_string + \ + ' > ' + output_filename + '\n' + return srun_string + +def get_test_list(n_repeat): + test_list_unq = [] + # n_node is kept to None and passed in functions as an external argument + # That way, several test_element_instance run with the same n_node on the same batch job + test_list_unq.append( test_element(input_file='automated_test_1_uniform_rest_32ppc', + n_mpi_per_node=8, + n_omp=8, + n_cell=[128, 128, 128], + max_grid_size=64, + blocking_factor=32, + n_step=10) ) + test_list_unq.append( 
test_element(input_file='automated_test_2_uniform_rest_1ppc', + n_mpi_per_node=8, + n_omp=8, + n_cell=[256, 256, 512], + max_grid_size=64, + blocking_factor=32, + n_step=10) ) + test_list_unq.append( test_element(input_file='automated_test_3_uniform_drift_4ppc', + n_mpi_per_node=8, + n_omp=8, + n_cell=[128, 128, 128], + max_grid_size=64, + blocking_factor=32, + n_step=10) ) + test_list_unq.append( test_element(input_file='automated_test_4_labdiags_2ppc', + n_mpi_per_node=8, + n_omp=8, + n_cell=[64, 64, 128], + max_grid_size=64, + blocking_factor=32, + n_step=50) ) + test_list_unq.append( test_element(input_file='automated_test_5_loadimbalance', + n_mpi_per_node=8, + n_omp=8, + n_cell=[128, 128, 128], + max_grid_size=64, + blocking_factor=32, + n_step=10) ) + test_list_unq.append( test_element(input_file='automated_test_6_output_2ppc', + n_mpi_per_node=8, + n_omp=8, + n_cell=[128, 256, 256], + max_grid_size=64, + blocking_factor=32, + n_step=0) ) + test_list = [copy.deepcopy(item) for item in test_list_unq for _ in range(n_repeat) ] + return test_list diff --git a/Tools/performance_tests/functions_perftest.py b/Tools/performance_tests/functions_perftest.py index 5e026bf12..67622317a 100644 --- a/Tools/performance_tests/functions_perftest.py +++ b/Tools/performance_tests/functions_perftest.py @@ -1,7 +1,32 @@ -import os, shutil, re +import os, shutil, re, copy import pandas as pd import numpy as np import git +# import cori +# import summit + +# Each instance of this class contains information for a single test. 
+class test_element(): + def __init__(self, input_file=None, n_node=None, n_mpi_per_node=None, + n_omp=None, n_cell=None, n_step=None, max_grid_size=None, + blocking_factor=None): + self.input_file = input_file + self.n_node = n_node + self.n_mpi_per_node = n_mpi_per_node + self.n_omp = n_omp + self.n_cell = n_cell + self.n_step = n_step + self.max_grid_size = max_grid_size + self.blocking_factor = blocking_factor + + def scale_n_cell(self, n_node=0): + n_cell_scaled = copy.deepcopy(self.n_cell) + index_dim = 0 + while n_node > 1: + n_cell_scaled[index_dim] *= 2 + n_node /= 2 + index_dim = (index_dim+1) % 3 + self.n_cell = n_cell_scaled def scale_n_cell(ncell, n_node): ncell_scaled = ncell[:] @@ -25,56 +50,6 @@ def get_file_content(filename=None): file_handler.close() return file_content -def run_batch_nnode(test_list, res_dir, bin_name, config_command, architecture='knl', Cname='knl', n_node=1, runtime_param_list=[]): - # Clean res_dir - if os.path.exists(res_dir): - shutil.rmtree(res_dir, ignore_errors=True) - os.makedirs(res_dir) - # Copy files to res_dir - cwd = os.environ['AUTOMATED_PERF_TESTS'] + '/WarpX/Tools/performance_tests/' - bin_dir = cwd + 'Bin/' - shutil.copy(bin_dir + bin_name, res_dir) - os.chdir(res_dir) - # Calculate simulation time. Take 5 min + 2 min / simulation - job_time_min = 5. + len(test_list)*5. 
- job_time_str = str(int(job_time_min/60)) + ':' + str(int(job_time_min%60)) + ':00' - batch_string = '' - batch_string += '#!/bin/bash\n' - batch_string += '#SBATCH --job-name=' + test_list[0].input_file + '\n' - batch_string += '#SBATCH --time=' + job_time_str + '\n' - batch_string += '#SBATCH -C ' + Cname + '\n' - batch_string += '#SBATCH -N ' + str(n_node) + '\n' - batch_string += '#SBATCH -q regular\n' - batch_string += '#SBATCH -e error.txt\n' - batch_string += '#SBATCH --account=m2852\n' - - for count, current_test in enumerate(test_list): - shutil.copy(cwd + current_test.input_file, res_dir) - srun_string = '' - srun_string += 'export OMP_NUM_THREADS=' + str(current_test.n_omp) + '\n' - # number of logical cores per MPI process - if architecture == 'cpu': - cflag_value = max(1, int(32/current_test.n_mpi_per_node) * 2) # Follow NERSC directives - elif architecture == 'knl': - cflag_value = max(1, int(64/current_test.n_mpi_per_node) * 4) # Follow NERSC directives - output_filename = 'out_' + '_'.join([current_test.input_file, str(n_node), str(current_test.n_mpi_per_node), str(current_test.n_omp), str(count)]) + '.txt' - srun_string += 'srun --cpu_bind=cores '+ \ - ' -n ' + str(n_node*current_test.n_mpi_per_node) + \ - ' -c ' + str(cflag_value) + \ - ' ./' + bin_name + \ - ' ' + current_test.input_file + \ - runtime_param_list[ count ] + \ - ' > ' + output_filename + '\n' - batch_string += srun_string - batch_string += 'rm -rf plotfiles ; rm -rf lab_frame_data\n' - batch_file = 'slurm' - f_exe = open(batch_file,'w') - f_exe.write(batch_string) - f_exe.close() - os.system('chmod 700 ' + bin_name) - os.system(config_command + 'sbatch ' + batch_file + ' >> ' + cwd + 'log_jobids_tmp.txt') - return 0 - def run_batch(run_name, res_dir, bin_name, config_command, architecture='knl',\ Cname='knl', n_node=1, n_mpi=1, n_omp=1): # Clean res_dir @@ -82,7 +57,6 @@ def run_batch(run_name, res_dir, bin_name, config_command, architecture='knl',\ shutil.rmtree(res_dir) 
os.makedirs(res_dir) # Copy files to res_dir - # Copy files to res_dir cwd = os.environ['WARPX'] + '/Tools/performance_tests/' bin_dir = cwd + 'Bin/' shutil.copy(bin_dir + bin_name, res_dir) @@ -119,6 +93,27 @@ def run_batch(run_name, res_dir, bin_name, config_command, architecture='knl',\ os.system(config_command + 'sbatch ' + batch_file + ' >> ' + cwd + 'log_jobids_tmp.txt') return 0 +def run_batch_nnode(test_list, res_dir, bin_name, config_command, batch_string, submit_job_command): + # Clean res_dir + if os.path.exists(res_dir): + shutil.rmtree(res_dir, ignore_errors=True) + os.makedirs(res_dir) + # Copy files to res_dir + cwd = os.environ['AUTOMATED_PERF_TESTS'] + '/warpx/Tools/performance_tests/' + bin_dir = cwd + 'Bin/' + shutil.copy(bin_dir + bin_name, res_dir) + os.chdir(res_dir) + + for count, current_test in enumerate(test_list): + shutil.copy(cwd + current_test.input_file, res_dir) + batch_file = 'batch_script.sh' + f_exe = open(batch_file,'w') + f_exe.write(batch_string) + f_exe.close() + os.system('chmod 700 ' + bin_name) + os.system(config_command + submit_job_command + batch_file +\ + ' >> ' + cwd + 'log_jobids_tmp.txt') + # Read output file and return init time and 1-step time def read_run_perf(filename, n_steps): timing_list = [] diff --git a/Tools/performance_tests/run_automated.py b/Tools/performance_tests/run_automated.py index 8f79750d4..fd771faac 100644 --- a/Tools/performance_tests/run_automated.py +++ b/Tools/performance_tests/run_automated.py @@ -1,15 +1,41 @@ -#!/usr/common/software/python/2.7-anaconda-4.4/bin/python - import os, sys, shutil, datetime, git import argparse, re, time, copy import pandas as pd from functions_perftest import store_git_hash, get_file_content, \ - run_batch_nnode, extract_dataframe + run_batch_nnode, extract_dataframe + +# Get name of supercomputer and import configuration functions from +# machine-specific file +if os.getenv("LMOD_SYSTEM_NAME") == 'summit': + machine = 'summit' + from summit import 
executable_name, process_analysis, \ + get_config_command, time_min, get_submit_job_command, \ + get_batch_string, get_run_string, get_test_list +if os.getenv("NERSC_HOST") == 'cori': + machine = 'cori' + from cori import executable_name, process_analysis, \ + get_config_command, time_min, get_submit_job_command, \ + get_batch_string, get_run_string, get_test_list # typical use: python run_automated.py --n_node_list='1,8,16,32' --automated # Assume warpx, picsar, amrex and perf_logs repos ar in the same directory and # environment variable AUTOMATED_PERF_TESTS contains the path to this directory +# requirements: +# - python packages: gitpython and pandas +# - AUTOMATED_PERF_TESTS: environment variables where warpx, +# amrex and picsar are installed ($AUTOMATED_PERF_TESTS/warpx etc.) +# - SCRATCH: environment variable where performance results are written. +# This script will create folder $SCRATCH/performance_warpx/ + +if "AUTOMATED_PERF_TESTS" not in os.environ: + raise ValueError("environment variable AUTOMATED_PERF_TESTS is not defined.\n" + "It should contain the path to the directory where WarpX, " + "AMReX and PICSAR repos are.") +if "SCRATCH" not in os.environ: + raise ValueError("environment variable SCRATCH is not defined.\n" + "This script will create $SCRATCH/performance_warpx/ " + "to store performance results.") # Handle parser ############### parser = argparse.ArgumentParser( description='Run performance tests and write results in files' ) @@ -33,11 +59,11 @@ parser.add_argument('--n_node_list', parser.add_argument('--start_date', dest='start_date' ) parser.add_argument('--compiler', - choices=['gnu', 'intel'], + choices=['gnu', 'intel', 'pgi'], default='intel', help='which compiler to use') parser.add_argument('--architecture', - choices=['cpu', 'knl'], + choices=['cpu', 'knl', 'gpu'], default='knl', help='which architecture to cross-compile for NERSC machines') parser.add_argument('--mode', @@ -48,6 +74,8 @@ args = parser.parse_args() 
n_node_list_string = args.n_node_list.split(',') n_node_list = [int(i) for i in n_node_list_string] start_date = args.start_date +compiler = args.compiler +architecture = args.architecture # Set behavior variables ######################## @@ -71,70 +99,21 @@ if args.automated == True: push_on_perf_log_repo = False pull_3_repos = True recompile = True - -# Each instance of this class contains information for a single test. -class test_element(): - def __init__(self, input_file=None, n_node=None, n_mpi_per_node=None, - n_omp=None, n_cell=None, n_step=None): - self.input_file = input_file - self.n_node = n_node - self.n_mpi_per_node = n_mpi_per_node - self.n_omp = n_omp - self.n_cell = n_cell - self.n_step = n_step - - def scale_n_cell(self, n_node=0): - n_cell_scaled = copy.deepcopy(self.n_cell) - index_dim = 0 - while n_node > 1: - n_cell_scaled[index_dim] *= 2 - n_node /= 2 - index_dim = (index_dim+1) % 3 - self.n_cell = n_cell_scaled + if machine == 'summit': + compiler = 'pgi' + architecture = 'gpu' # List of tests to perform # ------------------------ -test_list_unq = [] # Each test runs n_repeat times n_repeat = 2 -# n_node is kept to None and passed in functions as an external argument -# That way, several test_element_instance run with the same n_node on the same batch job -test_list_unq.append( test_element(input_file='automated_test_1_uniform_rest_32ppc', - n_mpi_per_node=8, - n_omp=8, - n_cell=[128, 128, 128], - n_step=10) ) -test_list_unq.append( test_element(input_file='automated_test_2_uniform_rest_1ppc', - n_mpi_per_node=8, - n_omp=8, - n_cell=[256, 256, 512], - n_step=10) ) -test_list_unq.append( test_element(input_file='automated_test_3_uniform_drift_4ppc', - n_mpi_per_node=8, - n_omp=8, - n_cell=[128, 128, 128], - n_step=10) ) -test_list_unq.append( test_element(input_file='automated_test_4_labdiags_2ppc', - n_mpi_per_node=8, - n_omp=8, - n_cell=[64, 64, 128], - n_step=50) ) -test_list_unq.append( 
test_element(input_file='automated_test_5_loadimbalance', - n_mpi_per_node=8, - n_omp=8, - n_cell=[128, 128, 128], - n_step=10) ) -test_list_unq.append( test_element(input_file='automated_test_6_output_2ppc', - n_mpi_per_node=8, - n_omp=8, - n_cell=[128, 256, 256], - n_step=0) ) -test_list = [copy.deepcopy(item) for item in test_list_unq for _ in range(n_repeat) ] +# test_list is machine-specific +test_list = get_test_list(n_repeat) # Define directories # ------------------ source_dir_base = os.environ['AUTOMATED_PERF_TESTS'] -warpx_dir = source_dir_base + '/WarpX/' +warpx_dir = source_dir_base + '/warpx/' picsar_dir = source_dir_base + '/picsar/' amrex_dir = source_dir_base + '/amrex/' res_dir_base = os.environ['SCRATCH'] + '/performance_warpx/' @@ -142,12 +121,13 @@ perf_logs_repo = source_dir_base + 'perf_logs/' # Define dictionaries # ------------------- -compiler_name = {'intel': 'intel', 'gnu': 'gcc'} -module_name = {'cpu': 'haswell', 'knl': 'mic-knl'} -module_Cname = {'cpu': 'haswell', 'knl': 'knl,quad,cache'} +compiler_name = {'intel': 'intel', 'gnu': 'gcc', 'pgi':'pgi'} +module_Cname = {'cpu': 'haswell', 'knl': 'knl,quad,cache', 'gpu':''} +csv_file = {'cori':'cori_knl.csv', 'summit':'summit.csv'} cwd = os.getcwd() + '/' bin_dir = cwd + 'Bin/' -bin_name = 'perf_tests3d.' + args.compiler + '.' 
+ module_name[args.architecture] + '.TPROF.MPI.OMP.ex' +bin_name = executable_name(compiler, architecture) + log_dir = cwd perf_database_file = cwd + perf_database_file day = time.strftime('%d') @@ -159,27 +139,7 @@ year = time.strftime('%Y') if args.mode == 'run': start_date = datetime.datetime.now().strftime("%Y_%m_%d_%H_%M_%S") # Set default options for compilation and execution - config_command = '' - config_command += 'module unload darshan;' - config_command += 'module load craype-hugepages4M;' - if args.architecture == 'knl': - if args.compiler == 'intel': - config_command += 'module unload PrgEnv-gnu;' - config_command += 'module load PrgEnv-intel;' - elif args.compiler == 'gnu': - config_command += 'module unload PrgEnv-intel;' - config_command += 'module load PrgEnv-gnu;' - config_command += 'module unload craype-haswell;' - config_command += 'module load craype-mic-knl;' - elif args.architecture == 'cpu': - if args.compiler == 'intel': - config_command += 'module unload PrgEnv-gnu;' - config_command += 'module load PrgEnv-intel;' - elif args.compiler == 'gnu': - config_command += 'module unload PrgEnv-intel;' - config_command += 'module load PrgEnv-gnu;' - config_command += 'module unload craype-mic-knl;' - config_command += 'module load craype-haswell;' + config_command = get_config_command(compiler, architecture) # Create main result directory if does not exist if not os.path.exists(res_dir_base): os.mkdir(res_dir_base) @@ -194,56 +154,31 @@ if args.mode == 'run': git_repo.pull() git_repo = git.cmd.Git( warpx_dir ) git_repo.pull() - with open(cwd + 'GNUmakefile_perftest') as makefile_handler: - makefile_text = makefile_handler.read() - makefile_text = re.sub('\nCOMP.*', '\nCOMP=%s' %compiler_name[args.compiler], makefile_text) - with open(cwd + 'GNUmakefile_perftest', 'w') as makefile_handler: - makefile_handler.write( makefile_text ) - os.system(config_command + " make -f GNUmakefile_perftest realclean ; " + " rm -r tmp_build_dir *.mod; make -j 8 -f 
GNUmakefile_perftest") + + # Copy WarpX/GNUmakefile to current directory and recompile + # with specific options for automated performance tests. + # This way, performance test compilation does not mess with user's + # compilation + shutil.copyfile("../../GNUmakefile","./GNUmakefile") + make_realclean_command = " make realclean WARPX_HOME=../.. " \ + "AMREX_HOME=../../../amrex/ PICSAR_HOME=../../../picsar/ " \ + "EBASE=perf_tests COMP=%s" %compiler_name[compiler] + ";" + make_command = "make -j 16 WARPX_HOME=../.. " \ + "AMREX_HOME=../../../amrex/ PICSAR_HOME=../../../picsar/ " \ + "EBASE=perf_tests COMP=%s" %compiler_name[compiler] + if machine == 'summit': + make_command += ' USE_GPU=TRUE ' + os.system(config_command + make_realclean_command + \ + "rm -r tmp_build_dir *.mod; " + make_command ) + + # Store git hashes for WarpX, AMReX and PICSAR into file, so that + # they can be read when running the analysis. if os.path.exists( cwd + 'store_git_hashes.txt' ): os.remove( cwd + 'store_git_hashes.txt' ) store_git_hash(repo_path=picsar_dir, filename=cwd + 'store_git_hashes.txt', name='picsar') store_git_hash(repo_path=amrex_dir , filename=cwd + 'store_git_hashes.txt', name='amrex' ) store_git_hash(repo_path=warpx_dir , filename=cwd + 'store_git_hashes.txt', name='warpx' ) -# This function runs a batch script with -# dependencies to perform the analysis -# after all performance tests are done. 
-def process_analysis(): - dependencies = '' - f_log = open(cwd + 'log_jobids_tmp.txt' ,'r') - for line in f_log.readlines(): - dependencies += line.split()[3] + ':' - batch_string = '' - batch_string += '#!/bin/bash\n' - batch_string += '#SBATCH --job-name=warpx_1node_read\n' - batch_string += '#SBATCH --time=00:07:00\n' - batch_string += '#SBATCH -C knl\n' - batch_string += '#SBATCH -N 1\n' - batch_string += '#SBATCH -S 4\n' - batch_string += '#SBATCH -q regular\n' - batch_string += '#SBATCH -e read_error.txt\n' - batch_string += '#SBATCH -o read_output.txt\n' - batch_string += '#SBATCH --mail-type=end\n' - batch_string += '#SBATCH --account=m2852\n' - batch_string += 'module load h5py-parallel\n' - batch_string += 'python ' + __file__ + ' --compiler=' + \ - args.compiler + ' --architecture=' + args.architecture + \ - ' --mode=read' + \ - ' --n_node_list=' + '"' + args.n_node_list + '"' + \ - ' --start_date=' + start_date - if args.automated == True: - batch_string += ' --automated' - batch_string += '\n' - batch_file = 'slurm_perfread' - f_exe = open(batch_file,'w') - f_exe.write(batch_string) - f_exe.close() - os.system('chmod 700 ' + batch_file) - print( 'process_analysis line: ' + 'sbatch --dependency afterok:' + dependencies[0:-1] + ' ' + batch_file) - os.system('sbatch --dependency afterok:' + dependencies[0:-1] + ' ' + batch_file) - return 0 - # Loop over the tests and run all simulations: # One batch job submitted per n_node. Several # tests run within the same batch job. @@ -254,24 +189,35 @@ if args.mode == 'run': # loop on n_node. 
One batch script per n_node for n_node in n_node_list: res_dir = res_dir_base - res_dir += '_'.join([run_name, args.compiler, args.architecture, str(n_node)]) + '/' + res_dir += '_'.join([run_name, compiler, architecture, str(n_node)]) + '/' runtime_param_list = [] # Deep copy as we change the attribute n_cell of # each instance of class test_element test_list_n_node = copy.deepcopy(test_list) + job_time_min = time_min(len(test_list)) + batch_string = get_batch_string(test_list_n_node, job_time_min, module_Cname[architecture], n_node) # Loop on tests - for current_run in test_list_n_node: + for count, current_run in enumerate(test_list_n_node): current_run.scale_n_cell(n_node) runtime_param_string = ' amr.n_cell=' + ' '.join(str(i) for i in current_run.n_cell) + runtime_param_string += ' amr.max_grid_size=' + str(current_run.max_grid_size) + runtime_param_string += ' amr.blocking_factor=' + str(current_run.blocking_factor) runtime_param_string += ' max_step=' + str( current_run.n_step ) - runtime_param_list.append( runtime_param_string ) + # runtime_param_list.append( runtime_param_string ) + run_string = get_run_string(current_run, architecture, n_node, count, bin_name, runtime_param_string) + batch_string += run_string + batch_string += 'rm -rf plotfiles lab_frame_data diags\n' + + submit_job_command = get_submit_job_command() # Run the simulations. 
- run_batch_nnode(test_list_n_node, res_dir, bin_name, config_command,\ - architecture=args.architecture, Cname=module_Cname[args.architecture], \ - n_node=n_node, runtime_param_list=runtime_param_list) + run_batch_nnode(test_list_n_node, res_dir, bin_name, config_command, batch_string, submit_job_command) os.chdir(cwd) # submit batch for analysis - process_analysis() + if os.path.exists( 'read_error.txt' ): + os.remove( 'read_error.txt' ) + if os.path.exists( 'read_output.txt' ): + os.remove( 'read_output.txt' ) + process_analysis(args.automated, cwd, compiler, architecture, args.n_node_list, start_date) # read the output file from each test and store timers in # hdf5 file with pandas format @@ -279,10 +225,10 @@ if args.mode == 'run': for n_node in n_node_list: print(n_node) if browse_output_files: + res_dir = res_dir_base + res_dir += '_'.join([run_name, compiler,\ + architecture, str(n_node)]) + '/' for count, current_run in enumerate(test_list): - res_dir = res_dir_base - res_dir += '_'.join([run_name, args.compiler,\ - args.architecture, str(n_node)]) + '/' # Read performance data from the output file output_filename = 'out_' + '_'.join([current_run.input_file, str(n_node), str(current_run.n_mpi_per_node), str(current_run.n_omp), str(count)]) + '.txt' # Read data for all test to put in hdf5 a database @@ -305,8 +251,8 @@ for n_node in n_node_list: # Load file perf_database_file if exists, and # append with results from this scan if os.path.exists(perf_database_file): - df_base = pd.read_hdf(perf_database_file, 'all_data', format='table') - # df_base = pd.read_hdf(perf_database_file, 'all_data') + # df_base = pd.read_hdf(perf_database_file, 'all_data', format='table') + df_base = pd.read_hdf(perf_database_file, 'all_data') updated_df = df_base.append(df_newline, ignore_index=True) else: updated_df = df_newline @@ -314,19 +260,6 @@ for n_node in n_node_list: # (overwrite if file exists) updated_df.to_hdf(perf_database_file, key='all_data', mode='w') - # Rename 
directory with precise date+hour for archive purpose - if rename_archive == True: - loc_counter = 0 - res_dir_arch = res_dir_base - res_dir_arch += '_'.join([year, month, day, run_name, args.compiler,\ - args.architecture, str(n_node), str(loc_counter)]) + '/' - while os.path.exists( res_dir_arch ): - loc_counter += 1 - res_dir_arch = res_dir_base - res_dir_arch += '_'.join([year, month, day, run_name, args.compiler,\ - args.architecture, str(n_node), str(loc_counter)]) + '/' - os.rename( res_dir, res_dir_arch ) - # Extract sub-set of pandas data frame, write it to # csv file and copy this file to perf_logs repo # ------------------------------------------------- @@ -342,19 +275,42 @@ if write_csv: df_small[ df_small['input_file']=='automated_test_6_output_2ppc' ]['time_WritePlotFile'] df_small = df_small.loc[:, ['date', 'input_file', 'git_hashes', 'n_node', 'n_mpi_per_node', 'n_omp', 'rep', 'start_date', 'time_initialization', 'step_time'] ] # Write to csv - df_small.to_csv( 'cori_knl.csv' ) + df_small.to_csv( csv_file[machine] ) # Errors may occur depending on the version of pandas. 
I had errors with v0.21.0 solved with 0.23.0 # Second, move files to perf_logs repo if update_perf_log_repo: + # get perf_logs repo git_repo = git.Repo( perf_logs_repo ) if push_on_perf_log_repo: git_repo.git.stash('save') git_repo.git.pull() - shutil.move( 'cori_knl.csv', perf_logs_repo + '/logs_csv/cori_knl.csv' ) + # move csv file to perf_logs repon and commit the new version + shutil.move( csv_file[machine], perf_logs_repo + '/logs_csv/' + csv_file[machine] ) os.chdir( perf_logs_repo ) sys.path.append('./') import generate_index_html git_repo.git.add('./index.html') - git_repo.git.add('./logs_csv/cori_knl.csv') + git_repo.git.add('./logs_csv/' + csv_file[machine]) index = git_repo.index index.commit("automated tests") + +# Rename all result directories for archiving purposes: +# include date in the name, and a counter to avoid over-writing +for n_node in n_node_list: + if browse_output_files: + res_dir = res_dir_base + res_dir += '_'.join([run_name, compiler,\ + architecture, str(n_node)]) + '/' + # Rename directory with precise date+hour for archive purpose + if rename_archive == True: + loc_counter = 0 + res_dir_arch = res_dir_base + res_dir_arch += '_'.join([year, month, day, run_name, compiler,\ + architecture, str(n_node), str(loc_counter)]) + '/' + while os.path.exists( res_dir_arch ): + loc_counter += 1 + res_dir_arch = res_dir_base + res_dir_arch += '_'.join([year, month, day, run_name, compiler,\ + architecture, str(n_node), str(loc_counter)]) + '/' + print("renaming " + res_dir + " -> " + res_dir_arch) + os.rename( res_dir, res_dir_arch ) diff --git a/Tools/performance_tests/summit.py b/Tools/performance_tests/summit.py new file mode 100644 index 000000000..69598f1fd --- /dev/null +++ b/Tools/performance_tests/summit.py @@ -0,0 +1,135 @@ +# requirements: +# - module load python/3.7.0-anaconda3-5.3.0 + +import os, copy +from functions_perftest import test_element + +def executable_name(compiler,architecture): + return 'perf_tests3d.' 
+ compiler + '.TPROF.MPI.ACC.CUDA.ex' + +def get_config_command(compiler, architecture): + config_command = '' + config_command += 'module load pgi;' + config_command += 'module load cuda;' + return config_command + +# This function runs a batch script with +# dependencies to perform the analysis +# after all performance tests are done. +def process_analysis(automated, cwd, compiler, architecture, n_node_list, start_date): + + batch_string = '''#!/bin/bash +#BSUB -P APH114 +#BSUB -W 00:10 +#BSUB -nnodes 1 +#BSUB -J perf_test +#BSUB -o read_output.txt +#BSUB -e read_error.txt +''' + f_log = open(cwd + 'log_jobids_tmp.txt' ,'r') + for line in f_log.readlines(): + dependency = line.split()[1][1:-1] + batch_string += '#BSUB -w ended(' + dependency + ')\n' + + batch_string += 'python run_automated.py --compiler=' + \ + compiler + ' --architecture=' + architecture + \ + ' --mode=read' + \ + ' --n_node_list=' + '"' + n_node_list + '"' + \ + ' --start_date=' + start_date + if automated == True: + batch_string += ' --automated' + batch_string += '\n' + batch_file = 'bsub_perfread' + f_exe = open(batch_file,'w') + f_exe.write(batch_string) + f_exe.close() + os.system('chmod 700 ' + batch_file) + + print( 'process_analysis line: ' + 'bsub ' + batch_file) + os.system('bsub ' + batch_file) + +# Calculate simulation time. Take 2 min + 2 min / simulation +def time_min(nb_simulations): + return 2. + nb_simulations*2. 
def get_submit_job_command():
    """Return the command used to submit a batch script.

    The leading and trailing spaces are part of the returned string.
    """
    return ' bsub '


def get_batch_string(test_list, job_time_min, Cname, n_node):
    """Build the header of the batch script for one job.

    Parameters:
        test_list: sequence of tests; the `input_file` of the first one is
            used as the job name.
        job_time_min: requested time in minutes, written as `hours:minutes`
            without zero padding.
        Cname: accepted for interface compatibility; not used here.
        n_node: number of nodes, written after `#BSUB -nnodes`.

    Returns:
        The header as a single string, each line terminated by a newline.
    """
    hours = int(job_time_min/60)
    minutes = int(job_time_min%60)
    wall_time = str(hours) + ':' + str(minutes)

    header_lines = [
        '#!/bin/bash',
        '#BSUB -P APH114',
        '#BSUB -W ' + wall_time,
        '#BSUB -nnodes ' + str(n_node),
        '#BSUB -J ' + test_list[0].input_file,
        '#BSUB -e error.txt',
        'module load pgi',
        'module load cuda',
    ]
    return ''.join(entry + '\n' for entry in header_lines)


def get_run_string(current_test, architecture, n_node, count, bin_name, runtime_param_string):
    """Build the `jsrun` command line that runs one test.

    `n_node` is passed to `-n`, and the test's `n_mpi_per_node` is passed to
    each of `-a`, `-g` and `-c`. Standard output is redirected to a file
    named from the input file, node count, MPI ranks per node, OpenMP
    thread count and `count`. `architecture` is not used.

    Returns:
        The command as a string terminated by a newline.
    """
    name_fields = [
        current_test.input_file,
        str(n_node),
        str(current_test.n_mpi_per_node),
        str(current_test.n_omp),
        str(count),
    ]
    output_filename = 'out_' + '_'.join(name_fields) + '.txt'

    per_node = str(current_test.n_mpi_per_node)
    pieces = [
        'jsrun ',
        ' -n ' + str(n_node),
        ' -a ' + per_node + ' -g ' + per_node + ' -c ' + per_node + ' --bind=packed:1 ',
        ' ./' + bin_name + ' ',
        current_test.input_file + ' ',
        runtime_param_string,
        ' > ' + output_filename + '\n',
    ]
    return ''.join(pieces)


def get_test_list(n_repeat):
    """Return the list of tests to run, each one repeated `n_repeat` times.

    Every test is a `test_element` created with `n_mpi_per_node=6` and
    `n_omp=1`; repetitions are independent deep copies and are adjacent in
    the returned list.
    """
    # n_node is kept to None and passed in functions as an external argument
    # That way, several test_element_instance run with the same n_node on the same batch job
    # Columns: input_file, n_cell, max_grid_size, blocking_factor, n_step
    test_table = (
        ('automated_test_1_uniform_rest_32ppc', [128, 128, 192], 256, 32, 10),
        ('automated_test_2_uniform_rest_1ppc', [256, 512, 768], 512, 256, 10),
        ('automated_test_3_uniform_drift_4ppc', [128, 128, 384], 256, 64, 10),
        ('automated_test_4_labdiags_2ppc', [384, 512, 512], 256, 128, 50),
        ('automated_test_5_loadimbalance', [64, 128, 192], 64, 32, 10),
        ('automated_test_6_output_2ppc', [384, 256, 512], 256, 64, 0),
    )

    unique_tests = []
    for input_name, cells, grid_size, block_factor, steps in test_table:
        unique_tests.append( test_element(input_file=input_name,
                                          n_mpi_per_node=6,
                                          n_omp=1,
                                          n_cell=cells,
                                          max_grid_size=grid_size,
                                          blocking_factor=block_factor,
                                          n_step=steps) )

    repeated = []
    for element in unique_tests:
        for _ in range(n_repeat):
            repeated.append(copy.deepcopy(element))
    return repeated