author | 2020-04-10 08:46:04 -0700
---|---
committer | 2020-04-10 08:46:04 -0700
commit | 4660df45d956520d34be190e8ee64f0add0e2460 (patch)
tree | 11f309844d82bdfa2eeb89be897851cdf40491d6 /Tools/PerformanceTests/run_automated.py
parent | f19c0973b082f1b4a76387010ed5367f856ae9e9 (diff)
Reorganize Tools/ into subfolders, in prevision of LibEnsemble scripts (#908)
* reorganize Tools/ into subfolders, in prevision of LibEnsemble scripts
* Oops, also need to let git know some files have been deleted
* caps for consistency
* few paths to fix
Diffstat (limited to 'Tools/PerformanceTests/run_automated.py')
-rw-r--r-- | Tools/PerformanceTests/run_automated.py | 325
1 file changed, 325 insertions, 0 deletions
diff --git a/Tools/PerformanceTests/run_automated.py b/Tools/PerformanceTests/run_automated.py
new file mode 100644
index 000000000..323d69911
--- /dev/null
+++ b/Tools/PerformanceTests/run_automated.py
@@ -0,0 +1,325 @@

# Copyright 2018-2019 Axel Huebl, Luca Fedeli, Maxence Thevenet
#
#
# This file is part of WarpX.
#
# License: BSD-3-Clause-LBNL

import os, sys, shutil, datetime, git
import argparse, time, copy
import pandas as pd
from functions_perftest import store_git_hash, get_file_content, \
    run_batch_nnode, extract_dataframe

# Get the name of the supercomputer and import configuration functions from
# the machine-specific file
if os.getenv("LMOD_SYSTEM_NAME") == 'summit':
    machine = 'summit'
    from summit import executable_name, process_analysis, \
        get_config_command, time_min, get_submit_job_command, \
        get_batch_string, get_run_string, get_test_list
if os.getenv("NERSC_HOST") == 'cori':
    machine = 'cori'
    from cori import executable_name, process_analysis, \
        get_config_command, time_min, get_submit_job_command, \
        get_batch_string, get_run_string, get_test_list

# Typical use: python run_automated.py --n_node_list='1,8,16,32' --automated
# Assumes the warpx, picsar, amrex and perf_logs repos are in the same
# directory, and that the environment variable AUTOMATED_PERF_TESTS contains
# the path to this directory

# Requirements:
# - python packages: gitpython and pandas
# - AUTOMATED_PERF_TESTS: environment variable pointing to where warpx,
#   amrex and picsar are installed ($AUTOMATED_PERF_TESTS/warpx etc.)
# - SCRATCH: environment variable where performance results are written.
#   This script will create the folder $SCRATCH/performance_warpx/

if "AUTOMATED_PERF_TESTS" not in os.environ:
    raise ValueError("environment variable AUTOMATED_PERF_TESTS is not defined.\n"
                     "It should contain the path to the directory where the "
                     "WarpX, AMReX and PICSAR repos are.")
if "SCRATCH" not in os.environ:
    raise ValueError("environment variable SCRATCH is not defined.\n"
                     "This script will create $SCRATCH/performance_warpx/ "
                     "to store performance results.")
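Editor's aside: the machine-specific modules above are imported wholesale, so `summit.py` and `cori.py` must expose the same set of functions. The following is an interface sketch only, with signatures inferred from the call sites later in this script; the bodies are illustrative placeholders, not the real implementations.

```python
# Interface sketch: signatures inferred from how run_automated.py calls these
# functions. The actual summit.py / cori.py bodies are machine-specific.
def executable_name(compiler, architecture):
    return 'perf_tests_executable'   # placeholder binary name

def get_config_command(compiler, architecture):
    return 'module load ...; '       # placeholder environment-setup string

def time_min(n_tests):
    return 30                        # placeholder walltime estimate (minutes)

def get_test_list(n_repeat):
    return []                        # list of test_element instances

def get_batch_string(test_list, job_time_min, module_Cname, n_node):
    return '#!/bin/bash\n'           # batch-script header

def get_run_string(current_run, architecture, n_node, count,
                   bin_name, runtime_param_string):
    return 'srun ...\n'              # one launch line per test (placeholder)

def get_submit_job_command():
    return 'sbatch '                 # e.g. sbatch on Cori, bsub on Summit

def process_analysis(automated, cwd, compiler, architecture, n_node_list,
                     start_date, path_source, path_results):
    pass                             # submits the analysis batch job
```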
# Handle parser
###############
parser = argparse.ArgumentParser( description='Run performance tests and write results in files' )
parser.add_argument('--recompile',
                    dest='recompile',
                    action='store_true',
                    default=False)
parser.add_argument('--commit',
                    dest='commit',
                    action='store_true',
                    default=False)
parser.add_argument('--automated',
                    dest='automated',
                    action='store_true',
                    default=False,
                    help='Use to run the automated test list')
parser.add_argument('--n_node_list',
                    dest='n_node_list',
                    default=[],
                    help='list of number of nodes for the runs', type=str)
parser.add_argument('--start_date',
                    dest='start_date' )
parser.add_argument('--compiler',
                    choices=['gnu', 'intel', 'pgi'],
                    default='intel',
                    help='which compiler to use')
parser.add_argument('--architecture',
                    choices=['cpu', 'knl', 'gpu'],
                    default='knl',
                    help='which architecture to cross-compile for NERSC machines')
parser.add_argument('--mode',
                    choices=['run', 'read', 'browse_output_files'],
                    default='run',
                    help='whether to run perftests or read their perf output. '
                         'run calls read')
parser.add_argument('--path_source',
                    default=None,
                    help='path to parent folder containing amrex, picsar and warpx folders')
parser.add_argument('--path_results',
                    default=None,
                    help='path to result directory, where simulations run')

args = parser.parse_args()
n_node_list_string = args.n_node_list.split(',')
n_node_list = [int(i) for i in n_node_list_string]
start_date = args.start_date
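Editor's aside: a minimal, standalone illustration of how the typical invocation from the header comment flows through this parser (hypothetical argument values):

```python
# Mirrors: python run_automated.py --n_node_list='1,8,16,32' --automated
demo_args = parser.parse_args(['--n_node_list=1,8,16,32', '--automated'])
assert demo_args.automated is True
assert [int(i) for i in demo_args.n_node_list.split(',')] == [1, 8, 16, 32]
```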
" \ + "AMREX_HOME=../../../amrex/ PICSAR_HOME=../../../picsar/ " \ + "EBASE=perf_tests COMP=%s" %compiler_name[compiler] + ";" + make_command = "make -j 16 WARPX_HOME=../.. " \ + "AMREX_HOME=../../../amrex/ PICSAR_HOME=../../../picsar/ " \ + "EBASE=perf_tests COMP=%s" %compiler_name[compiler] + if machine == 'summit': + make_command += ' USE_GPU=TRUE ' + os.system(config_command + make_realclean_command + \ + "rm -r tmp_build_dir *.mod; " + make_command ) + + # Store git hashes for WarpX, AMReX and PICSAR into file, so that + # they can be read when running the analysis. + if os.path.exists( cwd + 'store_git_hashes.txt' ): + os.remove( cwd + 'store_git_hashes.txt' ) + store_git_hash(repo_path=picsar_dir, filename=cwd + 'store_git_hashes.txt', name='picsar') + store_git_hash(repo_path=amrex_dir , filename=cwd + 'store_git_hashes.txt', name='amrex' ) + store_git_hash(repo_path=warpx_dir , filename=cwd + 'store_git_hashes.txt', name='warpx' ) + +# Loop over the tests and run all simulations: +# One batch job submitted per n_node. Several +# tests run within the same batch job. +# -------------------------------------------- +if args.mode == 'run': + if os.path.exists( 'log_jobids_tmp.txt' ): + os.remove( 'log_jobids_tmp.txt' ) + # loop on n_node. One batch script per n_node + for n_node in n_node_list: + res_dir = res_dir_base + res_dir += '_'.join([run_name, compiler, architecture, str(n_node)]) + '/' + runtime_param_list = [] + # Deep copy as we change the attribute n_cell of + # each instance of class test_element + test_list_n_node = copy.deepcopy(test_list) + job_time_min = time_min(len(test_list)) + batch_string = get_batch_string(test_list_n_node, job_time_min, module_Cname[architecture], n_node) + # Loop on tests + for count, current_run in enumerate(test_list_n_node): + current_run.scale_n_cell(n_node) + runtime_param_string = ' amr.n_cell=' + ' '.join(str(i) for i in current_run.n_cell) + runtime_param_string += ' amr.max_grid_size=' + str(current_run.max_grid_size) + runtime_param_string += ' amr.blocking_factor=' + str(current_run.blocking_factor) + runtime_param_string += ' max_step=' + str( current_run.n_step ) + # runtime_param_list.append( runtime_param_string ) + run_string = get_run_string(current_run, architecture, n_node, count, bin_name, runtime_param_string) + batch_string += run_string + batch_string += 'rm -rf plotfiles lab_frame_data diags\n' + + submit_job_command = get_submit_job_command() + # Run the simulations. 
# Loop over the tests and run all simulations:
# one batch job is submitted per n_node; several
# tests run within the same batch job.
# --------------------------------------------
if args.mode == 'run':
    if os.path.exists( 'log_jobids_tmp.txt' ):
        os.remove( 'log_jobids_tmp.txt' )
    # Loop on n_node. One batch script per n_node
    for n_node in n_node_list:
        res_dir = res_dir_base
        res_dir += '_'.join([run_name, compiler, architecture, str(n_node)]) + '/'
        runtime_param_list = []
        # Deep copy as we change the attribute n_cell of
        # each instance of class test_element
        test_list_n_node = copy.deepcopy(test_list)
        job_time_min = time_min(len(test_list))
        batch_string = get_batch_string(test_list_n_node, job_time_min,
                                        module_Cname[architecture], n_node)
        # Loop on tests
        for count, current_run in enumerate(test_list_n_node):
            current_run.scale_n_cell(n_node)
            runtime_param_string = ' amr.n_cell=' + ' '.join(str(i) for i in current_run.n_cell)
            runtime_param_string += ' amr.max_grid_size=' + str(current_run.max_grid_size)
            runtime_param_string += ' amr.blocking_factor=' + str(current_run.blocking_factor)
            runtime_param_string += ' max_step=' + str( current_run.n_step )
            # runtime_param_list.append( runtime_param_string )
            run_string = get_run_string(current_run, architecture, n_node, count,
                                        bin_name, runtime_param_string)
            batch_string += run_string
            batch_string += 'rm -rf plotfiles lab_frame_data diags\n'

        submit_job_command = get_submit_job_command()
        # Run the simulations.
        run_batch_nnode(test_list_n_node, res_dir, cwd, bin_name, config_command,
                        batch_string, submit_job_command)
    os.chdir(cwd)
    # Submit the batch job for analysis
    if os.path.exists( 'read_error.txt' ):
        os.remove( 'read_error.txt' )
    if os.path.exists( 'read_output.txt' ):
        os.remove( 'read_output.txt' )
    process_analysis(args.automated, cwd, compiler, architecture,
                     args.n_node_list, start_date, source_dir_base, res_dir_base)

# Read the output file from each test and store timers in
# an hdf5 file with pandas format
# -------------------------------------------------------
for n_node in n_node_list:
    print(n_node)
    if browse_output_files:
        res_dir = res_dir_base
        res_dir += '_'.join([run_name, compiler,
                             architecture, str(n_node)]) + '/'
        for count, current_run in enumerate(test_list):
            # Read performance data from the output file
            output_filename = 'out_' + '_'.join([current_run.input_file, str(n_node),
                                                 str(current_run.n_mpi_per_node),
                                                 str(current_run.n_omp),
                                                 str(count)]) + '.txt'
            # Read the data for all tests and put it in the hdf5 database.
            # This is an hdf5 file containing ALL the simulation
            # parameters and results. Might be too large for a repo
            df_newline = extract_dataframe(res_dir + output_filename, current_run.n_step)
            # Add all simulation parameters to the dataframe
            df_newline['git_hashes'] = get_file_content(filename=cwd + 'store_git_hashes.txt')
            df_newline['start_date'] = start_date
            df_newline['run_name'] = run_name
            df_newline['input_file'] = current_run.input_file
            df_newline['n_node'] = n_node
            df_newline['n_mpi_per_node'] = current_run.n_mpi_per_node
            df_newline['n_omp'] = current_run.n_omp
            df_newline['n_steps'] = current_run.n_step
            df_newline['rep'] = count % n_repeat
            df_newline['date'] = datetime.datetime.now()
            if store_full_input:
                df_newline['inputs_content'] = get_file_content( filename=cwd + current_run.input_file )
            # Load the file perf_database_file if it exists, and
            # append it with the results from this scan
            if os.path.exists(path_hdf5 + perf_database_file):
                df_base = pd.read_hdf(path_hdf5 + perf_database_file, 'all_data')
                updated_df = df_base.append(df_newline, ignore_index=True)
            else:
                updated_df = df_newline
            # Write the dataframe to file perf_database_file
            # (overwrite if the file exists)
            updated_df.to_hdf(path_hdf5 + perf_database_file, key='all_data',
                              mode='w', format='table')
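Editor's aside: `DataFrame.append`, used in the accumulation step above, was deprecated in pandas 1.4 and removed in pandas 2.0. On modern pandas the same read-append-rewrite can be written with `pd.concat`; a minimal sketch keeping the same file layout and key:

```python
import os
import pandas as pd

def append_to_database(db_path, df_newline):
    # Same behavior as the block above, but using pd.concat instead of
    # the removed DataFrame.append (pandas >= 2.0).
    if os.path.exists(db_path):
        df_base = pd.read_hdf(db_path, 'all_data')
        updated_df = pd.concat([df_base, df_newline], ignore_index=True)
    else:
        updated_df = df_newline
    updated_df.to_hdf(db_path, key='all_data', mode='w', format='table')
```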
# Extract a subset of the pandas data frame, write it to a
# csv file and copy this file to the perf_logs repo
# -------------------------------------------------
if args.mode == 'read' and update_perf_log_repo:
    # Get the perf_logs repo
    git_repo = git.Repo( perf_logs_repo )
    if push_on_perf_log_repo:
        git_repo.git.stash('save')
        git_repo.git.pull()
    os.chdir( perf_logs_repo )
    sys.path.append('./')
    # Importing write_csv generates the csv logs that are committed below
    import write_csv
    git_repo.git.add('./logs_csv/' + csv_file[machine])
    git_repo.git.add('./logs_hdf5/' + perf_database_file)
    index = git_repo.index
    index.commit("automated tests")

# Rename all result directories for archiving purposes:
# include the date in the name, and a counter to avoid overwriting
for n_node in n_node_list:
    if browse_output_files:
        res_dir = res_dir_base
        res_dir += '_'.join([run_name, compiler,
                             architecture, str(n_node)]) + '/'
        # Rename the directory with precise date+hour for archiving purposes
        if rename_archive:
            loc_counter = 0
            res_dir_arch = res_dir_base
            res_dir_arch += '_'.join([year, month, day, run_name, compiler,
                                      architecture, str(n_node), str(loc_counter)]) + '/'
            while os.path.exists( res_dir_arch ):
                loc_counter += 1
                res_dir_arch = res_dir_base
                res_dir_arch += '_'.join([year, month, day, run_name, compiler,
                                          architecture, str(n_node), str(loc_counter)]) + '/'
            print("renaming " + res_dir + " -> " + res_dir_arch)
            os.rename( res_dir, res_dir_arch )
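Editor's aside: a short sketch of how the resulting database can be inspected after a run. The filename follows the `machine + '_results.h5'` convention used in automated mode (the `'summit_results.h5'` name below is just an example), and the columns are the ones written above.

```python
import pandas as pd

# Example filename: machine + '_results.h5' (here assuming machine == 'summit')
df = pd.read_hdf('summit_results.h5', 'all_data')
print(df[['start_date', 'run_name', 'input_file', 'n_node', 'rep']].tail())
```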