import os, sys, shutil
import argparse, re, time

# This script runs automated performance tests for WarpX.
# It runs the tests in the list test_list defined below, and writes
# results in file performance_log.txt in warpx/performance_tests/

# ---- User's manual ----
# Before running performance tests, make sure you have the latest version
# of performance_log.txt
# A typical execution reads:
# > python run_alltests.py --no-recompile --compiler=gnu --architecture=cpu \
#       --mode=run --no-commit --log_file='my_performance_log.txt'
# These are default values, and will give the same result as
# > python run_alltests.py
# To add a new test item, extend test_list with a line like
# test_list.extend([['my_input_file', n_node, n_mpi, n_omp]]*3)
# - my_input_file must be in warpx/performance_tests
# - the test will run 3 times, to have some statistics
# - the test must run in less than the batch walltime requested in run_batch
#   (currently 00:30:00) or it will time out

# ---- Developer's manual ----
# This script can run in two modes:
# - 'run' mode: for each test item, a batch job is submitted.
#     create folder '$SCRATCH/performance_warpx/'
#     recompile the code if option --recompile is used
#     loop over test_list and submit one batch script per item
#     submit a batch job that executes the script in read mode;
#     this last job runs once all others are completed
# - 'read' mode: get performance data from all test items
#     create the performance log file if it does not exist
#     loop over test_list
#     read initialization time and step time
#     write data into the performance log file
#     push file performance_log.txt to the repo

# Read command-line arguments
# ---------------------------
# Create parser and read arguments
parser = argparse.ArgumentParser(
    description='Run performance tests and write results in files')
parser.add_argument('--recompile', dest='recompile', action='store_true', default=False)
parser.add_argument('--no-recompile', dest='recompile', action='store_false', default=False)
parser.add_argument('--commit', dest='commit', action='store_true', default=False)
parser.add_argument('--no-commit', dest='commit', action='store_false', default=False)
parser.add_argument(
    '--compiler', choices=['gnu', 'intel'], default='gnu',
    help='which compiler to use')
parser.add_argument(
    '--architecture', choices=['cpu', 'knl'], default='cpu',
    help='which architecture to cross-compile for NERSC machines')
parser.add_argument(
    '--mode', choices=['run', 'read'], default='run',
    help='whether to run perftests or read their perf output. run calls read')
parser.add_argument(
    '--log_file', dest='log_file', default='my_performance_log.txt',
    help='name of the log file where data will be written. '
         'Ignored if option --commit is used')
args = parser.parse_args()

log_file = args.log_file
if args.commit:
    log_file = 'performance_log.txt'

# Dictionaries
# compiler names. Used for WarpX executable name
compiler_name = {'intel': 'intel', 'gnu': 'gcc'}
# architecture. Used for WarpX executable name
module_name = {'cpu': 'haswell', 'knl': 'mic-knl'}
# architecture. Used in batch scripts
module_Cname = {'cpu': 'haswell', 'knl': 'knl,quad,cache'}

# Define environment variables
cwd = os.getcwd() + '/'
res_dir_base = os.environ['SCRATCH'] + '/performance_warpx/'
bin_dir = cwd + 'Bin/'
bin_name = ('perf_tests3d.' + args.compiler + '.'
            + module_name[args.architecture] + '.TPROF.MPI.OMP.ex')
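# With the default options (--compiler=gnu --architecture=cpu), bin_name
# evaluates to
#   perf_tests3d.gnu.haswell.TPROF.MPI.OMP.ex
# which is the executable expected in Bin/ after compilation.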
log_dir = cwd

day = time.strftime('%d')
month = time.strftime('%m')
year = time.strftime('%Y')

# Initialize tests
# ----------------
if args.mode == 'run':
    # Set default options for compilation and execution
    config_command = ''
    config_command += 'module unload darshan;'
    config_command += 'module load craype-hugepages4M;'
    if args.architecture == 'knl':
        if args.compiler == 'intel':
            config_command += 'module unload PrgEnv-gnu;'
            config_command += 'module load PrgEnv-intel;'
        elif args.compiler == 'gnu':
            config_command += 'module unload PrgEnv-intel;'
            config_command += 'module load PrgEnv-gnu;'
        config_command += 'module unload craype-haswell;'
        config_command += 'module load craype-mic-knl;'
    elif args.architecture == 'cpu':
        if args.compiler == 'intel':
            config_command += 'module unload PrgEnv-gnu;'
            config_command += 'module load PrgEnv-intel;'
        elif args.compiler == 'gnu':
            config_command += 'module unload PrgEnv-intel;'
            config_command += 'module load PrgEnv-gnu;'
        config_command += 'module unload craype-mic-knl;'
        config_command += 'module load craype-haswell;'
    # Create main result directory if it does not exist
    if not os.path.exists(res_dir_base):
        os.mkdir(res_dir_base)
    # Recompile if requested
    if args.recompile:
        with open(cwd + 'GNUmakefile_perftest') as makefile_handler:
            makefile_text = makefile_handler.read()
        makefile_text = re.sub('\nCOMP.*', '\nCOMP=%s' % compiler_name[args.compiler], makefile_text)
        with open(cwd + 'GNUmakefile_perftest', 'w') as makefile_handler:
            makefile_handler.write(makefile_text)
        os.system(config_command + " make -f GNUmakefile_perftest realclean ; " +
                  " rm -r tmp_build_dir *.mod; make -j 8 -f GNUmakefile_perftest")
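# Note on the recompile step above: the re.sub call assumes GNUmakefile_perftest
# contains a line starting with 'COMP' (e.g. 'COMP=gcc'); only that line is
# rewritten to match the --compiler option, the rest of the makefile is left
# untouched. config_command is prepended to the os.system() calls so that the
# module environment set up above applies to the shell that runs the command.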
# Define functions to run a test and analyse results
# --------------------------------------------------
# Run a performance test in an interactive allocation
def run_interactive(run_name, res_dir, n_node=1, n_mpi=1, n_omp=1):
    # Clean res_dir
    if os.path.exists(res_dir):
        shutil.rmtree(res_dir)
    os.makedirs(res_dir)
    # Copy files to res_dir
    shutil.copyfile(bin_dir + bin_name, res_dir + bin_name)
    shutil.copyfile(cwd + run_name, res_dir + 'inputs')
    os.chdir(res_dir)
    if args.architecture == 'cpu':
        cflag_value = max(1, int(32/n_mpi) * 2) # Follow NERSC directives
        exec_command = 'export OMP_NUM_THREADS=' + str(n_omp) + ';' + \
                       'srun --cpu_bind=cores ' + \
                       ' -n ' + str(n_node*n_mpi) + \
                       ' -c ' + str(cflag_value) + \
                       ' ./' + bin_name + ' inputs > perf_output.txt'
    elif args.architecture == 'knl':
        # number of logical cores per MPI process
        cflag_value = max(1, int(68/n_mpi) * 4) # Follow NERSC directives
        exec_command = 'export OMP_NUM_THREADS=' + str(n_omp) + ';' + \
                       'srun --cpu_bind=cores ' + \
                       ' -n ' + str(n_node*n_mpi) + \
                       ' -c ' + str(cflag_value) + \
                       ' ./' + bin_name + ' inputs > perf_output.txt'
    os.system('chmod 700 ' + bin_name)
    os.system(config_command + exec_command)
    return 0

# Run a performance test by submitting a batch job
def run_batch(run_name, res_dir, n_node=1, n_mpi=1, n_omp=1):
    # Clean res_dir
    if os.path.exists(res_dir):
        shutil.rmtree(res_dir)
    os.makedirs(res_dir)
    # Copy files to res_dir
    shutil.copyfile(bin_dir + bin_name, res_dir + bin_name)
    shutil.copyfile(cwd + run_name, res_dir + 'inputs')
    os.chdir(res_dir)
    batch_string = ''
    batch_string += '#!/bin/bash\n'
    batch_string += '#SBATCH --job-name=' + run_name + str(n_node) + str(n_mpi) + str(n_omp) + '\n'
    batch_string += '#SBATCH --time=00:30:00\n'
    batch_string += '#SBATCH -C ' + module_Cname[args.architecture] + '\n'
    batch_string += '#SBATCH -N ' + str(n_node) + '\n'
    batch_string += '#SBATCH --partition=regular\n'
    batch_string += '#SBATCH --qos=normal\n'
    batch_string += '#SBATCH -e error.txt\n'
    batch_string += '#SBATCH --account=m2852\n'
    batch_string += 'export OMP_NUM_THREADS=' + str(n_omp) + '\n'
    if args.architecture == 'cpu':
        cflag_value = max(1, int(32/n_mpi) * 2) # Follow NERSC directives
        batch_string += 'srun --cpu_bind=cores ' + \
                        ' -n ' + str(n_node*n_mpi) + \
                        ' -c ' + str(cflag_value) + \
                        ' ./' + bin_name + ' inputs > perf_output.txt\n'
    elif args.architecture == 'knl':
        # number of logical cores per MPI process
        cflag_value = max(1, int(64/n_mpi) * 4) # Follow NERSC directives
        batch_string += 'srun --cpu_bind=cores ' + \
                        ' -n ' + str(n_node*n_mpi) + \
                        ' -c ' + str(cflag_value) + \
                        ' ./' + bin_name + ' inputs > perf_output.txt\n'
    batch_file = 'slurm'
    f_exe = open(batch_file, 'w')
    f_exe.write(batch_string)
    f_exe.close()
    os.system('chmod 700 ' + bin_name)
    os.system(config_command + 'sbatch ' + batch_file + ' >> ' + cwd + 'log_jobids_tmp.txt')
    return 0
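# For illustration, with the default cpu/gnu options a test item
# ['uniform_plasma', 1, 8, 16] makes run_batch() write a 'slurm' file roughly
# like the following (values derived from the code above, not copied from an
# actual run):
#
#   #!/bin/bash
#   #SBATCH --job-name=uniform_plasma1816
#   #SBATCH --time=00:30:00
#   #SBATCH -C haswell
#   #SBATCH -N 1
#   #SBATCH --partition=regular
#   #SBATCH --qos=normal
#   #SBATCH -e error.txt
#   #SBATCH --account=m2852
#   export OMP_NUM_THREADS=16
#   srun --cpu_bind=cores  -n 8 -c 8 ./perf_tests3d.gnu.haswell.TPROF.MPI.OMP.ex inputs > perf_output.txt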
# Read the output file and return the initialization time and the time per step
def read_run_perf(filename):
    timing_list = []
    # Search inclusive times to get the simulation step time
    partition_limit = 'NCalls Incl. Min Incl. Avg Incl. Max Max %'
    with open(filename) as file_handler:
        output_text = file_handler.read()
    # Get total simulation time
    line_match_totaltime = re.search('TinyProfiler total time across processes.*', output_text)
    total_time = float(line_match_totaltime.group(0).split()[8])
    search_area = output_text.partition(partition_limit)[2]
    line_match_looptime = re.search('\nWarpX::Evolve().*', search_area)
    time_wo_initialization = float(line_match_looptime.group(0).split()[3])
    timing_list += [str(total_time - time_wo_initialization)]
    timing_list += [str(time_wo_initialization/n_steps)]
    # Search exclusive times to get the timing of individual routines
    partition_limit1 = 'NCalls Excl. Min Excl. Avg Excl. Max Max %'
    partition_limit2 = 'NCalls Incl. Min Incl. Avg Incl. Max Max %'
    search_area = output_text.partition(partition_limit1)[2].partition(partition_limit2)[0]
    pattern_list = ['\nParticleContainer::Redistribute().*',
                    '\nFabArray::FillBoundary().*',
                    '\nFabArray::ParallelCopy().*',
                    '\nPICSAR::CurrentDeposition.*',
                    '\nPICSAR::FieldGather.*',
                    '\nPICSAR::ParticlePush.*',
                    '\nPPC::Evolve::Copy.*',
                    '\nWarpX::EvolveEM().*',
                    'NArrayInt>::Checkpoint().*',
                    'NArrayInt>::WriteParticles().*',
                    '\nVisMF::Write_FabArray.*',
                    '\nWriteMultiLevelPlotfile().*']
    for pattern in pattern_list:
        # Default to 0 when a routine does not appear in the output
        timing = ['0']
        line_match = re.search(pattern, search_area)
        if line_match is not None:
            timing = [str(float(line_match.group(0).split()[3])/n_steps)]
        timing_list += timing
    return timing_list

# Write timings into the log file
def write_perf_logfile(log_file):
    log_line = ' '.join([year, month, day, run_name, args.compiler,
                         args.architecture, str(n_node), str(n_mpi),
                         str(n_omp)] + timing_list + ['\n'])
    f_log = open(log_file, 'a')
    f_log.write(log_line)
    f_log.close()
    return 0

# Read the number of steps from the input file
def get_nsteps(runname):
    with open(runname) as file_handler:
        runname_text = file_handler.read()
    line_match_nsteps = re.search('\nmax_step.*', runname_text)
    nsteps = float(line_match_nsteps.group(0).split()[2])
    return nsteps

# Submit the job that re-runs this script in read mode, once all runs are done
def process_analysis():
    dependencies = ''
    f_log = open(cwd + 'log_jobids_tmp.txt', 'r')
    for count, current_run in enumerate(test_list):
        line = f_log.readline()
        print(line)
        dependencies += line.split()[3] + ':'
    f_log.close()
    batch_string = ''
    batch_string += '#!/bin/bash\n'
    batch_string += '#SBATCH --job-name=perftests_read\n'
    batch_string += '#SBATCH --time=00:05:00\n'
    batch_string += '#SBATCH -C ' + module_Cname[args.architecture] + '\n'
    batch_string += '#SBATCH -N 1\n'
    batch_string += '#SBATCH -S 4\n'
    batch_string += '#SBATCH --partition=regular\n'
    batch_string += '#SBATCH --qos=normal\n'
    batch_string += '#SBATCH -e read_error.txt\n'
    batch_string += '#SBATCH -o read_output.txt\n'
    batch_string += '#SBATCH --mail-type=end\n'
    batch_string += '#SBATCH --account=m2852\n'
    batch_string += 'python ' + __file__ + ' --no-recompile --compiler=' + \
                    args.compiler + ' --architecture=' + args.architecture + \
                    ' --mode=read' + ' --log_file=' + log_file
    if args.commit:
        batch_string += ' --commit'
    batch_string += '\n'
    batch_file = 'slurm_perfread'
    f_exe = open(batch_file, 'w')
    f_exe.write(batch_string)
    f_exe.close()
    os.system('chmod 700 ' + batch_file)
    os.system('sbatch --dependency afterok:' + dependencies[0:-1] + ' ' + batch_file)
    return 0
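# Example (made-up numbers and date): for a test item ['uniform_plasma', 1, 8, 16]
# read on 2024-03-12, read_run_perf() returns a list of strings such as
#   ['2.5', '0.12', '0.008', ...]
# i.e. the initialization time, the time per iteration, then one entry per
# pattern in pattern_list (all in seconds, averaged per step), and
# write_perf_logfile() appends a single space-separated line to the log file:
#   2024 03 12 uniform_plasma gnu cpu 1 8 16 2.5 0.12 0.008 ...
# in the column order given by the header written in 'read' mode below.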
# Loop over the tests and return run time + details
# -------------------------------------------------
# Each element of test_list contains
# [str run_name, int n_node, int n_mpi PER NODE, int n_omp]
test_list = []
n_repeat = 3
filename1 = 'uniform_plasma'
test_list.extend([[filename1, 1, 8, 16]]*n_repeat)
test_list.extend([[filename1, 1, 4, 32]]*n_repeat)
test_list.extend([[filename1, 2, 4, 32]]*n_repeat)
n_tests = len(test_list)

if args.mode == 'run':
    # Remove file log_jobids_tmp.txt if it exists.
    # This file contains the jobid of every perf test.
    # It is used to manage the analysis script dependencies.
    if os.path.isfile(cwd + 'log_jobids_tmp.txt'):
        os.remove(cwd + 'log_jobids_tmp.txt')
    for count, current_run in enumerate(test_list):
        # Results folder
        print('run ' + str(current_run))
        run_name = current_run[0]
        n_node = current_run[1]
        n_mpi = current_run[2]
        n_omp = current_run[3]
        n_steps = get_nsteps(cwd + run_name)
        res_dir = res_dir_base
        res_dir += '_'.join([year, month, day, run_name, args.compiler,
                             args.architecture, str(n_node), str(n_mpi),
                             str(n_omp)]) + '/'
        # Run the simulation.
        # If you are currently in an interactive session and want to run
        # interactively, just replace run_batch with run_interactive.
        run_batch(run_name, res_dir, n_node=n_node, n_mpi=n_mpi, n_omp=n_omp)
    os.chdir(cwd)
    process_analysis()

if args.mode == 'read':
    # Create the log file for performance tests if it does not exist
    if not os.path.isfile(log_dir + log_file):
        log_line = '## year month day run_name compiler architecture n_node n_mpi ' + \
                   'n_omp time_initialization time_one_iteration Redistribute ' + \
                   'FillBoundary ParallelCopy CurrentDeposition FieldGather ' + \
                   'ParticlePush Copy EvolveEM Checkpoint ' + \
                   'WriteParticles Write_FabArray ' + \
                   'WriteMultiLevelPlotfile (unit: second)\n'
        f_log = open(log_dir + log_file, 'a')
        f_log.write(log_line)
        f_log.close()
    for count, current_run in enumerate(test_list):
        # Results folder
        print('read ' + str(current_run))
        run_name = current_run[0]
        n_node = current_run[1]
        n_mpi = current_run[2]
        n_omp = current_run[3]
        n_steps = get_nsteps(cwd + run_name)
        print('n_steps = ' + str(n_steps))
        res_dir = res_dir_base
        res_dir += '_'.join([year, month, day, run_name, args.compiler,
                             args.architecture, str(n_node), str(n_mpi),
                             str(n_omp)]) + '/'
        # Read performance data from the output file
        timing_list = read_run_perf(res_dir + 'perf_output.txt')
        # Write performance data to the performance log file
        write_perf_logfile(log_dir + log_file)
    # Store test parameters for the record
    dir_record_base = './perf_warpx_record/'
    if not os.path.exists(dir_record_base):
        os.mkdir(dir_record_base)
    count = 0
    dir_record = dir_record_base + '_'.join([year, month, day]) + '_' + str(count)
    while os.path.exists(dir_record):
        count += 1
        dir_record = dir_record_base + '_'.join([year, month, day]) + '_' + str(count)
    os.mkdir(dir_record)
    shutil.copy(__file__, dir_record)
    shutil.copy(log_dir + log_file, dir_record)
    for count, current_run in enumerate(test_list):
        shutil.copy(current_run[0], dir_record)
    # Commit results to the repo
    if args.commit:
        os.system('git add ' + log_dir + log_file + ';'
                  'git commit -m "performance tests";'
                  'git push -u origin master')
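# A quick way to inspect the resulting log from an interactive Python session
# (sketch only; assumes numpy is available and that the log was written by the
# 'read' mode above):
#
#   import numpy as np
#   data = np.genfromtxt('performance_log.txt', dtype=None, encoding=None)
#   # each row is: year month day run_name compiler architecture n_node n_mpi
#   # n_omp followed by the timings, in the order of the '##' header line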