Diffstat (limited to 'Tools/performance_tests/functions_perftest.py')
-rw-r--r-- | Tools/performance_tests/functions_perftest.py | 189 |
1 file changed, 189 insertions(+), 0 deletions(-)
diff --git a/Tools/performance_tests/functions_perftest.py b/Tools/performance_tests/functions_perftest.py
new file mode 100644
index 000000000..2085367c7
--- /dev/null
+++ b/Tools/performance_tests/functions_perftest.py
@@ -0,0 +1,189 @@
+import os, shutil, re
+
+def run_batch_nnode(test_list, res_dir, bin_name, config_command, architecture='knl', Cname='knl', n_node=1):
+    # Clean res_dir
+    if os.path.exists(res_dir):
+        shutil.rmtree(res_dir)
+    os.makedirs(res_dir)
+    # Copy files to res_dir
+    cwd = os.environ['WARPX'] + '/Tools/performance_tests/'
+    bin_dir = cwd + 'Bin/'
+    shutil.copy(bin_dir + bin_name, res_dir)
+    os.chdir(res_dir)
+    # Calculate the job time: take 5 min + 5 min per simulation
+    job_time_min = 5. + len(test_list)*5.
+    job_time_str = str(int(job_time_min/60)) + ':' + str(int(job_time_min%60)) + ':00'
+    batch_string = ''
+    batch_string += '#!/bin/bash\n'
+    batch_string += '#SBATCH --job-name=' + test_list[0][0] + '\n'
+    batch_string += '#SBATCH --time=' + job_time_str + '\n'
+    batch_string += '#SBATCH -C ' + Cname + '\n'
+    batch_string += '#SBATCH -N ' + str(n_node) + '\n'
+    batch_string += '#SBATCH -q regular\n'
+    batch_string += '#SBATCH -e error.txt\n'
+    batch_string += '#SBATCH --account=m2852\n'
+    for count, test_item in enumerate(test_list):
+        # test_item reads [input_file, n_node, n_mpi, n_omp]
+        input_file = test_item[0]
+        shutil.copy(cwd + input_file, res_dir)
+        # test_item[1] is not read since it contains the number of nodes, which is a
+        # global parameter. However, we keep it for compatibility with run_alltests.py
+        n_mpi = test_item[2]
+        n_omp = test_item[3]
+        srun_string = ''
+        srun_string += 'export OMP_NUM_THREADS=' + str(n_omp) + '\n'
+        # number of logical cores per MPI process
+        if architecture == 'cpu':
+            cflag_value = max(1, int(32/n_mpi) * 2) # Follow NERSC directives
+        elif architecture == 'knl':
+            cflag_value = max(1, int(64/n_mpi) * 4) # Follow NERSC directives
+        output_filename = 'out_' + '_'.join([input_file, str(n_node), str(n_mpi), str(n_omp), str(count)]) + '.txt'
+        srun_string += 'srun --cpu_bind=cores ' + \
+                       ' -n ' + str(n_node*n_mpi) + \
+                       ' -c ' + str(cflag_value) + \
+                       ' ./' + bin_name + \
+                       ' ' + input_file + \
+                       ' > ' + output_filename + '\n'
+        batch_string += srun_string
+        batch_string += 'rm -rf plt*\n'
+        batch_string += 'rm -rf chk*\n'
+        batch_string += 'rm -rf lab_frame_data\n'
+    batch_file = 'slurm'
+    f_exe = open(batch_file, 'w')
+    f_exe.write(batch_string)
+    f_exe.close()
+    os.system('chmod 700 ' + bin_name)
+    os.system(config_command + 'sbatch ' + batch_file + ' >> ' + cwd + 'log_jobids_tmp.txt')
+    return 0
+
+def run_batch(run_name, res_dir, bin_name, config_command, architecture='knl',
+              Cname='knl', n_node=1, n_mpi=1, n_omp=1):
+    # Clean res_dir
+    if os.path.exists(res_dir):
+        shutil.rmtree(res_dir)
+    os.makedirs(res_dir)
+    # Copy files to res_dir
+    cwd = os.environ['WARPX'] + '/Tools/performance_tests/'
+    bin_dir = cwd + 'Bin/'
+    shutil.copy(bin_dir + bin_name, res_dir)
+    shutil.copyfile(cwd + run_name, res_dir + 'inputs')
+    os.chdir(res_dir)
+    batch_string = ''
+    batch_string += '#!/bin/bash\n'
+    batch_string += '#SBATCH --job-name=' + run_name + str(n_node) + str(n_mpi) + str(n_omp) + '\n'
+    batch_string += '#SBATCH --time=00:20:00\n'
+    batch_string += '#SBATCH -C ' + Cname + '\n'
+    batch_string += '#SBATCH -N ' + str(n_node) + '\n'
+    batch_string += '#SBATCH -q regular\n'
+    batch_string += '#SBATCH -e error.txt\n'
+    batch_string += '#SBATCH --account=m2852\n'
+    batch_string += 'export OMP_NUM_THREADS=' + str(n_omp) + '\n'
+    if architecture == 'cpu':
+        cflag_value = max(1, int(32/n_mpi) * 2) # Follow NERSC directives
+        batch_string += 'srun --cpu_bind=cores ' + \
+                        ' -n ' + str(n_node*n_mpi) + \
+                        ' -c ' + str(cflag_value) + \
+                        ' ./' + bin_name + ' inputs > perf_output.txt\n'
+    elif architecture == 'knl':
+        # number of logical cores per MPI process
+        cflag_value = max(1, int(64/n_mpi) * 4) # Follow NERSC directives
+        batch_string += 'srun --cpu_bind=cores ' + \
+                        ' -n ' + str(n_node*n_mpi) + \
+                        ' -c ' + str(cflag_value) + \
+                        ' ./' + bin_name + ' inputs > perf_output.txt\n'
+    batch_file = 'slurm'
+    f_exe = open(batch_file, 'w')
+    f_exe.write(batch_string)
+    f_exe.close()
+    os.system('chmod 700 ' + bin_name)
+    os.system(config_command + 'sbatch ' + batch_file + ' >> ' + cwd + 'log_jobids_tmp.txt')
+    return 0
+
+# Read output file and return init time and 1-step time
+def read_run_perf(filename, n_steps):
+    timing_list = []
+    # Search inclusive time to get simulation step time
+    partition_limit = 'NCalls Incl. Min Incl. Avg Incl. Max Max %'
+    with open(filename) as file_handler:
+        output_text = file_handler.read()
+    # Get total simulation time
+    line_match_totaltime = re.search('TinyProfiler total time across processes.*', output_text)
+    total_time = float(line_match_totaltime.group(0).split()[8])
+    search_area = output_text.partition(partition_limit)[2]
+    line_match_looptime = re.search('\nWarpX::Evolve().*', search_area)
+    time_wo_initialization = float(line_match_looptime.group(0).split()[3])
+    timing_list += [str(total_time - time_wo_initialization)]
+    timing_list += [str(time_wo_initialization/n_steps)]
+    # Search EXCLUSIVE routine timings
+    partition_limit1 = 'NCalls Excl. Min Excl. Avg Excl. Max Max %'
+    partition_limit2 = 'NCalls Incl. Min Incl. Avg Incl. Max Max %'
+    search_area = output_text.partition(partition_limit1)[2].partition(partition_limit2)[0]
+    pattern_list = ['\nParticleContainer::Redistribute().*',
+                    '\nFabArray::FillBoundary().*',
+                    '\nFabArray::ParallelCopy().*',
+                    '\nPICSAR::CurrentDeposition.*',
+                    '\nPICSAR::FieldGather.*',
+                    '\nPICSAR::ParticlePush.*',
+                    '\nPPC::Evolve::Copy.*',
+                    '\nWarpX::EvolveEM().*',
+                    'Checkpoint().*',
+                    'WriteParticles().*',
+                    '\nVisMF::Write(FabArray).*',
+                    '\nWriteMultiLevelPlotfile().*',
+                    '\nParticleContainer::RedistributeMPI().*']
+    for pattern in pattern_list:
+        timing = ['0']
+        line_match = re.search(pattern, search_area)
+        if line_match is not None:
+            timing = [str(float(line_match.group(0).split()[3])/n_steps)]
+        timing_list += timing
+    return timing_list
+
+# Write time into logfile
+def write_perf_logfile(log_file, log_line):
+    f_log = open(log_file, 'a')
+    f_log.write(log_line)
+    f_log.close()
+    return 0
+
+def get_nsteps(run_name):
+    with open(run_name) as file_handler:
+        run_name_text = file_handler.read()
+    line_match_nsteps = re.search('\nmax_step.*', run_name_text)
+    nsteps = float(line_match_nsteps.group(0).split()[2])
+    return nsteps
+
+
+# Run a performance test in an interactive allocation
+# def run_interactive(run_name, res_dir, n_node=1, n_mpi=1, n_omp=1):
+#     # Clean res_dir
+#     if os.path.exists(res_dir):
+#         shutil.rmtree(res_dir)
+#     os.makedirs(res_dir)
+#     # Copy files to res_dir
+#     shutil.copyfile(bin_dir + bin_name, res_dir + bin_name)
+#     shutil.copyfile(cwd + run_name, res_dir + 'inputs')
+#     os.chdir(res_dir)
+#     if args.architecture == 'cpu':
+#         cflag_value = max(1, int(32/n_mpi) * 2) # Follow NERSC directives
+#         exec_command = 'export OMP_NUM_THREADS=' + str(n_omp) + ';' + \
+#                        'srun --cpu_bind=cores ' + \
+#                        ' -n ' + str(n_node*n_mpi) + \
+#                        ' -c ' + str(cflag_value) + \
+#                        ' ./' + bin_name + ' inputs > perf_output.txt'
+#     elif args.architecture == 'knl':
+#         # number of logical cores per MPI process
+#         cflag_value = max(1, int(68/n_mpi) * 4) # Follow NERSC directives
+#         exec_command = 'export OMP_NUM_THREADS=' + str(n_omp) + ';' + \
+#                        'srun --cpu_bind=cores ' + \
+#                        ' -n ' + str(n_node*n_mpi) + \
+#                        ' -c ' + str(cflag_value) + \
+#                        ' ./' + bin_name + ' inputs > perf_output.txt'
+#     os.system('chmod 700 ' + bin_name)
+#     os.system(config_command + exec_command)
+#     return 0
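For context, here is a minimal sketch of how these helpers chain together: submit one run with run_batch, then, once the Slurm job has finished, parse the TinyProfiler output with get_nsteps and read_run_perf and append the result with write_perf_logfile. It assumes $WARPX points at a WarpX checkout and the executable has been copied to Tools/performance_tests/Bin/; the binary name, input-file name, and results directory below are hypothetical, and config_command is left empty (in the test scripts it can carry shell setup such as module loads, since it is prefixed to the sbatch call).

    import os
    from functions_perftest import run_batch, get_nsteps, read_run_perf, write_perf_logfile

    res_dir = os.environ['SCRATCH'] + '/warpx_perf/'   # hypothetical results directory
    bin_name = 'perf_tests3d.ex'                       # hypothetical executable name
    run_name = 'automated_test_1_uniform_rest_32ppc'   # hypothetical input file

    # Submit one run on 1 KNL node with 8 MPI ranks x 8 OpenMP threads each
    run_batch(run_name, res_dir, bin_name, config_command='',
              architecture='knl', Cname='knl', n_node=1, n_mpi=8, n_omp=8)

    # ... after the job has completed: recover the step count from the input
    # file, convert the TinyProfiler timers to per-step times, and log them
    n_steps = get_nsteps(os.environ['WARPX'] + '/Tools/performance_tests/' + run_name)
    timings = read_run_perf(res_dir + 'perf_output.txt', n_steps)
    write_perf_logfile(res_dir + 'perf_log.txt', ' '.join(timings) + '\n')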