import os, sys, shutil, datetime, git
import argparse, re, time, copy
import pandas as pd
from functions_perftest import store_git_hash, get_file_content, \
run_batch_nnode, extract_dataframe
# Get name of supercomputer and import configuration functions from
# machine-specific file
if os.getenv("LMOD_SYSTEM_NAME") == 'summit':
machine = 'summit'
from summit import executable_name, process_analysis, \
get_config_command, time_min, get_submit_job_command, \
get_batch_string, get_run_string, get_test_list
if os.getenv("NERSC_HOST") == 'cori':
machine = 'cori'
from cori import executable_name, process_analysis, \
get_config_command, time_min, get_submit_job_command, \
get_batch_string, get_run_string, get_test_list
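# Abort early with an explicit message if no supported machine was detected;
# otherwise the machine-specific functions imported above are undefined and the
# script fails later with a confusing NameError.
if os.getenv("LMOD_SYSTEM_NAME") != 'summit' and os.getenv("NERSC_HOST") != 'cori':
    raise ValueError("Unsupported machine: this script currently supports "
                     "Summit (LMOD_SYSTEM_NAME=summit) and Cori (NERSC_HOST=cori).")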
# typical use: python run_automated.py --n_node_list='1,8,16,32' --automated
# Assumes that the warpx, picsar, amrex and perf_logs repos are in the same
# directory, and that the environment variable AUTOMATED_PERF_TESTS contains
# the path to this directory.
# Requirements:
# - python packages: gitpython and pandas
# - AUTOMATED_PERF_TESTS: environment variable pointing to the directory where
#   warpx, amrex and picsar are installed ($AUTOMATED_PERF_TESTS/warpx etc.)
# - SCRATCH: environment variable pointing to where performance results are
#   written. This script will create the folder $SCRATCH/performance_warpx/
if "AUTOMATED_PERF_TESTS" not in os.environ:
raise ValueError("environment variable AUTOMATED_PERF_TESTS is not defined.\n"
"It should contain the path to the directory where WarpX, "
"AMReX and PICSAR repos are.")
if "SCRATCH" not in os.environ:
raise ValueError("environment variable SCRATCH is not defined.\n"
"This script will create $SCRATCH/performance_warpx/ "
"to store performance results.")
# Handle parser
###############
parser = argparse.ArgumentParser( description='Run performance tests and write results in files' )
parser.add_argument('--recompile',
                    dest='recompile',
                    action='store_true',
                    default=False,
                    help='recompile the executable before running the tests')
parser.add_argument('--commit',
dest='commit',
action='store_true',
default=False)
parser.add_argument('--automated',
dest='automated',
action='store_true',
default=False,
help='Use to run the automated test list')
parser.add_argument('--n_node_list',
                    dest='n_node_list',
                    default='',
                    help='comma-separated list of numbers of nodes for the runs',
                    type=str)
parser.add_argument('--start_date',
                    dest='start_date',
                    help='start date of the test series (set automatically in run mode)')
parser.add_argument('--compiler',
choices=['gnu', 'intel', 'pgi'],
default='intel',
help='which compiler to use')
parser.add_argument('--architecture',
choices=['cpu', 'knl', 'gpu'],
default='knl',
help='which architecture to cross-compile for NERSC machines')
parser.add_argument('--mode',
choices=['run', 'read', 'browse_output_files', 'write_csv'],
default='run',
help='whether to run perftests or read their perf output. run calls read')
args = parser.parse_args()
n_node_list_string = args.n_node_list.split(',') if args.n_node_list else []
n_node_list = [int(i) for i in n_node_list_string]
start_date = args.start_date
compiler = args.compiler
architecture = args.architecture
# Set behavior variables
########################
write_csv = False
browse_output_files = False
if args.mode == 'write_csv':
write_csv = True
if args.mode == 'browse_output_files':
    browse_output_files = True
if args.mode == 'read':
write_csv = True
browse_output_files = True
recompile = args.recompile
perf_database_file = 'my_tests_database.h5'
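# Default behavior flags for non-automated runs. These values are assumptions
# chosen so that the script does not fail with a NameError when --automated is
# not passed; they are all overridden in the --automated branch below.
run_name = 'custom_tests'
rename_archive = False
store_full_input = False
update_perf_log_repo = False
push_on_perf_log_repo = False
pull_3_repos = False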
if args.automated == True:
run_name = 'automated_tests'
perf_database_file = 'automated_tests_database.h5'
rename_archive = True
store_full_input = False
update_perf_log_repo = True
push_on_perf_log_repo = False
pull_3_repos = True
recompile = True
if machine == 'summit':
compiler = 'pgi'
architecture = 'gpu'
# List of tests to perform
# ------------------------
# Each test runs n_repeat times
n_repeat = 2
# test_list is machine-specific
test_list = get_test_list(n_repeat)
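# Each element of test_list is expected to provide the attributes used below:
# input_file, n_cell, max_grid_size, blocking_factor, n_step, n_mpi_per_node,
# n_omp, and a scale_n_cell(n_node) method that adapts n_cell to the number of nodes.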
# Define directories
# ------------------
source_dir_base = os.environ['AUTOMATED_PERF_TESTS']
warpx_dir = source_dir_base + '/warpx/'
picsar_dir = source_dir_base + '/picsar/'
amrex_dir = source_dir_base + '/amrex/'
res_dir_base = os.environ['SCRATCH'] + '/performance_warpx/'
perf_logs_repo = source_dir_base + '/perf_logs/'
# Define dictionaries
# -------------------
compiler_name = {'intel': 'intel', 'gnu': 'gcc', 'pgi':'pgi'}
module_Cname = {'cpu': 'haswell', 'knl': 'knl,quad,cache', 'gpu':''}
csv_file = {'cori':'cori_knl.csv', 'summit':'summit.csv'}
cwd = os.getcwd() + '/'
bin_dir = cwd + 'Bin/'
bin_name = executable_name(compiler, architecture)
log_dir = cwd
perf_database_file = cwd + perf_database_file
day = time.strftime('%d')
month = time.strftime('%m')
year = time.strftime('%Y')
# Initialize tests
# ----------------
if args.mode == 'run':
start_date = datetime.datetime.now().strftime("%Y_%m_%d_%H_%M_%S")
# Set default options for compilation and execution
config_command = get_config_command(compiler, architecture)
# Create main result directory if does not exist
if not os.path.exists(res_dir_base):
os.mkdir(res_dir_base)
# Recompile if requested
# ----------------------
if recompile == True:
if pull_3_repos == True:
git_repo = git.cmd.Git( picsar_dir )
git_repo.pull()
git_repo = git.cmd.Git( amrex_dir )
git_repo.pull()
git_repo = git.cmd.Git( warpx_dir )
git_repo.pull()
# Copy WarpX/GNUmakefile to current directory and recompile
# with specific options for automated performance tests.
    # This way, the performance-test compilation does not interfere with the
    # user's own compilation.
shutil.copyfile("../../GNUmakefile","./GNUmakefile")
make_realclean_command = " make realclean WARPX_HOME=../.. " \
"AMREX_HOME=../../../amrex/ PICSAR_HOME=../../../picsar/ " \
"EBASE=perf_tests COMP=%s" %compiler_name[compiler] + ";"
make_command = "make -j 16 WARPX_HOME=../.. " \
"AMREX_HOME=../../../amrex/ PICSAR_HOME=../../../picsar/ " \
"EBASE=perf_tests COMP=%s" %compiler_name[compiler]
if machine == 'summit':
make_command += ' USE_GPU=TRUE '
os.system(config_command + make_realclean_command + \
"rm -r tmp_build_dir *.mod; " + make_command )
# Store git hashes for WarpX, AMReX and PICSAR into file, so that
# they can be read when running the analysis.
if os.path.exists( cwd + 'store_git_hashes.txt' ):
os.remove( cwd + 'store_git_hashes.txt' )
store_git_hash(repo_path=picsar_dir, filename=cwd + 'store_git_hashes.txt', name='picsar')
store_git_hash(repo_path=amrex_dir , filename=cwd + 'store_git_hashes.txt', name='amrex' )
store_git_hash(repo_path=warpx_dir , filename=cwd + 'store_git_hashes.txt', name='warpx' )
# Loop over the tests and run all simulations:
# One batch job submitted per n_node. Several
# tests run within the same batch job.
# --------------------------------------------
if args.mode == 'run':
if os.path.exists( 'log_jobids_tmp.txt' ):
os.remove( 'log_jobids_tmp.txt' )
# loop on n_node. One batch script per n_node
for n_node in n_node_list:
res_dir = res_dir_base
res_dir += '_'.join([run_name, compiler, architecture, str(n_node)]) + '/'
# Deep copy as we change the attribute n_cell of
# each instance of class test_element
test_list_n_node = copy.deepcopy(test_list)
job_time_min = time_min(len(test_list))
batch_string = get_batch_string(test_list_n_node, job_time_min, module_Cname[architecture], n_node)
# Loop on tests
for count, current_run in enumerate(test_list_n_node):
current_run.scale_n_cell(n_node)
runtime_param_string = ' amr.n_cell=' + ' '.join(str(i) for i in current_run.n_cell)
runtime_param_string += ' amr.max_grid_size=' + str(current_run.max_grid_size)
runtime_param_string += ' amr.blocking_factor=' + str(current_run.blocking_factor)
runtime_param_string += ' max_step=' + str( current_run.n_step )
run_string = get_run_string(current_run, architecture, n_node, count, bin_name, runtime_param_string)
batch_string += run_string
batch_string += 'rm -rf plotfiles lab_frame_data diags\n'
submit_job_command = get_submit_job_command()
# Run the simulations.
run_batch_nnode(test_list_n_node, res_dir, bin_name, config_command, batch_string, submit_job_command)
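    # run_batch_nnode presumably changes directory into each res_dir when
    # submitting; switch back to the original working directory once all jobs
    # have been submitted.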
os.chdir(cwd)
# submit batch for analysis
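    # (In the automated workflow, process_analysis is expected to submit a
    # follow-up job, dependent on the runs above, that re-runs this script in
    # read mode to collect the timers.)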
if os.path.exists( 'read_error.txt' ):
os.remove( 'read_error.txt' )
if os.path.exists( 'read_output.txt' ):
os.remove( 'read_output.txt' )
process_analysis(args.automated, cwd, compiler, architecture, args.n_node_list, start_date)
# read the output file from each test and store timers in
# hdf5 file with pandas format
# -------------------------------------------------------
for n_node in n_node_list:
print(n_node)
if browse_output_files:
res_dir = res_dir_base
res_dir += '_'.join([run_name, compiler,\
architecture, str(n_node)]) + '/'
for count, current_run in enumerate(test_list):
# Read performance data from the output file
output_filename = 'out_' + '_'.join([current_run.input_file, str(n_node), str(current_run.n_mpi_per_node), str(current_run.n_omp), str(count)]) + '.txt'
            # Read the data for all tests and put it in an hdf5 database.
            # This hdf5 file contains ALL the simulation parameters and
            # results, so it might be too large to keep in a repo.
df_newline = extract_dataframe(res_dir + output_filename, current_run.n_step)
# Add all simulation parameters to the dataframe
df_newline['git_hashes'] = get_file_content(filename=cwd+'store_git_hashes.txt')
df_newline['start_date'] = start_date
df_newline['run_name'] = run_name
df_newline['input_file'] = current_run.input_file
df_newline['n_node'] = n_node
df_newline['n_mpi_per_node'] = current_run.n_mpi_per_node
df_newline['n_omp'] = current_run.n_omp
df_newline['n_steps'] = current_run.n_step
df_newline['rep'] = count%n_repeat
df_newline['date'] = datetime.datetime.now()
if store_full_input:
df_newline['inputs_content'] = get_file_content( filename=cwd+current_run.input_file )
# Load file perf_database_file if exists, and
# append with results from this scan
if os.path.exists(perf_database_file):
# df_base = pd.read_hdf(perf_database_file, 'all_data', format='table')
                df_base = pd.read_hdf(perf_database_file, 'all_data')
                # DataFrame.append was removed in recent pandas versions;
                # pd.concat performs the same row-wise append here
                updated_df = pd.concat([df_base, df_newline], ignore_index=True)
else:
updated_df = df_newline
# Write dataframe to file perf_database_file
# (overwrite if file exists)
updated_df.to_hdf(perf_database_file, key='all_data', mode='w')
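# Example of inspecting the resulting database interactively (not executed here;
# time_running is one of the timers extracted by extract_dataframe):
#   df = pd.read_hdf(perf_database_file, 'all_data')
#   print(df.groupby(['input_file', 'n_node'])['time_running'].mean())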
# Extract sub-set of pandas data frame, write it to
# csv file and copy this file to perf_logs repo
# -------------------------------------------------
if write_csv:
    # Extract a small subset of the data frame and write it to a csv file.
    # First, generate the csv file
df = pd.read_hdf( perf_database_file )
# One large file
df.loc[:,'step_time'] = pd.Series(df['time_running']/df['n_steps'], index=df.index)
# Make smaller dataframe with only data to be written to csv file
df_small = df.copy()
df_small.loc[ df_small['input_file']=='automated_test_6_output_2ppc', 'step_time'] = \
df_small[ df_small['input_file']=='automated_test_6_output_2ppc' ]['time_WritePlotFile']
df_small = df_small.loc[:, ['date', 'input_file', 'git_hashes', 'n_node', 'n_mpi_per_node', 'n_omp', 'rep', 'start_date', 'time_initialization', 'step_time'] ]
# Write to csv
df_small.to_csv( csv_file[machine] )
# Errors may occur depending on the version of pandas. I had errors with v0.21.0 solved with 0.23.0
# Second, move files to perf_logs repo
if update_perf_log_repo:
# get perf_logs repo
git_repo = git.Repo( perf_logs_repo )
if push_on_perf_log_repo:
git_repo.git.stash('save')
git_repo.git.pull()
        # move csv file to perf_logs repo and commit the new version
shutil.move( csv_file[machine], perf_logs_repo + '/logs_csv/' + csv_file[machine] )
os.chdir( perf_logs_repo )
sys.path.append('./')
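        # Importing generate_index_html presumably rebuilds index.html as an
        # import side effect; the refreshed file is added and committed below.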
import generate_index_html
git_repo.git.add('./index.html')
git_repo.git.add('./logs_csv/' + csv_file[machine])
index = git_repo.index
index.commit("automated tests")
# Rename all result directories for archiving purposes:
# include date in the name, and a counter to avoid over-writing
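# For instance, with the automated settings on Summit a result directory would
# be renamed to something like
# $SCRATCH/performance_warpx/2024_01_15_automated_tests_pgi_gpu_8_0/
# where the trailing counter avoids overwriting an earlier archive from the same day.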
for n_node in n_node_list:
if browse_output_files:
res_dir = res_dir_base
res_dir += '_'.join([run_name, compiler,\
architecture, str(n_node)]) + '/'
# Rename directory with precise date+hour for archive purpose
if rename_archive == True:
loc_counter = 0
res_dir_arch = res_dir_base
res_dir_arch += '_'.join([year, month, day, run_name, compiler,\
architecture, str(n_node), str(loc_counter)]) + '/'
while os.path.exists( res_dir_arch ):
loc_counter += 1
res_dir_arch = res_dir_base
res_dir_arch += '_'.join([year, month, day, run_name, compiler,\
architecture, str(n_node), str(loc_counter)]) + '/'
print("renaming " + res_dir + " -> " + res_dir_arch)
os.rename( res_dir, res_dir_arch )