# Copyright 2018-2019 Axel Huebl, Luca Fedeli, Maxence Thevenet
# Remi Lehe
#
# This file is part of WarpX.
#
# License: BSD-3-Clause-LBNL
import copy
import os
import re
import shutil
import pandas as pd
import numpy as np
import git
# import cori
# import summit
# Each instance of this class contains information for a single test.
class test_element():
def __init__(self, input_file=None, n_node=None, n_mpi_per_node=None,
n_omp=None, n_cell=None, n_step=None, max_grid_size=None,
blocking_factor=None):
self.input_file = input_file
self.n_node = n_node
self.n_mpi_per_node = n_mpi_per_node
self.n_omp = n_omp
self.n_cell = n_cell
self.n_step = n_step
self.max_grid_size = max_grid_size
self.blocking_factor = blocking_factor
    # Scale n_cell with the number of nodes (for weak scaling), doubling the
    # cells along one dimension at a time; see also the module-level
    # scale_n_cell below
    def scale_n_cell(self, n_node=0):
n_cell_scaled = copy.deepcopy(self.n_cell)
index_dim = 0
while n_node > 1:
n_cell_scaled[index_dim] *= 2
            n_node //= 2  # integer division keeps n_node an int
index_dim = (index_dim+1) % 3
self.n_cell = n_cell_scaled
# Module-level variant of test_element.scale_n_cell: return a scaled copy
# of ncell without modifying the input list
def scale_n_cell(ncell, n_node):
ncell_scaled = ncell[:]
index_dim = 0
while n_node > 1:
ncell_scaled[index_dim] *= 2
        n_node //= 2  # integer division keeps n_node an int
index_dim = (index_dim+1) % 3
return ncell_scaled
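# Example (untested sketch; the input file name and values are illustrative):
#     element = test_element(input_file='automated_test_1_uniform_rest_32ppc',
#                            n_node=8, n_mpi_per_node=8, n_omp=8,
#                            n_cell=[128, 128, 128])
#     element.scale_n_cell(element.n_node)  # element.n_cell -> [256, 256, 256]
#     scale_n_cell([128, 128, 128], 4)      # returns [256, 256, 128]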
# Append the current commit hash of the repository at repo_path to filename,
# prefixed by name
def store_git_hash(repo_path=None, filename=None, name=None):
    repo = git.Repo(path=repo_path)
    sha = repo.head.object.hexsha
    with open(filename, 'a+') as file_handler:
        file_handler.write(name + ':' + sha + ' ')
# Return the entire content of filename as a string
def get_file_content(filename=None):
    with open(filename, 'r') as file_handler:
        file_content = file_handler.read()
    return file_content
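# Example (untested sketch; repository paths are hypothetical): record the
# WarpX and AMReX commit hashes used for a benchmark, then read them back.
#     store_git_hash(repo_path='/path/to/warpx', filename='hashes.txt', name='warpx')
#     store_git_hash(repo_path='/path/to/amrex', filename='hashes.txt', name='amrex')
#     print(get_file_content(filename='hashes.txt'))  # e.g. "warpx:<sha> amrex:<sha> "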
# Write a Slurm batch script for a single run and submit it
def run_batch(run_name, res_dir, bin_name, config_command, architecture='knl',
              Cname='knl', n_node=1, n_mpi=1, n_omp=1):
# Clean res_dir
if os.path.exists(res_dir):
shutil.rmtree(res_dir)
os.makedirs(res_dir)
# Copy files to res_dir
cwd = os.environ['WARPX'] + '/Tools/PerformanceTests/'
bin_dir = cwd + 'Bin/'
shutil.copy(bin_dir + bin_name, res_dir)
shutil.copyfile(cwd + run_name, res_dir + 'inputs')
os.chdir(res_dir)
batch_string = ''
batch_string += '#!/bin/bash\n'
batch_string += '#SBATCH --job-name=' + run_name + str(n_node) + str(n_mpi) + str(n_omp) + '\n'
batch_string += '#SBATCH --time=00:23:00\n'
batch_string += '#SBATCH -C ' + Cname + '\n'
batch_string += '#SBATCH -N ' + str(n_node) + '\n'
batch_string += '#SBATCH -q regular\n'
batch_string += '#SBATCH -e error.txt\n'
batch_string += '#SBATCH --account=m2852\n'
batch_string += 'export OMP_NUM_THREADS=' + str(n_omp) + '\n'
    if architecture == 'cpu':
        cflag_value = max(1, int(32/n_mpi) * 2) # Follow NERSC directives
        batch_string += 'srun --cpu_bind=cores ' + \
                        ' -n ' + str(n_node*n_mpi) + \
                        ' -c ' + str(cflag_value) + \
                        ' ./' + bin_name + ' inputs > perf_output.txt\n'
elif architecture == 'knl':
# number of logical cores per MPI process
cflag_value = max(1, int(64/n_mpi) * 4) # Follow NERSC directives
batch_string += 'srun --cpu_bind=cores ' + \
' -n ' + str(n_node*n_mpi) + \
' -c ' + str(cflag_value) + \
' ./' + bin_name + ' inputs > perf_output.txt\n'
    batch_file = 'slurm'
    with open(batch_file, 'w') as f_exe:
        f_exe.write(batch_string)
os.system('chmod 700 ' + bin_name)
os.system(config_command + 'sbatch ' + batch_file + ' >> ' + cwd + 'log_jobids_tmp.txt')
return 0
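# Example (untested sketch; paths, account and module names are illustrative):
# submit a 2-node KNL run with 8 MPI ranks per node and 8 OpenMP threads per
# rank. Requires $WARPX to point to the WarpX source tree and a Slurm system.
#     run_batch('automated_test_1_uniform_rest_32ppc', '/scratch/perf_test/',
#               'perf_tests3d.ex', config_command='module load craype-mic-knl;',
#               architecture='knl', Cname='knl', n_node=2, n_mpi=8, n_omp=8)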
def run_batch_nnode(test_list, res_dir, cwd, bin_name, config_command, batch_string, submit_job_command):
# Clean res_dir
if os.path.exists(res_dir):
shutil.rmtree(res_dir, ignore_errors=True)
os.makedirs(res_dir)
# Copy files to res_dir
bin_dir = cwd + 'Bin/'
shutil.copy(bin_dir + bin_name, res_dir)
os.chdir(res_dir)
    for current_test in test_list:
shutil.copy(cwd + current_test.input_file, res_dir)
    batch_file = 'batch_script.sh'
    with open(batch_file, 'w') as f_exe:
        f_exe.write(batch_string)
os.system('chmod 700 ' + bin_name)
os.system(config_command + submit_job_command + batch_file +\
' >> ' + cwd + 'log_jobids_tmp.txt')
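# Example (untested sketch; arguments are illustrative): batch_string must
# already contain the scheduler directives and the launch lines for each
# test, and submit_job_command is the scheduler command, e.g. 'sbatch '.
#     run_batch_nnode([element], '/scratch/perf_test/', cwd, 'perf_tests3d.ex',
#                     config_command='', batch_string=batch_string,
#                     submit_job_command='sbatch ')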
# Read the TinyProfiler output and return the initialization time, the time
# per step, and the per-step timings of the main routines
def read_run_perf(filename, n_steps):
    timing_list = []
    # Search inclusive times to get the simulation step time
    partition_limit = 'NCalls Incl. Min Incl. Avg Incl. Max Max %'
    with open(filename) as file_handler:
        output_text = file_handler.read()
    # Get total simulation time
    line_match_totaltime = re.search('TinyProfiler total time across processes.*', output_text)
    total_time = float(line_match_totaltime.group(0).split()[8])
    search_area = output_text.partition(partition_limit)[2]
    line_match_looptime = re.search(r'\nWarpX::Evolve\(\).*', search_area)
    time_wo_initialization = float(line_match_looptime.group(0).split()[3])
    timing_list += [str(total_time - time_wo_initialization)]
    timing_list += [str(time_wo_initialization/n_steps)]
    # Search exclusive routine timings, i.e. the section of the output
    # between the exclusive and the inclusive headers
    partition_limit1 = 'NCalls Excl. Min Excl. Avg Excl. Max Max %'
    partition_limit2 = 'NCalls Incl. Min Incl. Avg Incl. Max Max %'
    search_area = output_text.partition(partition_limit1)[2].partition(partition_limit2)[0]
    # Parentheses are escaped so that, e.g., Redistribute() does not also
    # match RedistributeMPI()
    pattern_list = [r'\nParticleContainer::Redistribute\(\).*',
                    r'\nFabArray::FillBoundary\(\).*',
                    r'\nFabArray::ParallelCopy\(\).*',
                    r'\nPPC::CurrentDeposition.*',
                    r'\nPPC::FieldGather.*',
                    r'\nPPC::ParticlePush.*',
                    r'\nPPC::Evolve::Copy.*',
                    r'\nWarpX::EvolveEM\(\).*',
                    r'Checkpoint\(\).*',
                    r'WriteParticles\(\).*',
                    r'\nVisMF::Write\(FabArray\).*',
                    r'\nWriteMultiLevelPlotfile\(\).*',
                    r'\nParticleContainer::RedistributeMPI\(\).*']
    for pattern in pattern_list:
        timing = ['0']
        line_match = re.search(pattern, search_area)
        if line_match is not None:
            timing = [str(float(line_match.group(0).split()[3])/n_steps)]
        timing_list += timing
    return timing_list
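# Example (untested sketch): parse the profiler output of a 10-step run. The
# returned list holds strings: initialization time, time per step, then one
# entry per pattern in pattern_list above.
#     timings = read_run_perf('perf_output.txt', n_steps=10)
#     init_time, step_time = float(timings[0]), float(timings[1])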
# Append a timing line to the log file
def write_perf_logfile(log_file, log_line):
    with open(log_file, 'a') as f_log:
        f_log.write(log_line)
    return 0
def get_nsteps(run_name):
with open(run_name) as file_handler:
run_name_text = file_handler.read()
line_match_nsteps = re.search('\nmax_step.*', run_name_text)
nsteps = float(line_match_nsteps.group(0).split()[2])
return nsteps
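# Example (untested sketch): for an input file containing the line
# "max_step = 100", get_nsteps returns 100.0 (the third whitespace-separated
# token, parsed as a float).
#     nsteps = get_nsteps('inputs')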
def extract_dataframe(filename, n_steps):
    # Get initialization time and stepping time from inclusive timings
    partition_limit_start = 'NCalls Incl. Min Incl. Avg Incl. Max Max %'
    print(filename)
    with open(filename) as file_handler:
        output_text = file_handler.read()
    # Get total simulation time
    line_match_totaltime = re.search('TinyProfiler total time across processes.*', output_text)
    total_time = float(line_match_totaltime.group(0).split()[8])
    # Get time spent performing steps, as the inclusive WarpX::Evolve() time
    search_area = output_text.partition(partition_limit_start)[2]
    line_match_looptime = re.search(r'\nWarpX::Evolve\(\).*', search_area)
    time_wo_initialization = float(line_match_looptime.group(0).split()[3])
    # Get plotfile time, if the corresponding timer appears in the output
    line_match_WritePlotFile = re.search(r'\nDiagnostics::FilterComputePackFlush\(\).*', search_area)
    if line_match_WritePlotFile is not None:
        time_WritePlotFile = float(line_match_WritePlotFile.group(0).split()[3])
    else:
        time_WritePlotFile = 0.
    # Get timers for all routines from the exclusive timings, i.e. the
    # section of the output between the exclusive and the inclusive headers
    partition_limit_start = 'NCalls Excl. Min Excl. Avg Excl. Max Max %'
    partition_limit_end = 'NCalls Incl. Min Incl. Avg Incl. Max Max %'
    search_area = output_text.partition(partition_limit_start)[2]\
                             .partition(partition_limit_end)[0]
    list_string = search_area.split('\n')[2:-4]
    time_array = np.zeros(len(list_string))
    column_list = []
    for i in range(len(list_string)):
        line_split = list_string[i].split()
        column_list.append(line_split[0])
        time_array[i] = float(line_split[3])
    df = pd.DataFrame(columns=column_list)
    df.loc[0] = time_array
    df['time_initialization'] = total_time - time_wo_initialization
    df['time_running'] = time_wo_initialization
    df['time_WritePlotFile'] = time_WritePlotFile
    # df['string_output'] = partition_limit_start + '\n' + search_area
    return df
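# Example (untested sketch; the CSV filename is illustrative): build a
# one-row dataframe from a run and append it to a performance history file.
#     n_steps = get_nsteps('inputs')
#     df = extract_dataframe('perf_output.txt', n_steps)
#     df['date'] = pd.Timestamp.now()
#     df.to_csv('perf_history.csv', mode='a')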
# Run a performance test in an interactive allocation
# def run_interactive(run_name, res_dir, n_node=1, n_mpi=1, n_omp=1):
# # Clean res_dir #
# if os.path.exists(res_dir):
# shutil.rmtree(res_dir)
# os.makedirs(res_dir)
# # Copy files to res_dir #
# shutil.copyfile(bin_dir + bin_name, res_dir + bin_name)
# shutil.copyfile(cwd + run_name, res_dir + 'inputs')
# os.chdir(res_dir)
# if args.architecture == 'cpu':
# cflag_value = max(1, int(32/n_mpi) * 2) # Follow NERSC directives #
# exec_command = 'export OMP_NUM_THREADS=' + str(n_omp) + ';' +\
# 'srun --cpu_bind=cores ' + \
# ' -n ' + str(n_node*n_mpi) + \
# ' -c ' + str(cflag_value) + \
# ' ./' + bin_name + ' inputs > perf_output.txt'
# elif args.architecture == 'knl':
# # number of logical cores per MPI process #
# cflag_value = max(1,int(68/n_mpi) * 4) # Follow NERSC directives #
# exec_command = 'export OMP_NUM_THREADS=' + str(n_omp) + ';' +\
# 'srun --cpu_bind=cores ' + \
# ' -n ' + str(n_node*n_mpi) + \
# ' -c ' + str(cflag_value) + \
# ' ./' + bin_name + ' inputs > perf_output.txt'
# os.system('chmod 700 ' + bin_name)
# os.system(config_command + exec_command)
# return 0