1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
|
# Copyright 2019 Axel Huebl, Luca Fedeli, Maxence Thevenet
#
#
# This file is part of WarpX.
#
# License: BSD-3-Clause-LBNL
import copy
import os
from functions_perftest import test_element
module_name = {'cpu': 'haswell.', 'knl': 'mic-knl.', 'gpu':'.'}
def executable_name(compiler, architecture):
return 'perf_tests3d.' + compiler + \
'.' + module_name[architecture] + 'TPROF.MTMPI.OMP.QED.ex'
def get_config_command(compiler, architecture):
config_command = ''
config_command += 'module unload darshan;'
if architecture == 'knl':
if compiler == 'intel':
config_command += 'module unload PrgEnv-gnu;'
config_command += 'module load PrgEnv-intel;'
elif compiler == 'gnu':
config_command += 'module unload PrgEnv-intel;'
config_command += 'module load PrgEnv-gnu;'
config_command += 'module unload craype-haswell;'
config_command += 'module load craype-mic-knl;'
elif architecture == 'cpu':
if compiler == 'intel':
config_command += 'module unload PrgEnv-gnu;'
config_command += 'module load PrgEnv-intel;'
elif compiler == 'gnu':
config_command += 'module unload PrgEnv-intel;'
config_command += 'module load PrgEnv-gnu;'
config_command += 'module unload craype-mic-knl;'
config_command += 'module load craype-haswell;'
return config_command
# This function runs a batch script with
# dependencies to perform the analysis
# after all performance tests are done.
def process_analysis(automated, cwd, compiler, architecture, n_node_list, start_date, path_source, path_results):
dependencies = ''
f_log = open(cwd + 'log_jobids_tmp.txt' ,'r')
for line in f_log.readlines():
dependencies += line.split()[3] + ':'
batch_string = '''#!/bin/bash
#SBATCH --job-name=warpx_1node_read
#SBATCH --time=00:07:00
#SBATCH -C knl
#SBATCH -N 1
#SBATCH -S 4
#SBATCH -q regular
#SBATCH -e read_error.txt
#SBATCH -o read_output.txt
#SBATCH --mail-type=end
#SBATCH --account=m2852
module load h5py-parallel
'''
batch_string += 'python run_automated.py --compiler=' + \
compiler + ' --architecture=' + architecture + \
' --mode=read' + \
' --n_node_list=' + '"' + n_node_list + '"' + \
' --start_date=' + start_date + \
' --path_source=' + path_source + \
' --path_results=' + path_results
if automated == True:
batch_string += ' --automated'
batch_string += '\n'
batch_file = 'slurm_perfread'
f_exe = open(batch_file,'w')
f_exe.write(batch_string)
f_exe.close()
os.system('chmod 700 ' + batch_file)
print( 'process_analysis line: ' + 'sbatch --dependency afterok:' + dependencies[0:-1] + ' ' + batch_file)
os.system('sbatch --dependency afterok:' + dependencies[0:-1] + ' ' + batch_file)
# Calculate simulation time. Take 5 min + 5 min / simulation
def time_min(nb_simulations):
return 5. + nb_simulations*5.
def get_submit_job_command():
return ' sbatch '
def get_batch_string(test_list, job_time_min, Cname, n_node):
job_time_str = str(int(job_time_min/60)) + ':' + str(int(job_time_min%60)) + ':00'
batch_string = ''
batch_string += '#!/bin/bash\n'
batch_string += '#SBATCH --job-name=' + test_list[0].input_file + '\n'
batch_string += '#SBATCH --time=' + job_time_str + '\n'
batch_string += '#SBATCH -C ' + Cname + '\n'
batch_string += '#SBATCH -N ' + str(n_node) + '\n'
batch_string += '#SBATCH -q regular\n'
batch_string += '#SBATCH -e error.txt\n'
batch_string += '#SBATCH --account=m2852\n'
batch_string += 'module unload PrgEnv-gnu\n'
batch_string += 'module load PrgEnv-intel\n'
return batch_string
def get_run_string(current_test, architecture, n_node, count, bin_name, runtime_param_string):
srun_string = ''
srun_string += 'export OMP_NUM_THREADS=' + str(current_test.n_omp) + '\n'
# number of logical cores per MPI process
if architecture == 'cpu':
cflag_value = max(1, int(32/current_test.n_mpi_per_node) * 2) # Follow NERSC directives
elif architecture == 'knl':
cflag_value = max(1, int(64/current_test.n_mpi_per_node) * 4) # Follow NERSC directives
output_filename = 'out_' + '_'.join([current_test.input_file, str(n_node), str(current_test.n_mpi_per_node), str(current_test.n_omp), str(count)]) + '.txt'
srun_string += 'srun --cpu_bind=cores '+ \
' -n ' + str(n_node*current_test.n_mpi_per_node) + \
' -c ' + str(cflag_value) + \
' ./' + bin_name + \
' ' + current_test.input_file + \
runtime_param_string + \
' > ' + output_filename + '\n'
return srun_string
def get_test_list(n_repeat):
test_list_unq = []
# n_node is kept to None and passed in functions as an external argument
# That way, several test_element_instance run with the same n_node on the same batch job
test_list_unq.append( test_element(input_file='automated_test_1_uniform_rest_32ppc',
n_mpi_per_node=8,
n_omp=8,
n_cell=[128, 128, 128],
max_grid_size=64,
blocking_factor=32,
n_step=10) )
test_list_unq.append( test_element(input_file='automated_test_2_uniform_rest_1ppc',
n_mpi_per_node=8,
n_omp=8,
n_cell=[256, 256, 512],
max_grid_size=64,
blocking_factor=32,
n_step=10) )
test_list_unq.append( test_element(input_file='automated_test_3_uniform_drift_4ppc',
n_mpi_per_node=8,
n_omp=8,
n_cell=[128, 128, 128],
max_grid_size=64,
blocking_factor=32,
n_step=10) )
test_list_unq.append( test_element(input_file='automated_test_4_labdiags_2ppc',
n_mpi_per_node=8,
n_omp=8,
n_cell=[64, 64, 128],
max_grid_size=64,
blocking_factor=32,
n_step=50) )
test_list_unq.append( test_element(input_file='automated_test_5_loadimbalance',
n_mpi_per_node=8,
n_omp=8,
n_cell=[128, 128, 128],
max_grid_size=64,
blocking_factor=32,
n_step=10) )
test_list_unq.append( test_element(input_file='automated_test_6_output_2ppc',
n_mpi_per_node=8,
n_omp=8,
n_cell=[128, 256, 256],
max_grid_size=64,
blocking_factor=32,
n_step=1) )
test_list = [copy.deepcopy(item) for item in test_list_unq for _ in range(n_repeat) ]
return test_list
|