Diffstat (limited to 'Tools/performance_tests')
-rw-r--r--  Tools/performance_tests/GNUmakefile_perftest                  16
-rw-r--r--  Tools/performance_tests/automated_test_1_uniform_rest_32ppc    1
-rw-r--r--  Tools/performance_tests/automated_test_2_uniform_rest_1ppc     1
-rw-r--r--  Tools/performance_tests/automated_test_3_uniform_drift_4ppc    1
-rw-r--r--  Tools/performance_tests/automated_test_5_loadimbalance         1
-rw-r--r--  Tools/performance_tests/automated_test_6_output_2ppc           1
-rw-r--r--  Tools/performance_tests/cori.py                               159
-rw-r--r--  Tools/performance_tests/functions_perftest.py                  99
-rw-r--r--  Tools/performance_tests/run_automated.py                      270
-rw-r--r--  Tools/performance_tests/summit.py                             135
10 files changed, 459 insertions, 225 deletions
diff --git a/Tools/performance_tests/GNUmakefile_perftest b/Tools/performance_tests/GNUmakefile_perftest
deleted file mode 100644
index 38275332d..000000000
--- a/Tools/performance_tests/GNUmakefile_perftest
+++ /dev/null
@@ -1,16 +0,0 @@
-WARPX_HOME := ../..
-AMREX_HOME ?= $(WARPX_HOME)/../amrex
-PICSAR_HOME ?= $(WARPX_HOME)/../picsar
-OPENBC_HOME ?= $(WARPX_HOME)/../openbc_poisson
-DEBUG = FALSE
-DIM = 3
-COMP=intel
-TINY_PROFILE = TRUE
-USE_OMP = TRUE
-USE_CUDA = FALSE
-USE_ACC = FALSE
-USE_SENSEI_INSITU = FALSE
-EBASE = perf_tests
-USE_PYTHON_MAIN = FALSE
-WarpxBinDir = Bin
-include $(WARPX_HOME)/Source/Make.WarpX
diff --git a/Tools/performance_tests/automated_test_1_uniform_rest_32ppc b/Tools/performance_tests/automated_test_1_uniform_rest_32ppc
index 55c1a6061..500e10859 100644
--- a/Tools/performance_tests/automated_test_1_uniform_rest_32ppc
+++ b/Tools/performance_tests/automated_test_1_uniform_rest_32ppc
@@ -22,6 +22,7 @@ warpx.verbose = 1
interpolation.nox = 3
interpolation.noy = 3
interpolation.noz = 3
+warpx.do_pml = 0
# CFL
warpx.cfl = 1.0
diff --git a/Tools/performance_tests/automated_test_2_uniform_rest_1ppc b/Tools/performance_tests/automated_test_2_uniform_rest_1ppc
index 8e17042c9..2af282db8 100644
--- a/Tools/performance_tests/automated_test_2_uniform_rest_1ppc
+++ b/Tools/performance_tests/automated_test_2_uniform_rest_1ppc
@@ -22,6 +22,7 @@ warpx.verbose = 1
interpolation.nox = 3
interpolation.noy = 3
interpolation.noz = 3
+warpx.do_pml = 1
# CFL
warpx.cfl = 1.0
diff --git a/Tools/performance_tests/automated_test_3_uniform_drift_4ppc b/Tools/performance_tests/automated_test_3_uniform_drift_4ppc
index 13af8aaff..93d224061 100644
--- a/Tools/performance_tests/automated_test_3_uniform_drift_4ppc
+++ b/Tools/performance_tests/automated_test_3_uniform_drift_4ppc
@@ -23,6 +23,7 @@ warpx.verbose = 1
interpolation.nox = 3
interpolation.noy = 3
interpolation.noz = 3
+warpx.do_pml = 0
# CFL
warpx.cfl = 1.0
diff --git a/Tools/performance_tests/automated_test_5_loadimbalance b/Tools/performance_tests/automated_test_5_loadimbalance
index 22c9ec4b6..d2e03372b 100644
--- a/Tools/performance_tests/automated_test_5_loadimbalance
+++ b/Tools/performance_tests/automated_test_5_loadimbalance
@@ -21,6 +21,7 @@ warpx.load_balance_int = 5
interpolation.nox = 3
interpolation.noy = 3
interpolation.noz = 3
+warpx.do_pml = 0
# CFL
warpx.cfl = 1.0
diff --git a/Tools/performance_tests/automated_test_6_output_2ppc b/Tools/performance_tests/automated_test_6_output_2ppc
index f4498c410..9e8a839cc 100644
--- a/Tools/performance_tests/automated_test_6_output_2ppc
+++ b/Tools/performance_tests/automated_test_6_output_2ppc
@@ -22,6 +22,7 @@ warpx.verbose = 1
interpolation.nox = 3
interpolation.noy = 3
interpolation.noz = 3
+warpx.do_pml = 0
# CFL
warpx.cfl = 1.0
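
With the edits above, every automated input deck touched here pins warpx.do_pml explicitly (it is enabled only in the 1-ppc test), so a change in the code's default boundary treatment cannot silently shift the timings. Below is a minimal consistency check one could run over the decks; the helper is hypothetical and not part of this commit:

    import glob, re

    # Hypothetical check: every automated deck must set warpx.do_pml explicitly.
    for deck in sorted(glob.glob('Tools/performance_tests/automated_test_*')):
        text = open(deck).read()
        match = re.search(r'^warpx\.do_pml\s*=\s*(\d+)', text, flags=re.M)
        assert match is not None, deck + ' does not set warpx.do_pml'
        print(deck, '-> warpx.do_pml =', match.group(1))
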
diff --git a/Tools/performance_tests/cori.py b/Tools/performance_tests/cori.py
new file mode 100644
index 000000000..dbe3a1e2a
--- /dev/null
+++ b/Tools/performance_tests/cori.py
@@ -0,0 +1,159 @@
+import os, copy
+
+from functions_perftest import test_element
+
+module_name = {'cpu': 'haswell.', 'knl': 'mic-knl.', 'gpu':'.'}
+
+def executable_name(compiler, architecture):
+ return 'perf_tests3d.' + compiler + \
+ '.' + module_name[architecture] + 'TPROF.MPI.OMP.ex'
+
+def get_config_command(compiler, architecture):
+ config_command = ''
+ config_command += 'module unload darshan;'
+ if architecture == 'knl':
+ if compiler == 'intel':
+ config_command += 'module unload PrgEnv-gnu;'
+ config_command += 'module load PrgEnv-intel;'
+ elif compiler == 'gnu':
+ config_command += 'module unload PrgEnv-intel;'
+ config_command += 'module load PrgEnv-gnu;'
+ config_command += 'module unload craype-haswell;'
+ config_command += 'module load craype-mic-knl;'
+ elif architecture == 'cpu':
+ if compiler == 'intel':
+ config_command += 'module unload PrgEnv-gnu;'
+ config_command += 'module load PrgEnv-intel;'
+ elif compiler == 'gnu':
+ config_command += 'module unload PrgEnv-intel;'
+ config_command += 'module load PrgEnv-gnu;'
+ config_command += 'module unload craype-mic-knl;'
+ config_command += 'module load craype-haswell;'
+ return config_command
+
+# This function submits a batch script with job dependencies,
+# so that the analysis runs only after all the performance
+# tests have completed.
+def process_analysis(automated, cwd, compiler, architecture, n_node_list, start_date):
+ dependencies = ''
+ f_log = open(cwd + 'log_jobids_tmp.txt' ,'r')
+ for line in f_log.readlines():
+ dependencies += line.split()[3] + ':'
+
+ batch_string = '''#!/bin/bash
+#SBATCH --job-name=warpx_1node_read
+#SBATCH --time=00:07:00
+#SBATCH -C knl
+#SBATCH -N 1
+#SBATCH -S 4
+#SBATCH -q regular
+#SBATCH -e read_error.txt
+#SBATCH -o read_output.txt
+#SBATCH --mail-type=end
+#SBATCH --account=m2852
+module load h5py-parallel
+'''
+ batch_string += 'python run_automated.py --compiler=' + \
+ compiler + ' --architecture=' + architecture + \
+ ' --mode=read' + \
+ ' --n_node_list=' + '"' + n_node_list + '"' + \
+ ' --start_date=' + start_date
+ if automated == True:
+ batch_string += ' --automated'
+ batch_string += '\n'
+ batch_file = 'slurm_perfread'
+ f_exe = open(batch_file,'w')
+ f_exe.write(batch_string)
+ f_exe.close()
+ os.system('chmod 700 ' + batch_file)
+ print( 'process_analysis line: ' + 'sbatch --dependency afterok:' + dependencies[0:-1] + ' ' + batch_file)
+ os.system('sbatch --dependency afterok:' + dependencies[0:-1] + ' ' + batch_file)
+
+# Estimate the job wall-clock time: 5 min of overhead plus 5 min per simulation.
+def time_min(nb_simulations):
+ return 5. + nb_simulations*5.
+
+def get_submit_job_command():
+ return ' sbatch '
+
+def get_batch_string(test_list, job_time_min, Cname, n_node):
+
+ job_time_str = str(int(job_time_min/60)) + ':' + str(int(job_time_min%60)) + ':00'
+
+ batch_string = ''
+ batch_string += '#!/bin/bash\n'
+ batch_string += '#SBATCH --job-name=' + test_list[0].input_file + '\n'
+ batch_string += '#SBATCH --time=' + job_time_str + '\n'
+ batch_string += '#SBATCH -C ' + Cname + '\n'
+ batch_string += '#SBATCH -N ' + str(n_node) + '\n'
+ batch_string += '#SBATCH -q regular\n'
+ batch_string += '#SBATCH -e error.txt\n'
+ batch_string += '#SBATCH --account=m2852\n'
+ return batch_string
+
+def get_run_string(current_test, architecture, n_node, count, bin_name, runtime_param_string):
+ srun_string = ''
+ srun_string += 'export OMP_NUM_THREADS=' + str(current_test.n_omp) + '\n'
+ # number of logical cores per MPI process
+ if architecture == 'cpu':
+ cflag_value = max(1, int(32/current_test.n_mpi_per_node) * 2) # Follow NERSC directives
+ elif architecture == 'knl':
+ cflag_value = max(1, int(64/current_test.n_mpi_per_node) * 4) # Follow NERSC directives
+ output_filename = 'out_' + '_'.join([current_test.input_file, str(n_node), str(current_test.n_mpi_per_node), str(current_test.n_omp), str(count)]) + '.txt'
+ srun_string += 'srun --cpu_bind=cores '+ \
+ ' -n ' + str(n_node*current_test.n_mpi_per_node) + \
+ ' -c ' + str(cflag_value) + \
+ ' ./' + bin_name + \
+ ' ' + current_test.input_file + \
+ runtime_param_string + \
+ ' > ' + output_filename + '\n'
+ return srun_string
+
+def get_test_list(n_repeat):
+ test_list_unq = []
+    # n_node is kept as None and passed to functions as an external argument.
+    # That way, several test_element instances can run with the same n_node within the same batch job.
+ test_list_unq.append( test_element(input_file='automated_test_1_uniform_rest_32ppc',
+ n_mpi_per_node=8,
+ n_omp=8,
+ n_cell=[128, 128, 128],
+ max_grid_size=64,
+ blocking_factor=32,
+ n_step=10) )
+ test_list_unq.append( test_element(input_file='automated_test_2_uniform_rest_1ppc',
+ n_mpi_per_node=8,
+ n_omp=8,
+ n_cell=[256, 256, 512],
+ max_grid_size=64,
+ blocking_factor=32,
+ n_step=10) )
+ test_list_unq.append( test_element(input_file='automated_test_3_uniform_drift_4ppc',
+ n_mpi_per_node=8,
+ n_omp=8,
+ n_cell=[128, 128, 128],
+ max_grid_size=64,
+ blocking_factor=32,
+ n_step=10) )
+ test_list_unq.append( test_element(input_file='automated_test_4_labdiags_2ppc',
+ n_mpi_per_node=8,
+ n_omp=8,
+ n_cell=[64, 64, 128],
+ max_grid_size=64,
+ blocking_factor=32,
+ n_step=50) )
+ test_list_unq.append( test_element(input_file='automated_test_5_loadimbalance',
+ n_mpi_per_node=8,
+ n_omp=8,
+ n_cell=[128, 128, 128],
+ max_grid_size=64,
+ blocking_factor=32,
+ n_step=10) )
+ test_list_unq.append( test_element(input_file='automated_test_6_output_2ppc',
+ n_mpi_per_node=8,
+ n_omp=8,
+ n_cell=[128, 256, 256],
+ max_grid_size=64,
+ blocking_factor=32,
+ n_step=0) )
+ test_list = [copy.deepcopy(item) for item in test_list_unq for _ in range(n_repeat) ]
+ return test_list
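
The new cori.py gathers everything machine-specific (modules, executable name, Slurm directives, srun lines, test list) behind a small set of functions that run_automated.py calls. As a rough sketch of how these pieces compose into one batch script per node count, assuming the module above is importable and that its dependencies (pandas, gitpython) are installed:

    import cori

    # Sketch: assemble the Slurm script that would run all Cori tests on 2 nodes.
    test_list = cori.get_test_list(n_repeat=1)
    n_node = 2
    job_time = cori.time_min(len(test_list))          # 5 min + 5 min per simulation
    batch = cori.get_batch_string(test_list, job_time, 'knl,quad,cache', n_node)
    bin_name = cori.executable_name('intel', 'knl')   # perf_tests3d.intel.mic-knl.TPROF.MPI.OMP.ex
    for count, test in enumerate(test_list):
        # run_automated.py also appends amr.n_cell, amr.max_grid_size, etc.
        params = ' max_step=' + str(test.n_step)
        batch += cori.get_run_string(test, 'knl', n_node, count, bin_name, params)
    print(batch)
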
diff --git a/Tools/performance_tests/functions_perftest.py b/Tools/performance_tests/functions_perftest.py
index 5e026bf12..67622317a 100644
--- a/Tools/performance_tests/functions_perftest.py
+++ b/Tools/performance_tests/functions_perftest.py
@@ -1,7 +1,32 @@
-import os, shutil, re
+import os, shutil, re, copy
import pandas as pd
import numpy as np
import git
+# import cori
+# import summit
+
+# Each instance of this class contains information for a single test.
+class test_element():
+ def __init__(self, input_file=None, n_node=None, n_mpi_per_node=None,
+ n_omp=None, n_cell=None, n_step=None, max_grid_size=None,
+ blocking_factor=None):
+ self.input_file = input_file
+ self.n_node = n_node
+ self.n_mpi_per_node = n_mpi_per_node
+ self.n_omp = n_omp
+ self.n_cell = n_cell
+ self.n_step = n_step
+ self.max_grid_size = max_grid_size
+ self.blocking_factor = blocking_factor
+
+ def scale_n_cell(self, n_node=0):
+ n_cell_scaled = copy.deepcopy(self.n_cell)
+ index_dim = 0
+ while n_node > 1:
+ n_cell_scaled[index_dim] *= 2
+ n_node /= 2
+ index_dim = (index_dim+1) % 3
+ self.n_cell = n_cell_scaled
def scale_n_cell(ncell, n_node):
ncell_scaled = ncell[:]
@@ -25,56 +50,6 @@ def get_file_content(filename=None):
file_handler.close()
return file_content
-def run_batch_nnode(test_list, res_dir, bin_name, config_command, architecture='knl', Cname='knl', n_node=1, runtime_param_list=[]):
- # Clean res_dir
- if os.path.exists(res_dir):
- shutil.rmtree(res_dir, ignore_errors=True)
- os.makedirs(res_dir)
- # Copy files to res_dir
- cwd = os.environ['AUTOMATED_PERF_TESTS'] + '/WarpX/Tools/performance_tests/'
- bin_dir = cwd + 'Bin/'
- shutil.copy(bin_dir + bin_name, res_dir)
- os.chdir(res_dir)
- # Calculate simulation time. Take 5 min + 2 min / simulation
- job_time_min = 5. + len(test_list)*5.
- job_time_str = str(int(job_time_min/60)) + ':' + str(int(job_time_min%60)) + ':00'
- batch_string = ''
- batch_string += '#!/bin/bash\n'
- batch_string += '#SBATCH --job-name=' + test_list[0].input_file + '\n'
- batch_string += '#SBATCH --time=' + job_time_str + '\n'
- batch_string += '#SBATCH -C ' + Cname + '\n'
- batch_string += '#SBATCH -N ' + str(n_node) + '\n'
- batch_string += '#SBATCH -q regular\n'
- batch_string += '#SBATCH -e error.txt\n'
- batch_string += '#SBATCH --account=m2852\n'
-
- for count, current_test in enumerate(test_list):
- shutil.copy(cwd + current_test.input_file, res_dir)
- srun_string = ''
- srun_string += 'export OMP_NUM_THREADS=' + str(current_test.n_omp) + '\n'
- # number of logical cores per MPI process
- if architecture == 'cpu':
- cflag_value = max(1, int(32/current_test.n_mpi_per_node) * 2) # Follow NERSC directives
- elif architecture == 'knl':
- cflag_value = max(1, int(64/current_test.n_mpi_per_node) * 4) # Follow NERSC directives
- output_filename = 'out_' + '_'.join([current_test.input_file, str(n_node), str(current_test.n_mpi_per_node), str(current_test.n_omp), str(count)]) + '.txt'
- srun_string += 'srun --cpu_bind=cores '+ \
- ' -n ' + str(n_node*current_test.n_mpi_per_node) + \
- ' -c ' + str(cflag_value) + \
- ' ./' + bin_name + \
- ' ' + current_test.input_file + \
- runtime_param_list[ count ] + \
- ' > ' + output_filename + '\n'
- batch_string += srun_string
- batch_string += 'rm -rf plotfiles ; rm -rf lab_frame_data\n'
- batch_file = 'slurm'
- f_exe = open(batch_file,'w')
- f_exe.write(batch_string)
- f_exe.close()
- os.system('chmod 700 ' + bin_name)
- os.system(config_command + 'sbatch ' + batch_file + ' >> ' + cwd + 'log_jobids_tmp.txt')
- return 0
-
def run_batch(run_name, res_dir, bin_name, config_command, architecture='knl',\
Cname='knl', n_node=1, n_mpi=1, n_omp=1):
# Clean res_dir
@@ -82,7 +57,6 @@ def run_batch(run_name, res_dir, bin_name, config_command, architecture='knl',\
shutil.rmtree(res_dir)
os.makedirs(res_dir)
# Copy files to res_dir
- # Copy files to res_dir
cwd = os.environ['WARPX'] + '/Tools/performance_tests/'
bin_dir = cwd + 'Bin/'
shutil.copy(bin_dir + bin_name, res_dir)
@@ -119,6 +93,27 @@ def run_batch(run_name, res_dir, bin_name, config_command, architecture='knl',\
os.system(config_command + 'sbatch ' + batch_file + ' >> ' + cwd + 'log_jobids_tmp.txt')
return 0
+def run_batch_nnode(test_list, res_dir, bin_name, config_command, batch_string, submit_job_command):
+ # Clean res_dir
+ if os.path.exists(res_dir):
+ shutil.rmtree(res_dir, ignore_errors=True)
+ os.makedirs(res_dir)
+ # Copy files to res_dir
+ cwd = os.environ['AUTOMATED_PERF_TESTS'] + '/warpx/Tools/performance_tests/'
+ bin_dir = cwd + 'Bin/'
+ shutil.copy(bin_dir + bin_name, res_dir)
+ os.chdir(res_dir)
+
+ for count, current_test in enumerate(test_list):
+ shutil.copy(cwd + current_test.input_file, res_dir)
+ batch_file = 'batch_script.sh'
+ f_exe = open(batch_file,'w')
+ f_exe.write(batch_string)
+ f_exe.close()
+ os.system('chmod 700 ' + bin_name)
+ os.system(config_command + submit_job_command + batch_file +\
+ ' >> ' + cwd + 'log_jobids_tmp.txt')
+
# Read output file and return init time and 1-step time
def read_run_perf(filename, n_steps):
timing_list = []
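
Moving test_element into functions_perftest.py lets both machine modules (cori.py and summit.py) build their test lists from the same class. Its scale_n_cell method implements weak scaling: each doubling of the node count doubles the cell count along one axis, cycling through x, y and z. A short illustration, as a sketch using the class defined above:

    from functions_perftest import test_element

    # Sketch: weak scaling of the grid with the node count.
    for n_node in [1, 2, 4, 8]:
        elem = test_element(input_file='automated_test_1_uniform_rest_32ppc',
                            n_mpi_per_node=8, n_omp=8,
                            n_cell=[128, 128, 128], n_step=10)
        elem.scale_n_cell(n_node)
        print(n_node, 'node(s) ->', elem.n_cell)
    # 1 -> [128, 128, 128], 2 -> [256, 128, 128],
    # 4 -> [256, 256, 128], 8 -> [256, 256, 256]
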
diff --git a/Tools/performance_tests/run_automated.py b/Tools/performance_tests/run_automated.py
index 8f79750d4..fd771faac 100644
--- a/Tools/performance_tests/run_automated.py
+++ b/Tools/performance_tests/run_automated.py
@@ -1,15 +1,41 @@
-#!/usr/common/software/python/2.7-anaconda-4.4/bin/python
-
import os, sys, shutil, datetime, git
import argparse, re, time, copy
import pandas as pd
from functions_perftest import store_git_hash, get_file_content, \
- run_batch_nnode, extract_dataframe
+ run_batch_nnode, extract_dataframe
+
+# Get name of supercomputer and import configuration functions from
+# machine-specific file
+if os.getenv("LMOD_SYSTEM_NAME") == 'summit':
+ machine = 'summit'
+ from summit import executable_name, process_analysis, \
+ get_config_command, time_min, get_submit_job_command, \
+ get_batch_string, get_run_string, get_test_list
+if os.getenv("NERSC_HOST") == 'cori':
+ machine = 'cori'
+ from cori import executable_name, process_analysis, \
+ get_config_command, time_min, get_submit_job_command, \
+ get_batch_string, get_run_string, get_test_list
# typical use: python run_automated.py --n_node_list='1,8,16,32' --automated
# Assume the warpx, picsar, amrex and perf_logs repos are in the same directory and
# environment variable AUTOMATED_PERF_TESTS contains the path to this directory
+# requirements:
+# - python packages: gitpython and pandas
+# - AUTOMATED_PERF_TESTS: environment variable pointing to the directory
+#   where warpx, amrex and picsar are installed ($AUTOMATED_PERF_TESTS/warpx etc.)
+# - SCRATCH: environment variable pointing to the directory where performance
+#   results are written. This script will create the folder $SCRATCH/performance_warpx/
+
+if "AUTOMATED_PERF_TESTS" not in os.environ:
+ raise ValueError("environment variable AUTOMATED_PERF_TESTS is not defined.\n"
+ "It should contain the path to the directory where WarpX, "
+ "AMReX and PICSAR repos are.")
+if "SCRATCH" not in os.environ:
+ raise ValueError("environment variable SCRATCH is not defined.\n"
+ "This script will create $SCRATCH/performance_warpx/ "
+ "to store performance results.")
# Handle parser
###############
parser = argparse.ArgumentParser( description='Run performance tests and write results in files' )
@@ -33,11 +59,11 @@ parser.add_argument('--n_node_list',
parser.add_argument('--start_date',
dest='start_date' )
parser.add_argument('--compiler',
- choices=['gnu', 'intel'],
+ choices=['gnu', 'intel', 'pgi'],
default='intel',
help='which compiler to use')
parser.add_argument('--architecture',
- choices=['cpu', 'knl'],
+ choices=['cpu', 'knl', 'gpu'],
default='knl',
help='which architecture to cross-compile for NERSC machines')
parser.add_argument('--mode',
@@ -48,6 +74,8 @@ args = parser.parse_args()
n_node_list_string = args.n_node_list.split(',')
n_node_list = [int(i) for i in n_node_list_string]
start_date = args.start_date
+compiler = args.compiler
+architecture = args.architecture
# Set behavior variables
########################
@@ -71,70 +99,21 @@ if args.automated == True:
push_on_perf_log_repo = False
pull_3_repos = True
recompile = True
-
-# Each instance of this class contains information for a single test.
-class test_element():
- def __init__(self, input_file=None, n_node=None, n_mpi_per_node=None,
- n_omp=None, n_cell=None, n_step=None):
- self.input_file = input_file
- self.n_node = n_node
- self.n_mpi_per_node = n_mpi_per_node
- self.n_omp = n_omp
- self.n_cell = n_cell
- self.n_step = n_step
-
- def scale_n_cell(self, n_node=0):
- n_cell_scaled = copy.deepcopy(self.n_cell)
- index_dim = 0
- while n_node > 1:
- n_cell_scaled[index_dim] *= 2
- n_node /= 2
- index_dim = (index_dim+1) % 3
- self.n_cell = n_cell_scaled
+ if machine == 'summit':
+ compiler = 'pgi'
+ architecture = 'gpu'
# List of tests to perform
# ------------------------
-test_list_unq = []
# Each test runs n_repeat times
n_repeat = 2
-# n_node is kept to None and passed in functions as an external argument
-# That way, several test_element_instance run with the same n_node on the same batch job
-test_list_unq.append( test_element(input_file='automated_test_1_uniform_rest_32ppc',
- n_mpi_per_node=8,
- n_omp=8,
- n_cell=[128, 128, 128],
- n_step=10) )
-test_list_unq.append( test_element(input_file='automated_test_2_uniform_rest_1ppc',
- n_mpi_per_node=8,
- n_omp=8,
- n_cell=[256, 256, 512],
- n_step=10) )
-test_list_unq.append( test_element(input_file='automated_test_3_uniform_drift_4ppc',
- n_mpi_per_node=8,
- n_omp=8,
- n_cell=[128, 128, 128],
- n_step=10) )
-test_list_unq.append( test_element(input_file='automated_test_4_labdiags_2ppc',
- n_mpi_per_node=8,
- n_omp=8,
- n_cell=[64, 64, 128],
- n_step=50) )
-test_list_unq.append( test_element(input_file='automated_test_5_loadimbalance',
- n_mpi_per_node=8,
- n_omp=8,
- n_cell=[128, 128, 128],
- n_step=10) )
-test_list_unq.append( test_element(input_file='automated_test_6_output_2ppc',
- n_mpi_per_node=8,
- n_omp=8,
- n_cell=[128, 256, 256],
- n_step=0) )
-test_list = [copy.deepcopy(item) for item in test_list_unq for _ in range(n_repeat) ]
+# test_list is machine-specific
+test_list = get_test_list(n_repeat)
# Define directories
# ------------------
source_dir_base = os.environ['AUTOMATED_PERF_TESTS']
-warpx_dir = source_dir_base + '/WarpX/'
+warpx_dir = source_dir_base + '/warpx/'
picsar_dir = source_dir_base + '/picsar/'
amrex_dir = source_dir_base + '/amrex/'
res_dir_base = os.environ['SCRATCH'] + '/performance_warpx/'
@@ -142,12 +121,13 @@ perf_logs_repo = source_dir_base + 'perf_logs/'
# Define dictionaries
# -------------------
-compiler_name = {'intel': 'intel', 'gnu': 'gcc'}
-module_name = {'cpu': 'haswell', 'knl': 'mic-knl'}
-module_Cname = {'cpu': 'haswell', 'knl': 'knl,quad,cache'}
+compiler_name = {'intel': 'intel', 'gnu': 'gcc', 'pgi':'pgi'}
+module_Cname = {'cpu': 'haswell', 'knl': 'knl,quad,cache', 'gpu':''}
+csv_file = {'cori':'cori_knl.csv', 'summit':'summit.csv'}
cwd = os.getcwd() + '/'
bin_dir = cwd + 'Bin/'
-bin_name = 'perf_tests3d.' + args.compiler + '.' + module_name[args.architecture] + '.TPROF.MPI.OMP.ex'
+bin_name = executable_name(compiler, architecture)
+
log_dir = cwd
perf_database_file = cwd + perf_database_file
day = time.strftime('%d')
@@ -159,27 +139,7 @@ year = time.strftime('%Y')
if args.mode == 'run':
start_date = datetime.datetime.now().strftime("%Y_%m_%d_%H_%M_%S")
# Set default options for compilation and execution
- config_command = ''
- config_command += 'module unload darshan;'
- config_command += 'module load craype-hugepages4M;'
- if args.architecture == 'knl':
- if args.compiler == 'intel':
- config_command += 'module unload PrgEnv-gnu;'
- config_command += 'module load PrgEnv-intel;'
- elif args.compiler == 'gnu':
- config_command += 'module unload PrgEnv-intel;'
- config_command += 'module load PrgEnv-gnu;'
- config_command += 'module unload craype-haswell;'
- config_command += 'module load craype-mic-knl;'
- elif args.architecture == 'cpu':
- if args.compiler == 'intel':
- config_command += 'module unload PrgEnv-gnu;'
- config_command += 'module load PrgEnv-intel;'
- elif args.compiler == 'gnu':
- config_command += 'module unload PrgEnv-intel;'
- config_command += 'module load PrgEnv-gnu;'
- config_command += 'module unload craype-mic-knl;'
- config_command += 'module load craype-haswell;'
+ config_command = get_config_command(compiler, architecture)
# Create main result directory if does not exist
if not os.path.exists(res_dir_base):
os.mkdir(res_dir_base)
@@ -194,56 +154,31 @@ if args.mode == 'run':
git_repo.pull()
git_repo = git.cmd.Git( warpx_dir )
git_repo.pull()
- with open(cwd + 'GNUmakefile_perftest') as makefile_handler:
- makefile_text = makefile_handler.read()
- makefile_text = re.sub('\nCOMP.*', '\nCOMP=%s' %compiler_name[args.compiler], makefile_text)
- with open(cwd + 'GNUmakefile_perftest', 'w') as makefile_handler:
- makefile_handler.write( makefile_text )
- os.system(config_command + " make -f GNUmakefile_perftest realclean ; " + " rm -r tmp_build_dir *.mod; make -j 8 -f GNUmakefile_perftest")
+
+    # Copy WarpX/GNUmakefile to the current directory and recompile
+    # with specific options for the automated performance tests.
+    # This way, the performance-test compilation does not interfere
+    # with the user's own build.
+ shutil.copyfile("../../GNUmakefile","./GNUmakefile")
+ make_realclean_command = " make realclean WARPX_HOME=../.. " \
+ "AMREX_HOME=../../../amrex/ PICSAR_HOME=../../../picsar/ " \
+ "EBASE=perf_tests COMP=%s" %compiler_name[compiler] + ";"
+ make_command = "make -j 16 WARPX_HOME=../.. " \
+ "AMREX_HOME=../../../amrex/ PICSAR_HOME=../../../picsar/ " \
+ "EBASE=perf_tests COMP=%s" %compiler_name[compiler]
+ if machine == 'summit':
+ make_command += ' USE_GPU=TRUE '
+ os.system(config_command + make_realclean_command + \
+ "rm -r tmp_build_dir *.mod; " + make_command )
+
+    # Store the git hashes of WarpX, AMReX and PICSAR in a file, so that
+    # they can be read when running the analysis.
if os.path.exists( cwd + 'store_git_hashes.txt' ):
os.remove( cwd + 'store_git_hashes.txt' )
store_git_hash(repo_path=picsar_dir, filename=cwd + 'store_git_hashes.txt', name='picsar')
store_git_hash(repo_path=amrex_dir , filename=cwd + 'store_git_hashes.txt', name='amrex' )
store_git_hash(repo_path=warpx_dir , filename=cwd + 'store_git_hashes.txt', name='warpx' )
-# This function runs a batch script with
-# dependencies to perform the analysis
-# after all performance tests are done.
-def process_analysis():
- dependencies = ''
- f_log = open(cwd + 'log_jobids_tmp.txt' ,'r')
- for line in f_log.readlines():
- dependencies += line.split()[3] + ':'
- batch_string = ''
- batch_string += '#!/bin/bash\n'
- batch_string += '#SBATCH --job-name=warpx_1node_read\n'
- batch_string += '#SBATCH --time=00:07:00\n'
- batch_string += '#SBATCH -C knl\n'
- batch_string += '#SBATCH -N 1\n'
- batch_string += '#SBATCH -S 4\n'
- batch_string += '#SBATCH -q regular\n'
- batch_string += '#SBATCH -e read_error.txt\n'
- batch_string += '#SBATCH -o read_output.txt\n'
- batch_string += '#SBATCH --mail-type=end\n'
- batch_string += '#SBATCH --account=m2852\n'
- batch_string += 'module load h5py-parallel\n'
- batch_string += 'python ' + __file__ + ' --compiler=' + \
- args.compiler + ' --architecture=' + args.architecture + \
- ' --mode=read' + \
- ' --n_node_list=' + '"' + args.n_node_list + '"' + \
- ' --start_date=' + start_date
- if args.automated == True:
- batch_string += ' --automated'
- batch_string += '\n'
- batch_file = 'slurm_perfread'
- f_exe = open(batch_file,'w')
- f_exe.write(batch_string)
- f_exe.close()
- os.system('chmod 700 ' + batch_file)
- print( 'process_analysis line: ' + 'sbatch --dependency afterok:' + dependencies[0:-1] + ' ' + batch_file)
- os.system('sbatch --dependency afterok:' + dependencies[0:-1] + ' ' + batch_file)
- return 0
-
# Loop over the tests and run all simulations:
# One batch job submitted per n_node. Several
# tests run within the same batch job.
@@ -254,24 +189,35 @@ if args.mode == 'run':
# loop on n_node. One batch script per n_node
for n_node in n_node_list:
res_dir = res_dir_base
- res_dir += '_'.join([run_name, args.compiler, args.architecture, str(n_node)]) + '/'
+ res_dir += '_'.join([run_name, compiler, architecture, str(n_node)]) + '/'
runtime_param_list = []
# Deep copy as we change the attribute n_cell of
# each instance of class test_element
test_list_n_node = copy.deepcopy(test_list)
+ job_time_min = time_min(len(test_list))
+ batch_string = get_batch_string(test_list_n_node, job_time_min, module_Cname[architecture], n_node)
# Loop on tests
- for current_run in test_list_n_node:
+ for count, current_run in enumerate(test_list_n_node):
current_run.scale_n_cell(n_node)
runtime_param_string = ' amr.n_cell=' + ' '.join(str(i) for i in current_run.n_cell)
+ runtime_param_string += ' amr.max_grid_size=' + str(current_run.max_grid_size)
+ runtime_param_string += ' amr.blocking_factor=' + str(current_run.blocking_factor)
runtime_param_string += ' max_step=' + str( current_run.n_step )
- runtime_param_list.append( runtime_param_string )
+ # runtime_param_list.append( runtime_param_string )
+ run_string = get_run_string(current_run, architecture, n_node, count, bin_name, runtime_param_string)
+ batch_string += run_string
+ batch_string += 'rm -rf plotfiles lab_frame_data diags\n'
+
+ submit_job_command = get_submit_job_command()
# Run the simulations.
- run_batch_nnode(test_list_n_node, res_dir, bin_name, config_command,\
- architecture=args.architecture, Cname=module_Cname[args.architecture], \
- n_node=n_node, runtime_param_list=runtime_param_list)
+ run_batch_nnode(test_list_n_node, res_dir, bin_name, config_command, batch_string, submit_job_command)
os.chdir(cwd)
# submit batch for analysis
- process_analysis()
+ if os.path.exists( 'read_error.txt' ):
+ os.remove( 'read_error.txt' )
+ if os.path.exists( 'read_output.txt' ):
+ os.remove( 'read_output.txt' )
+ process_analysis(args.automated, cwd, compiler, architecture, args.n_node_list, start_date)
# read the output file from each test and store timers in
# hdf5 file with pandas format
@@ -279,10 +225,10 @@ if args.mode == 'run':
for n_node in n_node_list:
print(n_node)
if browse_output_files:
+ res_dir = res_dir_base
+ res_dir += '_'.join([run_name, compiler,\
+ architecture, str(n_node)]) + '/'
for count, current_run in enumerate(test_list):
- res_dir = res_dir_base
- res_dir += '_'.join([run_name, args.compiler,\
- args.architecture, str(n_node)]) + '/'
# Read performance data from the output file
output_filename = 'out_' + '_'.join([current_run.input_file, str(n_node), str(current_run.n_mpi_per_node), str(current_run.n_omp), str(count)]) + '.txt'
# Read data for all test to put in hdf5 a database
@@ -305,8 +251,8 @@ for n_node in n_node_list:
# Load file perf_database_file if exists, and
# append with results from this scan
if os.path.exists(perf_database_file):
- df_base = pd.read_hdf(perf_database_file, 'all_data', format='table')
- # df_base = pd.read_hdf(perf_database_file, 'all_data')
+ # df_base = pd.read_hdf(perf_database_file, 'all_data', format='table')
+ df_base = pd.read_hdf(perf_database_file, 'all_data')
updated_df = df_base.append(df_newline, ignore_index=True)
else:
updated_df = df_newline
@@ -314,19 +260,6 @@ for n_node in n_node_list:
# (overwrite if file exists)
updated_df.to_hdf(perf_database_file, key='all_data', mode='w')
- # Rename directory with precise date+hour for archive purpose
- if rename_archive == True:
- loc_counter = 0
- res_dir_arch = res_dir_base
- res_dir_arch += '_'.join([year, month, day, run_name, args.compiler,\
- args.architecture, str(n_node), str(loc_counter)]) + '/'
- while os.path.exists( res_dir_arch ):
- loc_counter += 1
- res_dir_arch = res_dir_base
- res_dir_arch += '_'.join([year, month, day, run_name, args.compiler,\
- args.architecture, str(n_node), str(loc_counter)]) + '/'
- os.rename( res_dir, res_dir_arch )
-
# Extract sub-set of pandas data frame, write it to
# csv file and copy this file to perf_logs repo
# -------------------------------------------------
@@ -342,19 +275,42 @@ if write_csv:
df_small[ df_small['input_file']=='automated_test_6_output_2ppc' ]['time_WritePlotFile']
df_small = df_small.loc[:, ['date', 'input_file', 'git_hashes', 'n_node', 'n_mpi_per_node', 'n_omp', 'rep', 'start_date', 'time_initialization', 'step_time'] ]
# Write to csv
- df_small.to_csv( 'cori_knl.csv' )
+ df_small.to_csv( csv_file[machine] )
# Errors may occur depending on the version of pandas; errors seen with v0.21.0 were solved with v0.23.0.
# Second, move files to perf_logs repo
if update_perf_log_repo:
+ # get perf_logs repo
git_repo = git.Repo( perf_logs_repo )
if push_on_perf_log_repo:
git_repo.git.stash('save')
git_repo.git.pull()
- shutil.move( 'cori_knl.csv', perf_logs_repo + '/logs_csv/cori_knl.csv' )
+    # move the csv file to the perf_logs repo and commit the new version
+ shutil.move( csv_file[machine], perf_logs_repo + '/logs_csv/' + csv_file[machine] )
os.chdir( perf_logs_repo )
sys.path.append('./')
import generate_index_html
git_repo.git.add('./index.html')
- git_repo.git.add('./logs_csv/cori_knl.csv')
+ git_repo.git.add('./logs_csv/' + csv_file[machine])
index = git_repo.index
index.commit("automated tests")
+
+# Rename all result directories for archiving purposes:
+# include the date in the name, and a counter to avoid overwriting
+for n_node in n_node_list:
+ if browse_output_files:
+ res_dir = res_dir_base
+ res_dir += '_'.join([run_name, compiler,\
+ architecture, str(n_node)]) + '/'
+ # Rename directory with precise date+hour for archive purpose
+ if rename_archive == True:
+ loc_counter = 0
+ res_dir_arch = res_dir_base
+ res_dir_arch += '_'.join([year, month, day, run_name, compiler,\
+ architecture, str(n_node), str(loc_counter)]) + '/'
+ while os.path.exists( res_dir_arch ):
+ loc_counter += 1
+ res_dir_arch = res_dir_base
+ res_dir_arch += '_'.join([year, month, day, run_name, compiler,\
+ architecture, str(n_node), str(loc_counter)]) + '/'
+ print("renaming " + res_dir + " -> " + res_dir_arch)
+ os.rename( res_dir, res_dir_arch )
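
run_automated.py now detects the machine from the environment (LMOD_SYSTEM_NAME for Summit, NERSC_HOST for Cori) and imports the matching configuration module, which keeps the driver machine-agnostic. One caveat in the code as committed: if neither variable matches, machine is never assigned and the script fails later with a NameError. A defensive variant of the dispatch, as a sketch rather than the committed code:

    import os
    import importlib

    # Sketch: machine detection with an explicit failure mode.
    if os.getenv('LMOD_SYSTEM_NAME') == 'summit':
        machine = 'summit'
    elif os.getenv('NERSC_HOST') == 'cori':
        machine = 'cori'
    else:
        raise RuntimeError('unknown machine; run_automated.py supports cori and summit only')

    # Import the module under one alias, so that the driver can call e.g.
    # machine_module.get_test_list(n_repeat) without per-machine imports.
    machine_module = importlib.import_module(machine)
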
diff --git a/Tools/performance_tests/summit.py b/Tools/performance_tests/summit.py
new file mode 100644
index 000000000..69598f1fd
--- /dev/null
+++ b/Tools/performance_tests/summit.py
@@ -0,0 +1,135 @@
+# requirements:
+# - module load python/3.7.0-anaconda3-5.3.0
+
+import os, copy
+from functions_perftest import test_element
+
+def executable_name(compiler,architecture):
+ return 'perf_tests3d.' + compiler + '.TPROF.MPI.ACC.CUDA.ex'
+
+def get_config_command(compiler, architecture):
+ config_command = ''
+ config_command += 'module load pgi;'
+ config_command += 'module load cuda;'
+ return config_command
+
+# This function submits a batch script with job dependencies,
+# so that the analysis runs only after all the performance
+# tests have completed.
+def process_analysis(automated, cwd, compiler, architecture, n_node_list, start_date):
+
+ batch_string = '''#!/bin/bash
+#BSUB -P APH114
+#BSUB -W 00:10
+#BSUB -nnodes 1
+#BSUB -J perf_test
+#BSUB -o read_output.txt
+#BSUB -e read_error.txt
+'''
+ f_log = open(cwd + 'log_jobids_tmp.txt' ,'r')
+ for line in f_log.readlines():
+ dependency = line.split()[1][1:-1]
+ batch_string += '#BSUB -w ended(' + dependency + ')\n'
+
+ batch_string += 'python run_automated.py --compiler=' + \
+ compiler + ' --architecture=' + architecture + \
+ ' --mode=read' + \
+ ' --n_node_list=' + '"' + n_node_list + '"' + \
+ ' --start_date=' + start_date
+ if automated == True:
+ batch_string += ' --automated'
+ batch_string += '\n'
+ batch_file = 'bsub_perfread'
+ f_exe = open(batch_file,'w')
+ f_exe.write(batch_string)
+ f_exe.close()
+ os.system('chmod 700 ' + batch_file)
+
+ print( 'process_analysis line: ' + 'bsub ' + batch_file)
+ os.system('bsub ' + batch_file)
+
+# Estimate the job wall-clock time: 2 min of overhead plus 2 min per simulation.
+def time_min(nb_simulations):
+ return 2. + nb_simulations*2.
+
+def get_submit_job_command():
+ return ' bsub '
+
+def get_batch_string(test_list, job_time_min, Cname, n_node):
+
+ job_time_str = str(int(job_time_min/60)) + ':' + str(int(job_time_min%60))
+
+ batch_string = ''
+ batch_string += '#!/bin/bash\n'
+ batch_string += '#BSUB -P APH114\n'
+ batch_string += '#BSUB -W ' + job_time_str + '\n'
+ batch_string += '#BSUB -nnodes ' + str(n_node) + '\n'
+ batch_string += '#BSUB -J ' + test_list[0].input_file + '\n'
+ batch_string += '#BSUB -e error.txt\n'
+ batch_string += 'module load pgi\n'
+ batch_string += 'module load cuda\n'
+ return batch_string
+
+def get_run_string(current_test, architecture, n_node, count, bin_name, runtime_param_string):
+
+ output_filename = 'out_' + '_'.join([current_test.input_file, str(n_node), str(current_test.n_mpi_per_node), str(current_test.n_omp), str(count)]) + '.txt'
+
+ ngpu = str(current_test.n_mpi_per_node)
+ srun_string = ''
+ srun_string += 'jsrun '
+ srun_string += ' -n ' + str(n_node)
+ srun_string += ' -a ' + ngpu + ' -g ' + ngpu + ' -c ' + ngpu + ' --bind=packed:1 '
+ srun_string += ' ./' + bin_name + ' '
+ srun_string += current_test.input_file + ' '
+ srun_string += runtime_param_string
+ srun_string += ' > ' + output_filename + '\n'
+ return srun_string
+
+def get_test_list(n_repeat):
+ test_list_unq = []
+    # n_node is kept as None and passed to functions as an external argument.
+    # That way, several test_element instances can run with the same n_node within the same batch job.
+ test_list_unq.append( test_element(input_file='automated_test_1_uniform_rest_32ppc',
+ n_mpi_per_node=6,
+ n_omp=1,
+ n_cell=[128, 128, 192],
+ max_grid_size=256,
+ blocking_factor=32,
+ n_step=10) )
+ test_list_unq.append( test_element(input_file='automated_test_2_uniform_rest_1ppc',
+ n_mpi_per_node=6,
+ n_omp=1,
+ n_cell=[256, 512, 768],
+ max_grid_size=512,
+ blocking_factor=256,
+ n_step=10) )
+ test_list_unq.append( test_element(input_file='automated_test_3_uniform_drift_4ppc',
+ n_mpi_per_node=6,
+ n_omp=1,
+ n_cell=[128, 128, 384],
+ max_grid_size=256,
+ blocking_factor=64,
+ n_step=10) )
+ test_list_unq.append( test_element(input_file='automated_test_4_labdiags_2ppc',
+ n_mpi_per_node=6,
+ n_omp=1,
+ n_cell=[384, 512, 512],
+ max_grid_size=256,
+ blocking_factor=128,
+ n_step=50) )
+ test_list_unq.append( test_element(input_file='automated_test_5_loadimbalance',
+ n_mpi_per_node=6,
+ n_omp=1,
+ n_cell=[64, 128, 192],
+ max_grid_size=64,
+ blocking_factor=32,
+ n_step=10) )
+ test_list_unq.append( test_element(input_file='automated_test_6_output_2ppc',
+ n_mpi_per_node=6,
+ n_omp=1,
+ n_cell=[384, 256, 512],
+ max_grid_size=256,
+ blocking_factor=64,
+ n_step=0) )
+ test_list = [copy.deepcopy(item) for item in test_list_unq for _ in range(n_repeat) ]
+ return test_list
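
Job dependencies on Summit work differently from Cori: bsub prints a confirmation of the form 'Job <12345> is submitted to default queue <batch>.', and process_analysis extracts each job id from log_jobids_tmp.txt, emitting one '#BSUB -w ended(<id>)' directive per test job instead of Slurm's single '--dependency afterok:' list. A sketch of the parsing step, assuming bsub output of that standard form:

    # Sketch: extract LSF job ids the way summit.py's process_analysis does.
    # Assumes each line of log_jobids_tmp.txt looks like:
    #   Job <12345> is submitted to default queue <batch>.
    lines = ['Job <12345> is submitted to default queue <batch>.',
             'Job <67890> is submitted to default queue <batch>.']
    for line in lines:
        dependency = line.split()[1][1:-1]   # 'Job <12345> ...' -> '12345'
        print('#BSUB -w ended(' + dependency + ')')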