# Copyright 2018-2019 Axel Huebl, Luca Fedeli, Maxence Thevenet
# Remi Lehe
#
# This file is part of WarpX.
#
# License: BSD-3-Clause-LBNL

import copy
import os
import re
import shutil

import pandas as pd
import numpy as np
import git
# import cori
# import summit

# Each instance of this class contains information for a single test.
class test_element():
    def __init__(self, input_file=None, n_node=None, n_mpi_per_node=None,
                 n_omp=None, n_cell=None, n_step=None, max_grid_size=None,
                 blocking_factor=None):
        self.input_file = input_file
        self.n_node = n_node
        self.n_mpi_per_node = n_mpi_per_node
        self.n_omp = n_omp
        self.n_cell = n_cell
        self.n_step = n_step
        self.max_grid_size = max_grid_size
        self.blocking_factor = blocking_factor

    # Scale the domain with the number of nodes (weak scaling): double
    # n_cell along one dimension for every factor of 2 in n_node, cycling
    # through the 3 dimensions. n_node is assumed to be a power of 2.
    def scale_n_cell(self, n_node=0):
        n_cell_scaled = copy.deepcopy(self.n_cell)
        index_dim = 0
        while n_node > 1:
            n_cell_scaled[index_dim] *= 2
            n_node /= 2
            index_dim = (index_dim+1) % 3
        self.n_cell = n_cell_scaled

# Module-level variant of test_element.scale_n_cell: returns a scaled copy
# of ncell instead of modifying a test_element in place.
def scale_n_cell(ncell, n_node):
    ncell_scaled = ncell[:]
    index_dim = 0
    while n_node > 1:
        ncell_scaled[index_dim] *= 2
        n_node /= 2
        index_dim = (index_dim+1) % 3
    return ncell_scaled

# Append '<name>:<current git hash>' of the repo at repo_path to filename.
def store_git_hash(repo_path=None, filename=None, name=None):
    repo = git.Repo(path=repo_path)
    sha = repo.head.object.hexsha
    with open(filename, 'a+') as file_handler:
        file_handler.write(name + ':' + sha + ' ')

# Return the full content of filename as a string.
def get_file_content(filename=None):
    with open(filename, 'r') as file_handler:
        file_content = file_handler.read()
    return file_content

# Write a Slurm batch script for a single run and submit it.
def run_batch(run_name, res_dir, bin_name, config_command, architecture='knl',
              Cname='knl', n_node=1, n_mpi=1, n_omp=1):
    # Clean res_dir
    if os.path.exists(res_dir):
        shutil.rmtree(res_dir)
    os.makedirs(res_dir)
    # Copy files to res_dir
    cwd = os.environ['WARPX'] + '/Tools/PerformanceTests/'
    bin_dir = cwd + 'Bin/'
    shutil.copy(bin_dir + bin_name, res_dir)
    shutil.copyfile(cwd + run_name, res_dir + 'inputs')
    os.chdir(res_dir)
    batch_string = ''
    batch_string += '#!/bin/bash\n'
    batch_string += '#SBATCH --job-name=' + run_name + str(n_node) + str(n_mpi) + str(n_omp) + '\n'
    batch_string += '#SBATCH --time=00:23:00\n'
    batch_string += '#SBATCH -C ' + Cname + '\n'
    batch_string += '#SBATCH -N ' + str(n_node) + '\n'
    batch_string += '#SBATCH -q regular\n'
    batch_string += '#SBATCH -e error.txt\n'
    batch_string += '#SBATCH --account=m2852\n'
    batch_string += 'export OMP_NUM_THREADS=' + str(n_omp) + '\n'
    if architecture == 'cpu':
        cflag_value = max(1, int(32/n_mpi) * 2)  # Follow NERSC directives
        batch_string += 'srun --cpu_bind=cores ' + \
                        ' -n ' + str(n_node*n_mpi) + \
                        ' -c ' + str(cflag_value) + \
                        ' ./' + bin_name + ' inputs > perf_output.txt\n'
    elif architecture == 'knl':
        # number of logical cores per MPI process
        cflag_value = max(1, int(64/n_mpi) * 4)  # Follow NERSC directives
        batch_string += 'srun --cpu_bind=cores ' + \
                        ' -n ' + str(n_node*n_mpi) + \
                        ' -c ' + str(cflag_value) + \
                        ' ./' + bin_name + ' inputs > perf_output.txt\n'
    batch_file = 'slurm'
    with open(batch_file, 'w') as f_exe:
        f_exe.write(batch_string)
    os.system('chmod 700 ' + bin_name)
    os.system(config_command + 'sbatch ' + batch_file + ' >> ' + cwd + 'log_jobids_tmp.txt')
    return 0

# Write a batch script for a list of tests sharing one allocation and submit it.
def run_batch_nnode(test_list, res_dir, cwd, bin_name, config_command,
                    batch_string, submit_job_command):
    # Clean res_dir
    if os.path.exists(res_dir):
        shutil.rmtree(res_dir, ignore_errors=True)
    os.makedirs(res_dir)
    # Copy files to res_dir
    bin_dir = cwd + 'Bin/'
    shutil.copy(bin_dir + bin_name, res_dir)
    os.chdir(res_dir)
    for count, current_test in enumerate(test_list):
        shutil.copy(cwd + current_test.input_file, res_dir)
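    # For illustration only (an assumption, not part of the original file):
    # the batch_string passed in by the caller is expected to be a complete
    # Slurm script, similar to the one built in run_batch() above, e.g.
    #   #!/bin/bash
    #   #SBATCH --time=00:23:00
    #   #SBATCH -N <total number of nodes>
    #   export OMP_NUM_THREADS=<n_omp>
    #   srun ... ./<bin_name> <input_file> > perf_output.txt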
    batch_file = 'batch_script.sh'
    with open(batch_file, 'w') as f_exe:
        f_exe.write(batch_string)
    os.system('chmod 700 ' + bin_name)
    os.system(config_command + submit_job_command + batch_file +
              ' >> ' + cwd + 'log_jobids_tmp.txt')

# Read output file and return init time and 1-step time
def read_run_perf(filename, n_steps):
    timing_list = []
    # Search inclusive time to get simulation step time
    partition_limit = 'NCalls Incl. Min Incl. Avg Incl. Max Max %'
    with open(filename) as file_handler:
        output_text = file_handler.read()
    # Get total simulation time
    line_match_totaltime = re.search('TinyProfiler total time across processes.*',
                                     output_text)
    total_time = float(line_match_totaltime.group(0).split()[8])
    search_area = output_text.partition(partition_limit)[2]
    line_match_looptime = re.search('\nWarpX::Evolve().*', search_area)
    time_wo_initialization = float(line_match_looptime.group(0).split()[3])
    timing_list += [str(total_time - time_wo_initialization)]
    timing_list += [str(time_wo_initialization/n_steps)]
    # Search EXCLUSIVE routine timings
    partition_limit1 = 'NCalls Excl. Min Excl. Avg Excl. Max Max %'
    partition_limit2 = 'NCalls Incl. Min Incl. Avg Incl. Max Max %'
    search_area = output_text.partition(partition_limit1)[2]\
                             .partition(partition_limit2)[0]
    pattern_list = ['\nParticleContainer::Redistribute().*',
                    '\nFabArray::FillBoundary().*',
                    '\nFabArray::ParallelCopy().*',
                    '\nPPC::CurrentDeposition.*',
                    '\nPPC::FieldGather.*',
                    '\nPPC::ParticlePush.*',
                    '\nPPC::Evolve::Copy.*',
                    '\nWarpX::EvolveEM().*',
                    'Checkpoint().*',
                    'WriteParticles().*',
                    '\nVisMF::Write(FabArray).*',
                    '\nWriteMultiLevelPlotfile().*',
                    '\nParticleContainer::RedistributeMPI().*']
    for pattern in pattern_list:
        timing = ['0']
        line_match = re.search(pattern, search_area)
        if line_match is not None:
            timing = [str(float(line_match.group(0).split()[3])/n_steps)]
        timing_list += timing
    return timing_list

# Write time into logfile
def write_perf_logfile(log_file, log_line):
    with open(log_file, 'a') as f_log:
        f_log.write(log_line)
    return 0

# Read the number of steps (the max_step parameter) from a WarpX input file.
def get_nsteps(run_name):
    with open(run_name) as file_handler:
        run_name_text = file_handler.read()
    line_match_nsteps = re.search('\nmax_step.*', run_name_text)
    nsteps = float(line_match_nsteps.group(0).split()[2])
    return nsteps

# Parse a TinyProfiler output file and return the timers as a pandas DataFrame.
def extract_dataframe(filename, n_steps):
    # Get init time and total time through Inclusive time
    partition_limit_start = 'NCalls Incl. Min Incl. Avg Incl. Max Max %'
    print(filename)
    with open(filename) as file_handler:
        output_text = file_handler.read()
    # Get total simulation time
    line_match_totaltime = re.search('TinyProfiler total time across processes.*',
                                     output_text)
    total_time = float(line_match_totaltime.group(0).split()[8])
    # Get time performing steps as Inclusive WarpX::Evolve() time
    search_area = output_text.partition(partition_limit_start)[2]
    line_match_looptime = re.search('\nWarpX::Evolve().*', search_area)
    time_wo_initialization = float(line_match_looptime.group(0).split()[3])
    # Get the Inclusive WarpX::WritePlotFile() time, if present
    line_match_WritePlotFile = re.search('\nWarpX::WritePlotFile().*', search_area)
    if line_match_WritePlotFile is not None:
        time_WritePlotFile = float(line_match_WritePlotFile.group(0).split()[3])
    else:
        time_WritePlotFile = 0.
    # Get timers for all routines
    # Where to start and stop in the output file
    partition_limit_start = 'NCalls Excl. Min Excl. Avg Excl. Max Max %'
    partition_limit_end = 'NCalls Incl. Min Incl. Avg Incl. Max Max %'
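    # For reference (an illustrative sketch, not verbatim TinyProfiler
    # output): the block between these two headers is expected to look
    # roughly like
    #   <routine name>       NCalls  Excl. Min  Excl. Avg  Excl. Max  Max %
    #   WarpX::Evolve()          10      0.5        0.6        0.7    12.3%
    # so that split()[0] is the routine name and split()[3] its average
    # time, which is what the parsing below relies on.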
    # Keep only the exclusive profiling data
    search_area = output_text.partition(partition_limit_start)[2]\
                             .partition(partition_limit_end)[0]
    list_string = search_area.split('\n')[2:-4]
    time_array = np.zeros(len(list_string))
    column_list = []
    for i in np.arange(len(list_string)):
        column_list.append(list_string[i].split()[0])
        time_array[i] = float(list_string[i].split()[3])
    df = pd.DataFrame(columns=column_list)
    df.loc[0] = time_array
    df['time_initialization'] = total_time - time_wo_initialization
    df['time_running'] = time_wo_initialization
    df['time_WritePlotFile'] = time_WritePlotFile
    # df['string_output'] = partition_limit_start + '\n' + search_area
    return df

# Run a performance test in an interactive allocation
# def run_interactive(run_name, res_dir, n_node=1, n_mpi=1, n_omp=1):
#     # Clean res_dir
#     if os.path.exists(res_dir):
#         shutil.rmtree(res_dir)
#     os.makedirs(res_dir)
#     # Copy files to res_dir
#     shutil.copyfile(bin_dir + bin_name, res_dir + bin_name)
#     shutil.copyfile(cwd + run_name, res_dir + 'inputs')
#     os.chdir(res_dir)
#     if args.architecture == 'cpu':
#         cflag_value = max(1, int(32/n_mpi) * 2)  # Follow NERSC directives
#         exec_command = 'export OMP_NUM_THREADS=' + str(n_omp) + ';' + \
#             'srun --cpu_bind=cores ' + \
#             ' -n ' + str(n_node*n_mpi) + \
#             ' -c ' + str(cflag_value) + \
#             ' ./' + bin_name + ' inputs > perf_output.txt'
#     elif args.architecture == 'knl':
#         # number of logical cores per MPI process
#         cflag_value = max(1, int(68/n_mpi) * 4)  # Follow NERSC directives
#         exec_command = 'export OMP_NUM_THREADS=' + str(n_omp) + ';' + \
#             'srun --cpu_bind=cores ' + \
#             ' -n ' + str(n_node*n_mpi) + \
#             ' -c ' + str(cflag_value) + \
#             ' ./' + bin_name + ' inputs > perf_output.txt'
#     os.system('chmod 700 ' + bin_name)
#     os.system(config_command + exec_command)
#     return 0
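# Minimal usage sketch (an assumption for illustration, not part of the
# original workflow): parse one TinyProfiler output file and append the
# result to a log file. The file names below are hypothetical.
# if __name__ == '__main__':
#     perf_file = 'perf_output.txt'  # hypothetical output of a previous run
#     if os.path.exists(perf_file):
#         nsteps = get_nsteps('inputs')  # read max_step from the inputs file
#         df = extract_dataframe(perf_file, nsteps)
#         write_perf_logfile('perf_log.txt', df.to_string() + '\n')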