diff options
author | 2021-01-19 18:34:47 -0800 | |
---|---|---|
committer | 2021-01-19 18:34:47 -0800 | |
commit | db97d71171ddd0f7de1601323b26542495f719d2 (patch) | |
tree | 80b31caac462c2f6bb19929fdba879c11e84bc4f /Tools/PostProcessing/plot_distribution_mapping.py | |
parent | 16651c50bf37e45c6ad36b059a4da617d25c94c7 (diff) | |
download | WarpX-db97d71171ddd0f7de1601323b26542495f719d2.tar.gz WarpX-db97d71171ddd0f7de1601323b26542495f719d2.tar.zst WarpX-db97d71171ddd0f7de1601323b26542495f719d2.zip |
Distribution mapping and cost plotting (#1444)
* Distribution mapping and cost plotting
Cleanup
Cleanup
Cleanup
EOL
Unused import
* Cost initializes to 0.0
* plot slices of 3D
* WIP
* WIP docs
* docs
* docs
* docs
* docs
* docs
* docs
* docs
* docs
* EOL
Diffstat (limited to 'Tools/PostProcessing/plot_distribution_mapping.py')
-rw-r--r-- | Tools/PostProcessing/plot_distribution_mapping.py | 163 |
1 file changed, 163 insertions, 0 deletions
# Standard imports
import os
from collections import defaultdict
from itertools import product

# High-performance math
import numpy as np


class SimData:
    """
    Structure for easy access to load costs reduced diagnostics.

    Parses a costs reduced diagnostics text file and, for each requested
    output step, reconstructs a per-cell cost array, a per-cell owning-rank
    array, and load-balance efficiency statistics.
    """
    def __init__(self, directory, prange):
        """
        Set data-containing dir, data range; load data.

        Parameters
        ----------
        directory : str
            Path to the reduced diagnostics output file.
        prange : iterable of int
            Output steps (keys) for which to load data.
        """
        self._get_costs_reduced_diagnostics(directory, prange)

    def __call__(self, i):
        """
        Set data for current timestep.

        Copies every stored field for output `i` onto `self` as attributes
        (e.g. `self.cost_arr`, `self.rank_arr`, ...), records the attribute
        names in `self.values`, and remembers the selected index in
        `self.idx`.
        """
        if not self.data_fields:
            print("No data_fields!")
            return

        # Fixed: `not i in` -> idiomatic `i not in`
        if i not in self.keys:
            print("Index is out of range!")
            print("Valid keys are ", self.keys)
            return

        # Set field values at output i
        self.values = []
        for attr, val in self.data_fields[i].items():
            setattr(self, attr, val)
            self.values.append(attr)

        # Data_fields index currently set
        self.idx = i

    def _get_costs_reduced_diagnostics(self, directory, prange):
        """
        Read costs reduced diagnostics.

        For each key (step) in `prange`, populates `self.data_fields[key]`
        with:
          * 'cost_arr'        : per-cell accumulated cost (0.0 where empty)
          * 'rank_arr'        : per-cell owning rank (-1 where unset)
          * 'ranks'           : array of rank IDs present in the row
          * 'lb_efficiencies' : per-rank total cost normalized by the max
          * 'lb_efficiency'   : mean of the per-rank efficiencies
          * 'lb_efficiency_max', 'lb_efficiency_min'
          * 't', 'step'       : time and step of the data row used
        """
        # Load data
        self.directory, self.prange = directory, prange
        if not os.path.exists(directory):
            print("Directory " + directory + " does not exist")
            return

        data_fields = defaultdict(dict)
        self.data_fields, self.keys = data_fields, list(prange)

        data = np.genfromtxt(directory)
        steps = data[:, 0].astype(int)

        times = data[:, 1]
        # Assumes at least two output rows written at a uniform interval
        reduced_diags_interval = steps[1] - steps[0]
        data = data[:, 2:]

        # Compute the number of datafields saved per box from the header.
        # Fixed: read only the first line (was `readlines()[0]`, which loads
        # the whole file) and drop the redundant `f.close()` inside `with`.
        with open(directory) as f:
            header = f.readline()
        # Strip digits so per-box column names (e.g. cost_box_0, cost_box_1)
        # collapse to one unique name each
        unique_headers = [''.join([c for c in w if not c.isdigit()])
                          for w in header.split()][2:]

        # Either 7 or 8 depending if GPU
        n_data_fields = 7 if (len(set(unique_headers)) - 2) % 7 == 0 else 8

        # From data header, data layout is:
        # [step, time,
        #  cost_box_0, proc_box_0, lev_box_0, i_low_box_0, j_low_box_0,
        #  k_low_box_0(, gpu_ID_box_0 if GPU run), hostname_box_0,
        #  ...
        #  cost_box_n, proc_box_n, lev_box_n, i_low_box_n, j_low_box_n,
        #  k_low_box_n(, gpu_ID_box_n if GPU run), hostname_box_n]
        i, j, k = (data[0, 3::n_data_fields],
                   data[0, 4::n_data_fields],
                   data[0, 5::n_data_fields])

        # Blocking factor per direction: smallest nonzero spacing between
        # sorted box low corners. Requires at least two distinct coordinate
        # values along each direction (min() of an empty array raises).
        i_blocks = np.diff(np.array(sorted(i.astype(int))))
        j_blocks = np.diff(np.array(sorted(j.astype(int))))
        k_blocks = np.diff(np.array(sorted(k.astype(int))))

        i_blocking_factor = i_blocks[i_blocks != 0].min()
        j_blocking_factor = j_blocks[j_blocks != 0].min()
        k_blocking_factor = k_blocks[k_blocks != 0].min()

        imax = i.astype(int).max() // i_blocking_factor
        jmax = j.astype(int).max() // j_blocking_factor
        kmax = k.astype(int).max() // k_blocking_factor

        # Treat the run as 3D only if every low corner divides evenly by the
        # blocking factor in all three directions; otherwise fall back to 2D
        i_is_int = all([el.is_integer() for el in i / i_blocking_factor])
        j_is_int = all([el.is_integer() for el in j / j_blocking_factor])
        k_is_int = all([el.is_integer() for el in k / k_blocking_factor])

        is_3D = i_is_int and j_is_int and k_is_int

        # Hoisted out of the per-key loop: the box layout is read from the
        # first data row only, so these are loop-invariant
        icoords = i.astype(int) // i_blocking_factor
        jcoords = j.astype(int) // j_blocking_factor
        kcoords = k.astype(int) // k_blocking_factor

        shape = (kmax + 1, jmax + 1, imax + 1)[:2 + is_3D]
        coords = [coord[:2 + is_3D]
                  for coord in zip(kcoords, jcoords, icoords)]

        for key in self.keys:
            row = key // reduced_diags_interval
            costs = data[row, 0::n_data_fields].astype(float)
            ranks = data[row, 1::n_data_fields].astype(int)

            # Fill in cost array; accumulate so multiple boxes mapping to
            # the same cell sum their costs
            cost_arr = np.full(shape, 0.0)
            rank_arr = np.full(shape, -1)
            for nc, cost in enumerate(costs):
                coord = coords[nc]
                cost_arr[coord] += cost
                rank_arr[coord] = ranks[nc]

            # For non-uniform blocks: flood-fill unset cells with the
            # corresponding cost/rank of the box corner they belong to.
            # NOTE(review): recursion depth grows with the cell count of a
            # single box region — may hit the interpreter recursion limit on
            # very large grids; confirm against expected problem sizes.
            visited = np.full(shape, False)

            def dfs(corner, pos, prev):
                # Exit conditions: out of bounds
                if any([pos[d] >= shape[d] for d in range(len(shape))]):
                    return
                # Ranks on the faces between `corner` and `pos`; only expand
                # through cells bounded by the same rank (or unset, -1)
                edges = list(rank_arr[corner[0]:pos[0]+1, pos[1], pos[2]]) \
                    + list(rank_arr[pos[0], corner[1]:pos[1]+1, pos[2]]) \
                    + list(rank_arr[pos[0], pos[1], corner[2]:pos[2]+1]) \
                    if is_3D else \
                    list(rank_arr[corner[0]:pos[0]+1, pos[1]]) \
                    + list(rank_arr[pos[0], corner[1]:pos[1]+1])
                if visited[pos] or not set(edges).issubset(set([prev, -1])):
                    return

                visited[pos] = True
                # Entering a differently-owned cell starts a new region;
                # otherwise stamp the current rank onto the unset cell
                if rank_arr[pos] not in [-1, prev]:
                    prev, corner = rank_arr[pos], pos
                else:
                    rank_arr[pos] = prev

                # Recurse on all forward (incl. diagonal) neighbors
                args = [[0, 1] for _ in range(len(shape))]
                neighbors = [tuple(np.array(pos) + np.array(p))
                             for p in product(*args)
                             if not p == (0,)*len(shape)]
                for n in neighbors:
                    dfs(corner, n, prev)

            for corner in coords:
                dfs(corner, corner, rank_arr[corner])

            self.data_fields[key]['cost_arr'] = cost_arr
            self.data_fields[key]['rank_arr'] = rank_arr

            # Compute load balance efficiency: per-rank summed cost,
            # normalized by the most-loaded rank
            rank_to_cost_map = {r: 0. for r in set(ranks)}
            for c, r in zip(costs, ranks):
                rank_to_cost_map[r] += c

            efficiencies = np.array(list(rank_to_cost_map.values()))
            efficiencies /= efficiencies.max()
            self.data_fields[key]['ranks'] = np.array(
                list(rank_to_cost_map.keys()))
            self.data_fields[key]['lb_efficiencies'] = efficiencies
            self.data_fields[key]['lb_efficiency'] = efficiencies.mean()
            self.data_fields[key]['lb_efficiency_max'] = efficiencies.max()
            self.data_fields[key]['lb_efficiency_min'] = efficiencies.min()
            self.data_fields[key]['t'] = times[row]
            self.data_fields[key]['step'] = steps[row]