author    2018-02-06 13:56:38 -0800
committer 2018-02-06 13:56:38 -0800
commit    ec6dd873d46a87707c20aa09b58c691d37a36f98 (patch)
tree      cfa6a71947445b185f9d197bfeec356ddbbadffb
parent    c081449f8f1729ef6f635743e95e96c345ae5d2c (diff)
parent    2d3cca8885f44fe4f8829e1f066e83cffce82c57 (diff)
Merge branch 'master' into with_python
26 files changed, 1961 insertions, 249 deletions
diff --git a/Docs/source/running_cpp/parameters.rst b/Docs/source/running_cpp/parameters.rst
index cf03a04ad..540389f0f 100644
--- a/Docs/source/running_cpp/parameters.rst
+++ b/Docs/source/running_cpp/parameters.rst
@@ -12,7 +12,6 @@ Overall simulation parameters
 * ``max_step`` (`integer`)
     The number of PIC cycles to perform.
-
 * ``warpx.gamma_boost`` (`float`)
     The Lorentz factor of the boosted frame in which the simulation is run.
     (The corresponding Lorentz transformation is assumed to be along ``warpx.boost_direction``.)
@@ -91,6 +90,42 @@ Distribution across MPI ranks and parallelization
 Particle initialization
 -----------------------

+* ``particles.nspecies`` (`int`)
+    The number of species that will be used in the simulation.
+
+* ``particles.species_names`` (`strings`, separated by spaces)
+    The name of each species. This is then used in the rest of the input deck;
+    in this documentation we use `<species_name>` as a placeholder.
+
+* ``<species_name>.charge`` (`float`)
+    The charge of one `physical` particle of this species.
+
+* ``<species_name>.mass`` (`float`)
+    The mass of one `physical` particle of this species.
+
+* ``<species_name>.injection_style`` (`string`)
+    Determines how the particles will be injected in the simulation.
+    The options are:
+
+    * ``NUniformPerCell``: injection with a fixed number of particles
+      per cell, with particles being evenly spaced in each direction within a cell.
+      This requires the additional parameter ``<species_name>.num_particles_per_cell_each_dim``.
+
+    * ``NRandomPerCell``: injection with a fixed number of particles
+      per cell, with particles being randomly distributed within each cell.
+      This requires the additional parameter ``<species_name>.num_particles_per_cell``.
+
+    * ``Gaussian_Beam``:
+
+Additional parameters for plasma injection (``NUniformPerCell`` and ``NRandomPerCell``)
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+
+Additional parameters for Gaussian beams (``Gaussian_Beam``)
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+
+
 Laser initialization
 --------------------

@@ -212,6 +247,11 @@ Laser initialization
 Numerics and algorithms
 -----------------------

+* ``warpx.cfl`` (`float`)
+    The ratio between the actual timestep that is used in the simulation
+    and the CFL limit. (e.g. for ``warpx.cfl = 1``, the timestep will be
+    exactly equal to the CFL limit.)
+
 * ``warpx.use_filter`` (`0 or 1`)
     Whether to smooth the charge and currents on the mesh,
     after depositing them from the macroparticles. This uses a bilinear filter
@@ -274,3 +314,20 @@ Diagnostics and output
     Only used when ``warpx.do_boosted_frame_diagnostic`` is ``1``.
     The time interval between the lab-frame snapshots (where this
     time interval is expressed in the laboratory frame).
+
+* ``warpx.plot_raw_fields`` (`0` or `1`)
+    By default, the fields written in the plot files are averaged on the nodes.
+    When ``warpx.plot_raw_fields`` is `1`, the raw (i.e. unaveraged)
+    fields are also saved in the plot files.
+
+* ``warpx.plot_raw_fields_guards`` (`0` or `1`)
+    Only used when ``warpx.plot_raw_fields`` is ``1``.
+    Whether to include the guard cells in the output of the raw fields.
+
+* ``warpx.plot_finepatch`` (`0` or `1`)
+    Only used when mesh refinement is activated and ``warpx.plot_raw_fields`` is ``1``.
+    Whether to output the data of the fine patch in the plot files.
+
+* ``warpx.plot_crsepatch`` (`0` or `1`)
+    Only used when mesh refinement is activated and ``warpx.plot_raw_fields`` is ``1``.
+    Whether to output the data of the coarse patch in the plot files.
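For reference on the ``warpx.cfl`` parameter documented above: the diff does not quote the exact limit used by the solver, but for an explicit Yee-type FDTD scheme the textbook stability bound that the ratio multiplies is

    % Standard 3D FDTD (Yee) stability criterion; assumption: WarpX's CFL
    % limit is the usual Yee bound, so dt = warpx.cfl * dt_CFL.
    \Delta t_{\mathrm{CFL}} \;=\;
      \frac{1}{c\,\sqrt{\dfrac{1}{\Delta x^{2}} + \dfrac{1}{\Delta y^{2}} + \dfrac{1}{\Delta z^{2}}}},
    \qquad
    \Delta t \;=\; \texttt{warpx.cfl}\times\Delta t_{\mathrm{CFL}}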
diff --git a/Example/boosted_diags/inputs.2d b/Example/boosted_diags/inputs.2d
new file mode 100644
index 000000000..fc7a23f0b
--- /dev/null
+++ b/Example/boosted_diags/inputs.2d
@@ -0,0 +1,100 @@
+# Maximum number of time steps
+max_step = 260
+
+# number of grid points
+amr.n_cell = 64 64 512
+
+# Maximum allowable size of each subdomain in the problem domain;
+# this is used to decompose the domain for parallel calculations.
+amr.max_grid_size = 32
+
+# Maximum level in hierarchy (for now must be 0, i.e., one level in total)
+amr.max_level = 0
+amr.plot_int = 10    # How often to write plotfiles. "<= 0" means no plotfiles.
+amr.check_int = 10
+
+# Geometry
+geometry.coord_sys = 0    # 0: Cartesian
+geometry.is_periodic = 1 1 0    # Is periodic?
+geometry.prob_lo = -150.e-6 -150.e-6 -0.6e-3    # physical domain
+geometry.prob_hi = 150.e-6 150.e-6 0.
+
+# Verbosity
+warpx.verbose = 1
+
+# Algorithms
+algo.current_deposition = 3
+algo.charge_deposition = 0
+algo.field_gathering = 0
+algo.particle_pusher = 0
+
+# Numerics
+interpolation.nox = 3
+interpolation.noy = 3
+interpolation.noz = 3
+warpx.use_filter = 1
+warpx.cfl = 1.0
+warpx.do_pml = 0
+
+# Moving window
+warpx.do_moving_window = 1
+warpx.moving_window_dir = z
+warpx.moving_window_v = 1.0    # in units of the speed of light
+
+# Boosted frame
+warpx.gamma_boost = 15.
+warpx.boost_direction = z
+
+# Diagnostics
+warpx.do_boosted_frame_diagnostic = 1
+warpx.num_snapshots_lab = 20;
+warpx.dt_snapshots_lab = 7.0e-14;
+
+# Particle Injection
+warpx.do_plasma_injection = 1
+warpx.num_injected_species = 2
+warpx.injected_plasma_species = 0 1
+
+# Species
+particles.nspecies = 2
+particles.species_names = electrons ions
+
+electrons.charge = -q_e
+electrons.mass = m_e
+electrons.injection_style = "NUniformPerCell"
+electrons.xmin = -150.e-6
+electrons.xmax = 150.e-6
+electrons.ymin = -150.e-6
+electrons.ymax = 150.e-6
+electrons.zmin = 0.e-6
+electrons.num_particles_per_cell_each_dim = 1 1 2
+electrons.profile = constant
+electrons.density = 1.
+electrons.momentum_distribution_type = "constant"
+
+ions.charge = q_e
+ions.mass = m_p
+ions.injection_style = "NUniformPerCell"
+ions.xmin = -150.e-6
+ions.xmax = 150.e-6
+ions.ymin = -150.e-6
+ions.ymax = 150.e-6
+ions.zmin = 0.e-6
+ions.num_particles_per_cell_each_dim = 1 1 2
+ions.profile = constant
+ions.density = 1.
+ions.momentum_distribution_type = "constant"
+
+# Laser
+warpx.use_laser = 1
+laser.profile = Gaussian
+laser.position = 0. 0. -1.e-6    # This point is on the laser plane
+laser.direction = 0. 0. 1.    # The plane normal direction
+laser.polarization = 1. 0. 0.    # The main polarization vector
+laser.e_max = 8.e12    # Maximum amplitude of the laser field (in V/m)
+laser.profile_waist = 5.e-5    # The waist of the laser (in meters)
+laser.profile_duration = 16.7e-15    # The duration of the laser (in seconds)
+laser.profile_t_peak = 33.4e-15    # The time at which the laser reaches its peak (in seconds)
+laser.profile_focal_distance = 0.e-6    # Focal distance from the antenna (in meters)
+laser.wavelength = 0.8e-6    # The wavelength of the laser (in meters)
diff --git a/Example/boosted_diags/inputs.3d b/Example/boosted_diags/inputs.3d
new file mode 100644
index 000000000..fc7a23f0b
--- /dev/null
+++ b/Example/boosted_diags/inputs.3d
@@ -0,0 +1,100 @@
+# Maximum number of time steps
+max_step = 260
+
+# number of grid points
+amr.n_cell = 64 64 512
+
+# Maximum allowable size of each subdomain in the problem domain;
+# this is used to decompose the domain for parallel calculations.
+amr.max_grid_size = 32
+
+# Maximum level in hierarchy (for now must be 0, i.e., one level in total)
+amr.max_level = 0
+amr.plot_int = 10    # How often to write plotfiles. "<= 0" means no plotfiles.
+amr.check_int = 10
+
+# Geometry
+geometry.coord_sys = 0    # 0: Cartesian
+geometry.is_periodic = 1 1 0    # Is periodic?
+geometry.prob_lo = -150.e-6 -150.e-6 -0.6e-3    # physical domain
+geometry.prob_hi = 150.e-6 150.e-6 0.
+
+# Verbosity
+warpx.verbose = 1
+
+# Algorithms
+algo.current_deposition = 3
+algo.charge_deposition = 0
+algo.field_gathering = 0
+algo.particle_pusher = 0
+
+# Numerics
+interpolation.nox = 3
+interpolation.noy = 3
+interpolation.noz = 3
+warpx.use_filter = 1
+warpx.cfl = 1.0
+warpx.do_pml = 0
+
+# Moving window
+warpx.do_moving_window = 1
+warpx.moving_window_dir = z
+warpx.moving_window_v = 1.0    # in units of the speed of light
+
+# Boosted frame
+warpx.gamma_boost = 15.
+warpx.boost_direction = z
+
+# Diagnostics
+warpx.do_boosted_frame_diagnostic = 1
+warpx.num_snapshots_lab = 20;
+warpx.dt_snapshots_lab = 7.0e-14;
+
+# Particle Injection
+warpx.do_plasma_injection = 1
+warpx.num_injected_species = 2
+warpx.injected_plasma_species = 0 1
+
+# Species
+particles.nspecies = 2
+particles.species_names = electrons ions
+
+electrons.charge = -q_e
+electrons.mass = m_e
+electrons.injection_style = "NUniformPerCell"
+electrons.xmin = -150.e-6
+electrons.xmax = 150.e-6
+electrons.ymin = -150.e-6
+electrons.ymax = 150.e-6
+electrons.zmin = 0.e-6
+electrons.num_particles_per_cell_each_dim = 1 1 2
+electrons.profile = constant
+electrons.density = 1.
+electrons.momentum_distribution_type = "constant"
+
+ions.charge = q_e
+ions.mass = m_p
+ions.injection_style = "NUniformPerCell"
+ions.xmin = -150.e-6
+ions.xmax = 150.e-6
+ions.ymin = -150.e-6
+ions.ymax = 150.e-6
+ions.zmin = 0.e-6
+ions.num_particles_per_cell_each_dim = 1 1 2
+ions.profile = constant
+ions.density = 1.
+ions.momentum_distribution_type = "constant"
+
+# Laser
+warpx.use_laser = 1
+laser.profile = Gaussian
+laser.position = 0. 0. -1.e-6    # This point is on the laser plane
+laser.direction = 0. 0. 1.    # The plane normal direction
+laser.polarization = 1. 0. 0.    # The main polarization vector
+laser.e_max = 8.e12    # Maximum amplitude of the laser field (in V/m)
+laser.profile_waist = 5.e-5    # The waist of the laser (in meters)
+laser.profile_duration = 16.7e-15    # The duration of the laser (in seconds)
+laser.profile_t_peak = 33.4e-15    # The time at which the laser reaches its peak (in seconds)
+laser.profile_focal_distance = 0.e-6    # Focal distance from the antenna (in meters)
+laser.wavelength = 0.8e-6    # The wavelength of the laser (in meters)
diff --git a/Python/pywarpx/_libwarpx.py b/Python/pywarpx/_libwarpx.py
index e0a7262be..71e0a6729 100755
--- a/Python/pywarpx/_libwarpx.py
+++ b/Python/pywarpx/_libwarpx.py
@@ -83,18 +83,54 @@ f.restype = LP_LP_c_double
 f = libwarpx.warpx_getEfieldLoVects
 f.restype = LP_c_int

+f = libwarpx.warpx_getEfieldCP
+f.restype = LP_LP_c_double
+
+f = libwarpx.warpx_getEfieldCPLoVects
+f.restype = LP_c_int
+
+f = libwarpx.warpx_getEfieldFP
+f.restype = LP_LP_c_double
+
+f = libwarpx.warpx_getEfieldFPLoVects
+f.restype = LP_c_int
+
 f = libwarpx.warpx_getBfield
 f.restype = LP_LP_c_double

 f = libwarpx.warpx_getBfieldLoVects
 f.restype = LP_c_int

+f = libwarpx.warpx_getBfieldCP
+f.restype = LP_LP_c_double
+
+f = libwarpx.warpx_getBfieldCPLoVects
+f.restype = LP_c_int
+
+f = libwarpx.warpx_getBfieldFP
+f.restype = LP_LP_c_double
+
+f = libwarpx.warpx_getBfieldFPLoVects
+f.restype = LP_c_int
+
 f = libwarpx.warpx_getCurrentDensity
 f.restype = LP_LP_c_double

 f = libwarpx.warpx_getCurrentDensityLoVects
 f.restype = LP_c_int

+f = libwarpx.warpx_getCurrentDensityCP
+f.restype = LP_LP_c_double
+
+f = libwarpx.warpx_getCurrentDensityCPLoVects
+f.restype = LP_c_int
+
+f = libwarpx.warpx_getCurrentDensityFP
+f.restype = LP_LP_c_double
+
+f = libwarpx.warpx_getCurrentDensityFPLoVects
+f.restype = LP_c_int
+
 #f = libwarpx.warpx_getPMLSigma
 #f.restype = LP_c_double
 #
@@ -550,6 +586,8 @@ def get_mesh_electric_field(level, direction, include_ghosts=True):
     This returns a list of numpy arrays containing the mesh electric field
     data on each grid for this process.

+    This version is for the full "auxiliary" solution on the given level.
+
     The data for the numpy arrays are not copied, but share the underlying
     memory buffer with WarpX. The numpy arrays are fully writeable.

@@ -592,12 +630,112 @@ def get_mesh_electric_field(level, direction, include_ghosts=True):
     return grid_data


+def get_mesh_electric_field_cp(level, direction, include_ghosts=True):
+    '''
+
+    This returns a list of numpy arrays containing the mesh electric field
+    data on each grid for this process. This version returns the field on
+    the coarse patch for the given level.
+
+    The data for the numpy arrays are not copied, but share the underlying
+    memory buffer with WarpX. The numpy arrays are fully writeable.
+
+    Parameters
+    ----------
+
+    level          : the AMR level to get the data for
+    direction      : the component of the data you want
+    include_ghosts : whether to include ghost zones or not
+
+    Returns
+    -------
+
+    A List of numpy arrays.
+
+    '''
+
+    assert(level == 0)
+
+    shapes = LP_c_int()
+    size = ctypes.c_int(0)
+    ngrow = ctypes.c_int(0)
+    data = libwarpx.warpx_getEfieldCP(level, direction,
+                                      ctypes.byref(size), ctypes.byref(ngrow),
+                                      ctypes.byref(shapes))
+    ng = ngrow.value
+    grid_data = []
+    for i in range(size.value):
+        shape = tuple([shapes[dim*i + d] for d in range(dim)])
+        # --- The data is stored in Fortran order, hence shape is reversed and a transpose is taken.
+        arr = np.ctypeslib.as_array(data[i], shape[::-1]).T
+        arr.setflags(write=1)
+        if include_ghosts:
+            grid_data.append(arr)
+        else:
+            grid_data.append(arr[[slice(ng, -ng) for _ in range(dim)]])
+
+    libc.free(shapes)
+    libc.free(data)
+    return grid_data
+
+
+def get_mesh_electric_field_fp(level, direction, include_ghosts=True):
+    '''
+
+    This returns a list of numpy arrays containing the mesh electric field
+    data on each grid for this process. This version returns the field on
+    the fine patch for the given level.
+
+    The data for the numpy arrays are not copied, but share the underlying
+    memory buffer with WarpX. The numpy arrays are fully writeable.
+
+    Parameters
+    ----------
+
+    level          : the AMR level to get the data for
+    direction      : the component of the data you want
+    include_ghosts : whether to include ghost zones or not
+
+    Returns
+    -------
+
+    A List of numpy arrays.
+
+    '''
+
+    assert(level == 0)
+
+    shapes = LP_c_int()
+    size = ctypes.c_int(0)
+    ngrow = ctypes.c_int(0)
+    data = libwarpx.warpx_getEfieldFP(level, direction,
+                                      ctypes.byref(size), ctypes.byref(ngrow),
+                                      ctypes.byref(shapes))
+    ng = ngrow.value
+    grid_data = []
+    for i in range(size.value):
+        shape = tuple([shapes[dim*i + d] for d in range(dim)])
+        # --- The data is stored in Fortran order, hence shape is reversed and a transpose is taken.
+        arr = np.ctypeslib.as_array(data[i], shape[::-1]).T
+        arr.setflags(write=1)
+        if include_ghosts:
+            grid_data.append(arr)
+        else:
+            grid_data.append(arr[[slice(ng, -ng) for _ in range(dim)]])
+
+    libc.free(shapes)
+    libc.free(data)
+    return grid_data
+
+
 def get_mesh_magnetic_field(level, direction, include_ghosts=True):
     '''

     This returns a list of numpy arrays containing the mesh magnetic field
     data on each grid for this process.

+    This version is for the full "auxiliary" solution on the given level.
+
     The data for the numpy arrays are not copied, but share the underlying
     memory buffer with WarpX. The numpy arrays are fully writeable.

@@ -640,6 +778,104 @@ def get_mesh_magnetic_field(level, direction, include_ghosts=True):
     return grid_data


+def get_mesh_magnetic_field_cp(level, direction, include_ghosts=True):
+    '''
+
+    This returns a list of numpy arrays containing the mesh magnetic field
+    data on each grid for this process. This version returns the field on
+    the coarse patch for the given level.
+
+    The data for the numpy arrays are not copied, but share the underlying
+    memory buffer with WarpX. The numpy arrays are fully writeable.
+
+    Parameters
+    ----------
+
+    level          : the AMR level to get the data for
+    direction      : the component of the data you want
+    include_ghosts : whether to include ghost zones or not
+
+    Returns
+    -------
+
+    A List of numpy arrays.
+
+    '''
+
+    assert(level == 0)
+
+    shapes = LP_c_int()
+    size = ctypes.c_int(0)
+    ngrow = ctypes.c_int(0)
+    data = libwarpx.warpx_getBfieldCP(level, direction,
+                                      ctypes.byref(size), ctypes.byref(ngrow),
+                                      ctypes.byref(shapes))
+    ng = ngrow.value
+    grid_data = []
+    for i in range(size.value):
+        shape = tuple([shapes[dim*i + d] for d in range(dim)])
+        # --- The data is stored in Fortran order, hence shape is reversed and a transpose is taken.
+        arr = np.ctypeslib.as_array(data[i], shape[::-1]).T
+        arr.setflags(write=1)
+        if include_ghosts:
+            grid_data.append(arr)
+        else:
+            grid_data.append(arr[[slice(ng, -ng) for _ in range(dim)]])
+
+    libc.free(shapes)
+    libc.free(data)
+    return grid_data
+
+
+def get_mesh_magnetic_field_fp(level, direction, include_ghosts=True):
+    '''
+
+    This returns a list of numpy arrays containing the mesh magnetic field
+    data on each grid for this process. This version returns the field on
+    the fine patch for the given level.
+
+    The data for the numpy arrays are not copied, but share the underlying
+    memory buffer with WarpX. The numpy arrays are fully writeable.
+
+    Parameters
+    ----------
+
+    level          : the AMR level to get the data for
+    direction      : the component of the data you want
+    include_ghosts : whether to include ghost zones or not
+
+    Returns
+    -------
+
+    A List of numpy arrays.
+
+    '''
+
+    assert(level == 0)
+
+    shapes = LP_c_int()
+    size = ctypes.c_int(0)
+    ngrow = ctypes.c_int(0)
+    data = libwarpx.warpx_getBfieldFP(level, direction,
+                                      ctypes.byref(size), ctypes.byref(ngrow),
+                                      ctypes.byref(shapes))
+    ng = ngrow.value
+    grid_data = []
+    for i in range(size.value):
+        shape = tuple([shapes[dim*i + d] for d in range(dim)])
+        # --- The data is stored in Fortran order, hence shape is reversed and a transpose is taken.
+        arr = np.ctypeslib.as_array(data[i], shape[::-1]).T
+        arr.setflags(write=1)
+        if include_ghosts:
+            grid_data.append(arr)
+        else:
+            grid_data.append(arr[[slice(ng, -ng) for _ in range(dim)]])
+
+    libc.free(shapes)
+    libc.free(data)
+    return grid_data
+
+
 def get_mesh_current_density(level, direction, include_ghosts=True):
     '''

@@ -688,6 +924,104 @@ def get_mesh_current_density(level, direction, include_ghosts=True):
     return grid_data


+def get_mesh_current_density_cp(level, direction, include_ghosts=True):
+    '''
+
+    This returns a list of numpy arrays containing the mesh current density
+    data on each grid for this process. This version returns the density for
+    the coarse patch on the given level.
+
+    The data for the numpy arrays are not copied, but share the underlying
+    memory buffer with WarpX. The numpy arrays are fully writeable.
+
+    Parameters
+    ----------
+
+    level          : the AMR level to get the data for
+    direction      : the component of the data you want
+    include_ghosts : whether to include ghost zones or not
+
+    Returns
+    -------
+
+    A List of numpy arrays.
+
+    '''
+
+    assert(level == 0)
+
+    shapes = LP_c_int()
+    size = ctypes.c_int(0)
+    ngrow = ctypes.c_int(0)
+    data = libwarpx.warpx_getCurrentDensityCP(level, direction,
+                                              ctypes.byref(size), ctypes.byref(ngrow),
+                                              ctypes.byref(shapes))
+    ng = ngrow.value
+    grid_data = []
+    for i in range(size.value):
+        shape = tuple([shapes[dim*i + d] for d in range(dim)])
+        # --- The data is stored in Fortran order, hence shape is reversed and a transpose is taken.
+        arr = np.ctypeslib.as_array(data[i], shape[::-1]).T
+        arr.setflags(write=1)
+        if include_ghosts:
+            grid_data.append(arr)
+        else:
+            grid_data.append(arr[[slice(ng, -ng) for _ in range(dim)]])
+
+    libc.free(shapes)
+    libc.free(data)
+    return grid_data
+
+
+def get_mesh_current_density_fp(level, direction, include_ghosts=True):
+    '''
+
+    This returns a list of numpy arrays containing the mesh current density
+    data on each grid for this process. This version returns the density on
+    the fine patch for the given level.
+
+    The data for the numpy arrays are not copied, but share the underlying
+    memory buffer with WarpX. The numpy arrays are fully writeable.
+
+    Parameters
+    ----------
+
+    level          : the AMR level to get the data for
+    direction      : the component of the data you want
+    include_ghosts : whether to include ghost zones or not
+
+    Returns
+    -------
+
+    A List of numpy arrays.
+
+    '''
+
+    assert(level == 0)
+
+    shapes = LP_c_int()
+    size = ctypes.c_int(0)
+    ngrow = ctypes.c_int(0)
+    data = libwarpx.warpx_getCurrentDensityFP(level, direction,
+                                              ctypes.byref(size), ctypes.byref(ngrow),
+                                              ctypes.byref(shapes))
+    ng = ngrow.value
+    grid_data = []
+    for i in range(size.value):
+        shape = tuple([shapes[dim*i + d] for d in range(dim)])
+        # --- The data is stored in Fortran order, hence shape is reversed and a transpose is taken.
+        arr = np.ctypeslib.as_array(data[i], shape[::-1]).T
+        arr.setflags(write=1)
+        if include_ghosts:
+            grid_data.append(arr)
+        else:
+            grid_data.append(arr[[slice(ng, -ng) for _ in range(dim)]])
+
+    libc.free(shapes)
+    libc.free(data)
+    return grid_data
+
+
 def _get_mesh_array_lovects(level, direction, include_ghosts=True, getarrayfunc=None):
     assert(0 <= level and level <= libwarpx.warpx_finestLevel())

@@ -715,6 +1049,8 @@ def get_mesh_electric_field_lovects(level, direction, include_ghosts=True):
     This returns a list of the lo vectors of the arrays containing the mesh electric field
     data on each grid for this process.

+    This version is for the full "auxiliary" solution on the given level.
+
     Parameters
     ----------

@@ -731,12 +1067,58 @@ def get_mesh_electric_field_lovects(level, direction, include_ghosts=True):
     return _get_mesh_array_lovects(level, direction, include_ghosts, libwarpx.warpx_getEfieldLoVects)


+def get_mesh_electric_field_cp_lovects(level, direction, include_ghosts=True):
+    '''
+
+    This returns a list of the lo vectors of the arrays containing the mesh electric field
+    data on each grid for this process.
+
+    Parameters
+    ----------
+
+    level          : the AMR level to get the data for
+    direction      : the component of the data you want
+    include_ghosts : whether to include ghost zones or not
+
+    Returns
+    -------
+
+    A 2d numpy array of the lo vector for each grid with the shape (dims, number of grids)
+
+    '''
+    return _get_mesh_array_lovects(level, direction, include_ghosts, libwarpx.warpx_getEfieldCPLoVects)
+
+
+def get_mesh_electric_field_fp_lovects(level, direction, include_ghosts=True):
+    '''
+
+    This returns a list of the lo vectors of the arrays containing the mesh electric field
+    data on each grid for this process.
+
+    Parameters
+    ----------
+
+    level          : the AMR level to get the data for
+    direction      : the component of the data you want
+    include_ghosts : whether to include ghost zones or not
+
+    Returns
+    -------
+
+    A 2d numpy array of the lo vector for each grid with the shape (dims, number of grids)
+
+    '''
+    return _get_mesh_array_lovects(level, direction, include_ghosts, libwarpx.warpx_getEfieldFPLoVects)
+
+
 def get_mesh_magnetic_field_lovects(level, direction, include_ghosts=True):
     '''

     This returns a list of the lo vectors of the arrays containing the mesh magnetic field
     data on each grid for this process.

+    This version is for the full "auxiliary" solution on the given level.
+
     Parameters
     ----------

@@ -753,6 +1135,50 @@ def get_mesh_magnetic_field_lovects(level, direction, include_ghosts=True):
     return _get_mesh_array_lovects(level, direction, include_ghosts, libwarpx.warpx_getBfieldLoVects)


+def get_mesh_magnetic_field_cp_lovects(level, direction, include_ghosts=True):
+    '''
+
+    This returns a list of the lo vectors of the arrays containing the mesh magnetic field
+    data on each grid for this process.
+
+    Parameters
+    ----------
+
+    level          : the AMR level to get the data for
+    direction      : the component of the data you want
+    include_ghosts : whether to include ghost zones or not
+
+    Returns
+    -------
+
+    A 2d numpy array of the lo vector for each grid with the shape (dims, number of grids)
+
+    '''
+    return _get_mesh_array_lovects(level, direction, include_ghosts, libwarpx.warpx_getBfieldCPLoVects)
+
+
+def get_mesh_magnetic_field_fp_lovects(level, direction, include_ghosts=True):
+    '''
+
+    This returns a list of the lo vectors of the arrays containing the mesh magnetic field
+    data on each grid for this process.
+
+    Parameters
+    ----------
+
+    level          : the AMR level to get the data for
+    direction      : the component of the data you want
+    include_ghosts : whether to include ghost zones or not
+
+    Returns
+    -------
+
+    A 2d numpy array of the lo vector for each grid with the shape (dims, number of grids)
+
+    '''
+    return _get_mesh_array_lovects(level, direction, include_ghosts, libwarpx.warpx_getBfieldFPLoVects)
+
+
 def get_mesh_current_density_lovects(level, direction, include_ghosts=True):
     '''

@@ -775,3 +1201,45 @@ def get_mesh_current_density_lovects(level, direction, include_ghosts=True):
     return _get_mesh_array_lovects(level, direction, include_ghosts, libwarpx.warpx_getCurrentDensityLoVects)

+
+def get_mesh_current_density_cp_lovects(level, direction, include_ghosts=True):
+    '''
+
+    This returns a list of the lo vectors of the arrays containing the mesh current density
+    data on each grid for this process.
+
+    Parameters
+    ----------
+
+    level          : the AMR level to get the data for
+    direction      : the component of the data you want
+    include_ghosts : whether to include ghost zones or not
+
+    Returns
+    -------
+
+    A 2d numpy array of the lo vector for each grid with the shape (dims, number of grids)
+
+    '''
+    return _get_mesh_array_lovects(level, direction, include_ghosts, libwarpx.warpx_getCurrentDensityCPLoVects)
+
+
+def get_mesh_current_density_fp_lovects(level, direction, include_ghosts=True):
+    '''
+
+    This returns a list of the lo vectors of the arrays containing the mesh current density
+    data on each grid for this process.
+
+    Parameters
+    ----------
+
+    level          : the AMR level to get the data for
+    direction      : the component of the data you want
+    include_ghosts : whether to include ghost zones or not
+
+    Returns
+    -------
+
+    A 2d numpy array of the lo vector for each grid with the shape (dims, number of grids)
+
+    '''
+    return _get_mesh_array_lovects(level, direction, include_ghosts, libwarpx.warpx_getCurrentDensityFPLoVects)
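A minimal usage sketch for the coarse/fine-patch accessors added above (the function names come from this diff; the surrounding setup is assumed, i.e. a pywarpx simulation already initialized, AMR level 0, and `direction` indexing the field component):

    # Hypothetical usage sketch, not part of the commit: inspect the
    # fine-patch Ez (component index 2) on level 0. The returned arrays
    # share memory with WarpX, so they can also be modified in place.
    from pywarpx import _libwarpx

    ez_grids = _libwarpx.get_mesh_electric_field_fp(0, 2, include_ghosts=False)
    lovects = _libwarpx.get_mesh_electric_field_fp_lovects(0, 2, include_ghosts=False)
    # lovects has shape (dims, number of grids), so transpose to iterate per grid.
    for lo, arr in zip(lovects.T, ez_grids):
        print(lo, arr.shape, arr.min(), arr.max())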
diff --git a/Source/CustomDensityProb.cpp b/Source/CustomDensityProb.cpp
index 1df3d75ad..2f6005bc2 100644
--- a/Source/CustomDensityProb.cpp
+++ b/Source/CustomDensityProb.cpp
@@ -1,13 +1,52 @@
 #include <PlasmaInjector.H>
-#include <iostream>
+using namespace amrex;
-#include <AMReX.H>
+///
+/// This "custom" density profile implements a parabolic plasma channel with
+/// longitudinal ramps, capped at four times the on-axis density
+///
+Real CustomDensityProfile::getDensity(Real x, Real y, Real z) const {
+    const Real on_axis_density   = params[0];
+    const Real plasma_zmin       = params[1];
+    const Real plasma_zmax       = params[2];
+    const Real plasma_lramp_start = params[3];
+    const Real plasma_lramp_end  = params[4];
+    const Real plasma_rcap       = params[5];
+    const Real plasma_rdownramp  = params[6];
+    const Real plasma_rchannel   = params[7];
+    static const Real re = 2.8178403227e-15; // Electron classical radius
+    static const Real pi = 3.14159265359;
-amrex::Real CustomDensityProfile::getDensity(amrex::Real x,
-                                             amrex::Real y,
-                                             amrex::Real z) const
-{
-    amrex::Abort("If running with a custom density profile, you must supply a CustomDensityProb.cpp file");
-    return 0.0;
+    Real r2 = x*x + y*y;
+    Real r = std::sqrt( r2 );
+
+    // Transverse part of the profile
+    Real nr;
+    if (r<plasma_rcap) {
+        nr = 1. + 1./(pi*on_axis_density*re) * r2/pow(plasma_rchannel, 4);
+    } else {
+        nr = 1. + 1./(pi*on_axis_density*re) *
+            pow(plasma_rcap, 2)/pow(plasma_rchannel, 4) *
+            (plasma_rcap+plasma_rdownramp-r)/plasma_rdownramp;
+    }
+    // Longitudinal part of the profile
+    Real nz;
+    if (z<plasma_zmin) {
+        nz = 0;
+    } else if (z<plasma_zmin+plasma_lramp_start) {
+        nz = (z-plasma_zmin)/plasma_lramp_start;
+    } else if (z<plasma_zmax-plasma_lramp_end) {
+        nz = 1.;
+    } else if (z<plasma_zmax){
+        nz = -(z-plasma_zmax)/plasma_lramp_end;
+    } else {
+        nz = 0;
+    }
+    // Combine and saturate profile
+    Real n = nr*nz;
+    if (n > 4.) {
+        n = 4.;
+    }
+
+    return on_axis_density*n;
 }
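For reference, getDensity() above reads its eight constants out of params[] in a fixed order. A hypothetical input-deck sketch matching that ordering follows; the exact ParmParse keys for supplying these values are not shown in this diff, so the key names below are placeholders:

    # Hypothetical sketch (placeholder keys). The eight values map, in order, to
    # params[0..7] = on_axis_density, zmin, zmax, lramp_start, lramp_end,
    #                rcap, rdownramp, rchannel
    electrons.profile = custom
    electrons.custom_profile_params = 1.e24 0. 1.e-3 100.e-6 100.e-6 50.e-6 20.e-6 40.e-6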
diff --git a/Source/ParticleContainer.H b/Source/ParticleContainer.H
index a1402d4df..9767edd59 100644
--- a/Source/ParticleContainer.H
+++ b/Source/ParticleContainer.H
@@ -120,6 +120,8 @@ public:

     void Redistribute ();

+    void RedistributeLocal ();
+
     amrex::Vector<long> NumberOfParticlesInGrid(int lev) const;

     void Increment (amrex::MultiFab& mf, int lev);
diff --git a/Source/ParticleContainer.cpp b/Source/ParticleContainer.cpp
index 55889cf8d..168b9167c 100644
--- a/Source/ParticleContainer.cpp
+++ b/Source/ParticleContainer.cpp
@@ -186,6 +186,14 @@ MultiParticleContainer::Redistribute ()
     }
 }

+void
+MultiParticleContainer::RedistributeLocal ()
+{
+    for (auto& pc : allcontainers) {
+        pc->Redistribute(0, 0, 0, true);
+    }
+}
+
 Vector<long>
 MultiParticleContainer::NumberOfParticlesInGrid(int lev) const
 {
diff --git a/Source/PhysicalParticleContainer.cpp b/Source/PhysicalParticleContainer.cpp
index e5f6b0c82..050e50daa 100644
--- a/Source/PhysicalParticleContainer.cpp
+++ b/Source/PhysicalParticleContainer.cpp
@@ -225,9 +225,9 @@ PhysicalParticleContainer::AddPlasma(int lev, RealBox part_realbox )
             // and that the boost is along z)
             Real t = WarpX::GetInstance().gett_new(lev);
             Real v_boost = WarpX::beta_boost*PhysConst::c;
-            Real z_lab = WarpX::gamma_boost*( z - v_boost*t );
+            Real z_lab = WarpX::gamma_boost*( z + v_boost*t );
             plasma_injector->getMomentum(u, x, y, z_lab);
-            dens = plasma_injector->getDensity(x, y, z);
+            dens = plasma_injector->getDensity(x, y, z_lab);
             // Perform Lorentz transform
             // (Assumes that the plasma has a low velocity)
             u[2] = WarpX::gamma_boost * ( u[2] - v_boost );
diff --git a/Source/WarpX.H b/Source/WarpX.H
index d0876afd2..35cf93cc7 100644
--- a/Source/WarpX.H
+++ b/Source/WarpX.H
@@ -90,8 +90,16 @@ public:
     static amrex::Vector<amrex::Real> boost_direction;

     const amrex::MultiFab& getcurrent (int lev, int direction) {return *current_fp[lev][direction];}
-    const amrex::MultiFab& getEfield (int lev, int direction) {return *Efield_fp[lev][direction];}
-    const amrex::MultiFab& getBfield (int lev, int direction) {return *Bfield_fp[lev][direction];}
+    const amrex::MultiFab& getEfield (int lev, int direction) {return *Efield_aux[lev][direction];}
+    const amrex::MultiFab& getBfield (int lev, int direction) {return *Bfield_aux[lev][direction];}
+
+    const amrex::MultiFab& getcurrent_cp (int lev, int direction) {return *current_cp[lev][direction];}
+    const amrex::MultiFab& getEfield_cp  (int lev, int direction) {return *Efield_cp[lev][direction];}
+    const amrex::MultiFab& getBfield_cp  (int lev, int direction) {return *Bfield_cp[lev][direction];}
+
+    const amrex::MultiFab& getcurrent_fp (int lev, int direction) {return *current_fp[lev][direction];}
+    const amrex::MultiFab& getEfield_fp  (int lev, int direction) {return *Efield_fp[lev][direction];}
+    const amrex::MultiFab& getBfield_fp  (int lev, int direction) {return *Bfield_fp[lev][direction];}

     static amrex::MultiFab* getCosts (int lev) {
         if (m_instance) {
diff --git a/Source/WarpXBoostedFrameDiagnostic.H b/Source/WarpXBoostedFrameDiagnostic.H
index a5f0bd7ff..96a77f182 100644
--- a/Source/WarpXBoostedFrameDiagnostic.H
+++ b/Source/WarpXBoostedFrameDiagnostic.H
@@ -67,8 +67,8 @@ class BoostedFrameDiagnostic {
     int boost_direction_;

     amrex::Vector<std::unique_ptr<amrex::MultiFab> > data_buffer_;
-    int num_buffer_ = 32;
-    int max_box_size_ = 64;
+    int num_buffer_ = 256;
+    int max_box_size_ = 256;
     amrex::Vector<int> buff_counter_;

     amrex::Vector<LabSnapShot> snapshots_;
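A short consistency check on the sign fix in PhysicalParticleContainer::AddPlasma above (my annotation, not part of the commit): transforming a position z at boosted-frame time t back to the lab frame requires the plus sign,

    % Boosted frame -> lab frame, matching the corrected line
    % z_lab = gamma_boost*( z + v_boost*t ):
    z_{\mathrm{lab}} \;=\; \gamma_{\mathrm{boost}}\,\bigl(z + v_{\mathrm{boost}}\,t\bigr),
    \qquad v_{\mathrm{boost}} = \beta_{\mathrm{boost}}\,c

and since the injection profile is specified in the lab frame, both the momentum and the density must be sampled at z_lab, which is what the second corrected line does.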
diff --git a/Source/WarpXBoostedFrameDiagnostic.cpp b/Source/WarpXBoostedFrameDiagnostic.cpp
index d58ed0be7..a617961ee 100644
--- a/Source/WarpXBoostedFrameDiagnostic.cpp
+++ b/Source/WarpXBoostedFrameDiagnostic.cpp
@@ -46,7 +46,10 @@ BoostedFrameDiagnostic::
 writeLabFrameData(const MultiFab& cell_centered_data, const Geometry& geom, Real t_boost) {

     BL_PROFILE("BoostedFrameDiagnostic::writeLabFrameData");
-
+
+    VisMF::Header::Version current_version = VisMF::GetHeaderVersion();
+    VisMF::SetHeaderVersion(amrex::VisMF::Header::NoFabHeader_v1);
+
     const RealBox& domain_z_boost = geom.ProbDomain();
     const Real zlo_boost = domain_z_boost.lo(boost_direction_);
     const Real zhi_boost = domain_z_boost.hi(boost_direction_);
@@ -119,6 +122,8 @@ writeLabFrameData(const MultiFab& cell_centered_data, const Geometry& geom, Real
             buff_counter_[i] = 0;
         }
     }
+
+    VisMF::SetHeaderVersion(current_version);
 }

 void
@@ -167,20 +172,23 @@ LabSnapShot(Real t_lab_in, Real zmin_lab_in,
     current_z_boost = 0.0;
     file_name = Concatenate("lab_frame_data/snapshot", file_num, 5);

-    const int nlevels = 1;
-    const std::string level_prefix = "Level_";
-
-    if (!UtilCreateDirectory(file_name, 0755))
-        CreateDirectoryFailed(file_name);
-    for(int i(0); i < nlevels; ++i) {
-        const std::string &fullpath = LevelFullPath(i, file_name);
-        if (!UtilCreateDirectory(fullpath, 0755))
-            CreateDirectoryFailed(fullpath);
+    if (ParallelDescriptor::IOProcessor()) {
+
+        const int nlevels = 1;
+        const std::string level_prefix = "Level_";
+
+        if (!UtilCreateDirectory(file_name, 0755))
+            CreateDirectoryFailed(file_name);
+        for(int i(0); i < nlevels; ++i) {
+            const std::string &fullpath = LevelFullPath(i, file_name);
+            if (!UtilCreateDirectory(fullpath, 0755))
+                CreateDirectoryFailed(fullpath);
+        }
     }

     ParallelDescriptor::Barrier();

-    writeSnapShotHeader();
+    if (ParallelDescriptor::IOProcessor()) writeSnapShotHeader();
 }

 void
diff --git a/Source/WarpXEvolve.cpp b/Source/WarpXEvolve.cpp
index 6ce7db0ef..cda70dd53 100644
--- a/Source/WarpXEvolve.cpp
+++ b/Source/WarpXEvolve.cpp
@@ -260,7 +260,12 @@ WarpX::EvolveEM (int numsteps)
         // We might need to move j because we are going to make a plotfile.
         MoveWindow(move_j);

-        mypc->Redistribute();  // Redistribute particles
+        if (max_level == 0) {
+            mypc->RedistributeLocal();
+        }
+        else {
+            mypc->Redistribute();
+        }

         amrex::Print()<< "STEP " << step+1 << " ends."
<< " TIME = " << cur_time << " DT = " << dt[0] << "\n"; diff --git a/Source/WarpXIO.cpp b/Source/WarpXIO.cpp index b289fdde9..212d69a14 100644 --- a/Source/WarpXIO.cpp +++ b/Source/WarpXIO.cpp @@ -404,7 +404,7 @@ WarpX::GetCellCenteredData() { DistributionMap(lev), nc, ng) ); - Array<const MultiFab*> srcmf(BL_SPACEDIM); + Vector<const MultiFab*> srcmf(BL_SPACEDIM); int dcomp = 0; // first the electric field diff --git a/Source/WarpXInitData.cpp b/Source/WarpXInitData.cpp index df01afe88..0d6c35a4d 100644 --- a/Source/WarpXInitData.cpp +++ b/Source/WarpXInitData.cpp @@ -54,8 +54,12 @@ WarpX::InitDiagnostics () { const Real* current_hi = geom[0].ProbHi(); Real dt_boost = dt[0]; - myBFD.reset(new BoostedFrameDiagnostic(current_lo[moving_window_dir], - current_hi[moving_window_dir], + // Find the positions of the lab-frame box that corresponds to the boosted-frame box at t=0 + Real zmin_lab = current_lo[moving_window_dir]/( (1.+beta_boost)*gamma_boost ); + Real zmax_lab = current_hi[moving_window_dir]/( (1.+beta_boost)*gamma_boost ); + + myBFD.reset(new BoostedFrameDiagnostic(zmin_lab, + zmax_lab, moving_window_v, dt_snapshots_lab, num_snapshots_lab, gamma_boost, dt_boost, moving_window_dir)); diff --git a/Source/WarpXWrappers.cpp b/Source/WarpXWrappers.cpp index 54aeedf35..d106cfca7 100644 --- a/Source/WarpXWrappers.cpp +++ b/Source/WarpXWrappers.cpp @@ -192,6 +192,30 @@ extern "C" return getMultiFabLoVects(mf, return_size, ngrow); } + double** warpx_getEfieldCP(int lev, int direction, + int *return_size, int *ngrow, int **shapes) { + auto & mf = WarpX::GetInstance().getEfield_cp(lev, direction); + return getMultiFabPointers(mf, return_size, ngrow, shapes); + } + + int* warpx_getEfieldCPLoVects(int lev, int direction, + int *return_size, int *ngrow) { + auto & mf = WarpX::GetInstance().getEfield_cp(lev, direction); + return getMultiFabLoVects(mf, return_size, ngrow); + } + + double** warpx_getEfieldFP(int lev, int direction, + int *return_size, int *ngrow, int **shapes) { + auto & mf = WarpX::GetInstance().getEfield_fp(lev, direction); + return getMultiFabPointers(mf, return_size, ngrow, shapes); + } + + int* warpx_getEfieldFPLoVects(int lev, int direction, + int *return_size, int *ngrow) { + auto & mf = WarpX::GetInstance().getEfield_fp(lev, direction); + return getMultiFabLoVects(mf, return_size, ngrow); + } + double** warpx_getBfield(int lev, int direction, int *return_size, int *ngrow, int **shapes) { auto & mf = WarpX::GetInstance().getBfield(lev, direction); @@ -204,6 +228,30 @@ extern "C" return getMultiFabLoVects(mf, return_size, ngrow); } + double** warpx_getBfieldCP(int lev, int direction, + int *return_size, int *ngrow, int **shapes) { + auto & mf = WarpX::GetInstance().getBfield_cp(lev, direction); + return getMultiFabPointers(mf, return_size, ngrow, shapes); + } + + int* warpx_getBfieldCPLoVects(int lev, int direction, + int *return_size, int *ngrow) { + auto & mf = WarpX::GetInstance().getBfield_cp(lev, direction); + return getMultiFabLoVects(mf, return_size, ngrow); + } + + double** warpx_getBfieldFP(int lev, int direction, + int *return_size, int *ngrow, int **shapes) { + auto & mf = WarpX::GetInstance().getBfield_fp(lev, direction); + return getMultiFabPointers(mf, return_size, ngrow, shapes); + } + + int* warpx_getBfieldFPLoVects(int lev, int direction, + int *return_size, int *ngrow) { + auto & mf = WarpX::GetInstance().getBfield_fp(lev, direction); + return getMultiFabLoVects(mf, return_size, ngrow); + } + double** warpx_getCurrentDensity(int lev, int direction, int 
*return_size, int *ngrow, int **shapes) { auto & mf = WarpX::GetInstance().getcurrent(lev, direction); @@ -216,6 +264,30 @@ extern "C" return getMultiFabLoVects(mf, return_size, ngrow); } + double** warpx_getCurrentDensityCP(int lev, int direction, + int *return_size, int *ngrow, int **shapes) { + auto & mf = WarpX::GetInstance().getcurrent_cp(lev, direction); + return getMultiFabPointers(mf, return_size, ngrow, shapes); + } + + int* warpx_getCurrentDensityCPLoVects(int lev, int direction, + int *return_size, int *ngrow) { + auto & mf = WarpX::GetInstance().getcurrent_cp(lev, direction); + return getMultiFabLoVects(mf, return_size, ngrow); + } + + double** warpx_getCurrentDensityFP(int lev, int direction, + int *return_size, int *ngrow, int **shapes) { + auto & mf = WarpX::GetInstance().getcurrent_fp(lev, direction); + return getMultiFabPointers(mf, return_size, ngrow, shapes); + } + + int* warpx_getCurrentDensityFPLoVects(int lev, int direction, + int *return_size, int *ngrow) { + auto & mf = WarpX::GetInstance().getcurrent_fp(lev, direction); + return getMultiFabLoVects(mf, return_size, ngrow); + } + double** warpx_getParticleStructs(int speciesnumber, int* num_tiles, int** particles_per_tile) { auto & mypc = WarpX::GetInstance().GetPartContainer(); diff --git a/Tools/performance_tests/automated_test_1_uniform_rest_32ppc b/Tools/performance_tests/automated_test_1_uniform_rest_32ppc new file mode 100644 index 000000000..0f2f5e036 --- /dev/null +++ b/Tools/performance_tests/automated_test_1_uniform_rest_32ppc @@ -0,0 +1,58 @@ +# Maximum number of time steps +max_step = 100 + +# number of grid points +amr.n_cell = 128 128 128 + +amr.plot_int = -1 # How often to write plotfiles. + +# Maximum allowable size of each subdomain in the problem domain; +# this is used to decompose the domain for parallel calculations. +amr.max_grid_size = 32 + +# Maximum level in hierarchy (for now must be 0, i.e., one level in total) +amr.max_level = 0 + +# Geometry +geometry.coord_sys = 0 # 0: Cartesian +geometry.is_periodic = 1 1 1 # Is periodic? +geometry.prob_lo = -20.e-6 -20.e-6 -20.e-6 # physical domain +geometry.prob_hi = 20.e-6 20.e-6 20.e-6 + +# Verbosity +warpx.verbose = 1 + +# Algorithms +algo.current_deposition = 2 +algo.charge_deposition = 0 +algo.field_gathering = 0 +algo.particle_pusher = 0 +interpolation.nox = 1 +interpolation.noy = 1 +interpolation.noz = 1 + +# CFL +warpx.cfl = 1.0 + +particles.nspecies = 2 +particles.species_names = electrons ions + +electrons.charge = -q_e +electrons.mass = m_e +electrons.injection_style = "NUniformPerCell" +electrons.num_particles_per_cell_each_dim = 2 2 4 +electrons.profile = constant +electrons.density = 1.e20 # number of electrons per m^3 +electrons.momentum_distribution_type = "gaussian" +electrons.u_th = 0.01 # uth the std of the (unitless) momentum +electrons.uz_m = 0. # Mean momentum along z (unitless) + +ions.charge = q_e +ions.mass = m_p +ions.injection_style = "NUniformPerCell" +ions.num_particles_per_cell_each_dim = 2 2 4 +ions.profile = constant +ions.density = 1.e20 # number of electrons per m^3 +ions.momentum_distribution_type = "gaussian" +ions.u_th = 0.01 # uth the std of the (unitless) momentum +ions.uz_m = 0. 
diff --git a/Tools/performance_tests/automated_test_2_uniform_rest_1ppc b/Tools/performance_tests/automated_test_2_uniform_rest_1ppc
new file mode 100644
index 000000000..603d29a6d
--- /dev/null
+++ b/Tools/performance_tests/automated_test_2_uniform_rest_1ppc
@@ -0,0 +1,48 @@
+# Maximum number of time steps
+max_step = 100
+
+# number of grid points
+amr.n_cell = 256 256 256
+
+amr.plot_int = -1    # How often to write plotfiles.
+
+# Maximum allowable size of each subdomain in the problem domain;
+# this is used to decompose the domain for parallel calculations.
+amr.max_grid_size = 32
+
+# Maximum level in hierarchy (for now must be 0, i.e., one level in total)
+amr.max_level = 0
+
+# Geometry
+geometry.coord_sys = 0    # 0: Cartesian
+geometry.is_periodic = 1 1 1    # Is periodic?
+geometry.prob_lo = -20.e-6 -20.e-6 -20.e-6    # physical domain
+geometry.prob_hi = 20.e-6 20.e-6 20.e-6
+
+# Verbosity
+warpx.verbose = 1
+
+# Algorithms
+algo.current_deposition = 0
+algo.charge_deposition = 0
+algo.field_gathering = 0
+algo.particle_pusher = 0
+interpolation.nox = 1
+interpolation.noy = 1
+interpolation.noz = 1
+
+# CFL
+warpx.cfl = 1.0
+
+particles.nspecies = 1
+particles.species_names = electrons
+
+electrons.charge = -q_e
+electrons.mass = m_e
+electrons.injection_style = "NUniformPerCell"
+electrons.num_particles_per_cell_each_dim = 1 1 1
+electrons.profile = constant
+electrons.density = 1.e20    # number of electrons per m^3
+electrons.momentum_distribution_type = "gaussian"
+electrons.u_th = 0.01    # uth the std of the (unitless) momentum
+electrons.uz_m = 0.    # Mean momentum along z (unitless)
diff --git a/Tools/performance_tests/automated_test_3_uniform_drift_4ppc b/Tools/performance_tests/automated_test_3_uniform_drift_4ppc
new file mode 100644
index 000000000..d8a257d96
--- /dev/null
+++ b/Tools/performance_tests/automated_test_3_uniform_drift_4ppc
@@ -0,0 +1,58 @@
+# Maximum number of time steps
+max_step = 100
+
+# number of grid points
+amr.n_cell = 128 128 128
+
+amr.plot_int = -1    # How often to write plotfiles.
+
+# Maximum allowable size of each subdomain in the problem domain;
+# this is used to decompose the domain for parallel calculations.
+amr.max_grid_size = 32
+
+# Maximum level in hierarchy (for now must be 0, i.e., one level in total)
+amr.max_level = 0
+
+# Geometry
+geometry.coord_sys = 0    # 0: Cartesian
+geometry.is_periodic = 1 1 1    # Is periodic?
+geometry.prob_lo = -20.e-6 -20.e-6 -20.e-6    # physical domain
+geometry.prob_hi = 20.e-6 20.e-6 20.e-6
+
+# Verbosity
+warpx.verbose = 1
+
+# Algorithms
+algo.current_deposition = 2
+algo.charge_deposition = 0
+algo.field_gathering = 0
+algo.particle_pusher = 0
+interpolation.nox = 1
+interpolation.noy = 1
+interpolation.noz = 1
+
+# CFL
+warpx.cfl = 1.0
+
+particles.nspecies = 2
+particles.species_names = electrons ions
+
+electrons.charge = -q_e
+electrons.mass = m_e
+electrons.injection_style = "NUniformPerCell"
+electrons.num_particles_per_cell_each_dim = 2 1 1
+electrons.profile = constant
+electrons.density = 1.e20    # number of electrons per m^3
+electrons.momentum_distribution_type = "gaussian"
+electrons.u_th = 0.01    # uth the std of the (unitless) momentum
+electrons.uz_m = 100.    # Mean momentum along z (unitless)
+
+ions.charge = q_e
+ions.mass = m_p
+ions.injection_style = "NUniformPerCell"
+ions.num_particles_per_cell_each_dim = 2 1 1
+ions.profile = constant
+ions.density = 1.e20    # number of electrons per m^3
+ions.momentum_distribution_type = "gaussian"
+ions.u_th = 0.01    # uth the std of the (unitless) momentum
+ions.uz_m = 100.    # Mean momentum along z (unitless)
diff --git a/Tools/performance_tests/automated_test_4_labdiags_2ppc b/Tools/performance_tests/automated_test_4_labdiags_2ppc
new file mode 100644
index 000000000..54512001c
--- /dev/null
+++ b/Tools/performance_tests/automated_test_4_labdiags_2ppc
@@ -0,0 +1,100 @@
+# Maximum number of time steps
+max_step = 100
+
+# number of grid points
+amr.n_cell = 64 64 512
+
+# Maximum allowable size of each subdomain in the problem domain;
+# this is used to decompose the domain for parallel calculations.
+amr.max_grid_size = 32
+
+# Maximum level in hierarchy (for now must be 0, i.e., one level in total)
+amr.max_level = 0
+amr.plot_int = 10    # How often to write plotfiles. "<= 0" means no plotfiles.
+amr.check_int = 10
+
+# Geometry
+geometry.coord_sys = 0    # 0: Cartesian
+geometry.is_periodic = 1 1 0    # Is periodic?
+geometry.prob_lo = -150.e-6 -150.e-6 -0.6e-3    # physical domain
+geometry.prob_hi = 150.e-6 150.e-6 0.
+
+# Verbosity
+warpx.verbose = 1
+
+# Algorithms
+algo.current_deposition = 2
+algo.charge_deposition = 0
+algo.field_gathering = 0
+algo.particle_pusher = 0
+
+# Numerics
+interpolation.nox = 3
+interpolation.noy = 3
+interpolation.noz = 3
+warpx.use_filter = 1
+warpx.cfl = 1.0
+warpx.do_pml = 0
+
+# Moving window
+warpx.do_moving_window = 1
+warpx.moving_window_dir = z
+warpx.moving_window_v = 1.0    # in units of the speed of light
+
+# Boosted frame
+warpx.gamma_boost = 15.
+warpx.boost_direction = z
+
+# Diagnostics
+warpx.do_boosted_frame_diagnostic = 1
+warpx.num_snapshots_lab = 20
+warpx.dt_snapshots_lab = 7.0e-14
+
+# Particle Injection
+warpx.do_plasma_injection = 1
+warpx.num_injected_species = 2
+warpx.injected_plasma_species = 0 1
+
+# Species
+particles.nspecies = 2
+particles.species_names = electrons ions
+
+electrons.charge = -q_e
+electrons.mass = m_e
+electrons.injection_style = "NUniformPerCell"
+electrons.xmin = -150.e-6
+electrons.xmax = 150.e-6
+electrons.ymin = -150.e-6
+electrons.ymax = 150.e-6
+electrons.zmin = 0.e-6
+electrons.num_particles_per_cell_each_dim = 1 1 1
+electrons.profile = constant
+electrons.density = 1.
+electrons.momentum_distribution_type = "constant"
+
+ions.charge = q_e
+ions.mass = m_p
+ions.injection_style = "NUniformPerCell"
+ions.xmin = -150.e-6
+ions.xmax = 150.e-6
+ions.ymin = -150.e-6
+ions.ymax = 150.e-6
+ions.zmin = 0.e-6
+ions.num_particles_per_cell_each_dim = 1 1 1
+ions.profile = constant
+ions.density = 1.
+ions.momentum_distribution_type = "constant"
+
+# Laser
+warpx.use_laser = 1
+laser.profile = Gaussian
+laser.position = 0. 0. -1.e-6    # This point is on the laser plane
+laser.direction = 0. 0. 1.    # The plane normal direction
+laser.polarization = 1. 0. 0.    # The main polarization vector
+laser.e_max = 8.e12    # Maximum amplitude of the laser field (in V/m)
+laser.profile_waist = 5.e-5    # The waist of the laser (in meters)
+laser.profile_duration = 16.7e-15    # The duration of the laser (in seconds)
+laser.profile_t_peak = 33.4e-15    # The time at which the laser reaches its peak (in seconds)
+laser.profile_focal_distance = 0.e-6    # Focal distance from the antenna (in meters)
+laser.wavelength = 0.8e-6    # The wavelength of the laser (in meters)
diff --git a/Tools/performance_tests/automated_test_5_loadimbalance b/Tools/performance_tests/automated_test_5_loadimbalance
new file mode 100644
index 000000000..6546f6804
--- /dev/null
+++ b/Tools/performance_tests/automated_test_5_loadimbalance
@@ -0,0 +1,76 @@
+# Maximum number of time steps
+max_step = 100
+
+# number of grid points
+amr.n_cell = 256 256 256
+
+amr.plot_int = -1    # How often to write plotfiles.
+
+# Maximum allowable size of each subdomain in the problem domain;
+# this is used to decompose the domain for parallel calculations.
+amr.max_grid_size = 32
+
+# Maximum level in hierarchy (for now must be 0, i.e., one level in total)
+amr.max_level = 0
+
+# Geometry
+geometry.coord_sys = 0    # 0: Cartesian
+geometry.is_periodic = 1 1 1    # Is periodic?
+geometry.prob_lo = -20.e-6 -20.e-6 -20.e-6    # physical domain
+geometry.prob_hi = 20.e-6 20.e-6 20.e-6
+
+# Verbosity
+warpx.verbose = 1
+
+# Algorithms
+algo.current_deposition = 2
+algo.charge_deposition = 0
+algo.field_gathering = 0
+algo.particle_pusher = 0
+interpolation.nox = 1
+interpolation.noy = 1
+interpolation.noz = 1
+
+# CFL
+warpx.cfl = 1.0
+
+particles.nspecies = 2
+particles.species_names = electrons ions
+
+electrons.charge = -q_e
+electrons.mass = m_e
+electrons.injection_style = "gaussian_beam"
+electrons.x_rms = 2.e-6
+electrons.y_rms = 2.e-6
+electrons.z_rms = 5.e-6
+electrons.x_m = 0.
+electrons.y_m = 0.
+electrons.z_m = 0.
+electrons.npart = 500000
+electrons.q_tot = -1.602e-9
+electrons.profile = "constant"
+electrons.density = 1.e25
+electrons.momentum_distribution_type = "gaussian"
+electrons.ux_m = 0.0
+electrons.uy_m = 0.0
+electrons.uz_m = 0.0
+electrons.u_th = 0.01
+
+ions.charge = q_e
+ions.mass = m_p
+ions.injection_style = "gaussian_beam"
+ions.x_rms = 2.e-6
+ions.y_rms = 2.e-6
+ions.z_rms = 5.e-6
+ions.x_m = 0.
+ions.y_m = 0.
+ions.z_m = 0.
+ions.npart = 500000
+ions.q_tot = -1.602e-9
+ions.profile = "constant"
+ions.density = 1.e25
+ions.momentum_distribution_type = "gaussian"
+ions.ux_m = 0.0
+ions.uy_m = 0.0
+ions.uz_m = 0.0
+ions.u_th = 0.01
diff --git a/Tools/performance_tests/automated_test_6_output_2ppc b/Tools/performance_tests/automated_test_6_output_2ppc
new file mode 100644
index 000000000..a1c4172fe
--- /dev/null
+++ b/Tools/performance_tests/automated_test_6_output_2ppc
@@ -0,0 +1,58 @@
+# Maximum number of time steps
+max_step = 10
+
+# number of grid points
+amr.n_cell = 128 128 128
+
+amr.plot_int = 2    # How often to write plotfiles.
+
+# Maximum allowable size of each subdomain in the problem domain;
+# this is used to decompose the domain for parallel calculations.
+amr.max_grid_size = 32
+
+# Maximum level in hierarchy (for now must be 0, i.e., one level in total)
+amr.max_level = 0
+
+# Geometry
+geometry.coord_sys = 0    # 0: Cartesian
+geometry.is_periodic = 1 1 1    # Is periodic?
+geometry.prob_lo = -20.e-6 -20.e-6 -20.e-6    # physical domain
+geometry.prob_hi = 20.e-6 20.e-6 20.e-6
+
+# Verbosity
+warpx.verbose = 1
+
+# Algorithms
+algo.current_deposition = 2
+algo.charge_deposition = 0
+algo.field_gathering = 0
+algo.particle_pusher = 0
+interpolation.nox = 1
+interpolation.noy = 1
+interpolation.noz = 1
+
+# CFL
+warpx.cfl = 1.0
+
+particles.nspecies = 2
+particles.species_names = electrons ions
+
+electrons.charge = -q_e
+electrons.mass = m_e
+electrons.injection_style = "NUniformPerCell"
+electrons.num_particles_per_cell_each_dim = 1 1 1
+electrons.profile = constant
+electrons.density = 1.e20    # number of electrons per m^3
+electrons.momentum_distribution_type = "gaussian"
+electrons.u_th = 0.01    # uth the std of the (unitless) momentum
+electrons.uz_m = 0.    # Mean momentum along z (unitless)
+
+ions.charge = q_e
+ions.mass = m_p
+ions.injection_style = "NUniformPerCell"
+ions.num_particles_per_cell_each_dim = 1 1 1
+ions.profile = constant
+ions.density = 1.e20    # number of electrons per m^3
+ions.momentum_distribution_type = "gaussian"
+ions.u_th = 0.01    # uth the std of the (unitless) momentum
+ions.uz_m = 0.    # Mean momentum along z (unitless)
diff --git a/Tools/performance_tests/functions_perftest.py b/Tools/performance_tests/functions_perftest.py
new file mode 100644
index 000000000..2085367c7
--- /dev/null
+++ b/Tools/performance_tests/functions_perftest.py
@@ -0,0 +1,189 @@
+import os, shutil, re
+
+def run_batch_nnode(test_list, res_dir, bin_name, config_command, architecture='knl', Cname='knl', n_node=1):
+    # Clean res_dir
+    if os.path.exists(res_dir):
+        shutil.rmtree(res_dir)
+    os.makedirs(res_dir)
+    # Copy files to res_dir
+    cwd = os.environ['WARPX'] + '/Tools/performance_tests/'
+    bin_dir = cwd + 'Bin/'
+    shutil.copy(bin_dir + bin_name, res_dir)
+    os.chdir(res_dir)
+    # Calculate the job time: 5 min + 5 min per simulation
+    job_time_min = 5. + len(test_list)*5.
+    job_time_str = str(int(job_time_min/60)) + ':' + str(int(job_time_min%60)) + ':00'
+    batch_string = ''
+    batch_string += '#!/bin/bash\n'
+    batch_string += '#SBATCH --job-name=' + test_list[0][0] + '\n'
+    batch_string += '#SBATCH --time=' + job_time_str + '\n'
+    batch_string += '#SBATCH -C ' + Cname + '\n'
+    batch_string += '#SBATCH -N ' + str(n_node) + '\n'
+    batch_string += '#SBATCH -q regular\n'
+    batch_string += '#SBATCH -e error.txt\n'
+    batch_string += '#SBATCH --account=m2852\n'
+
+    for count, test_item in enumerate(test_list):
+        # test_item reads [input_file, n_node, n_mpi, n_omp]
+        input_file = test_item[0];
+        shutil.copy(cwd + input_file, res_dir)
+        # test_item[1] is not read since it contains the number of nodes, which is a
+        # global parameter. However, we keep it for compatibility with run_alltests.py
+        n_mpi = test_item[2]
+        n_omp = test_item[3]
+        srun_string = ''
+        srun_string += 'export OMP_NUM_THREADS=' + str(n_omp) + '\n'
+        # number of logical cores per MPI process
+        if architecture == 'cpu':
+            cflag_value = max(1, int(32/n_mpi) * 2) # Follow NERSC directives
+        elif architecture == 'knl':
+            cflag_value = max(1, int(64/n_mpi) * 4) # Follow NERSC directives
+        output_filename = 'out_' + '_'.join([input_file, str(n_node), str(n_mpi), str(n_omp), str(count)]) + '.txt'
+        srun_string += 'srun --cpu_bind=cores '+ \
+            ' -n ' + str(n_node*n_mpi) + \
+            ' -c ' + str(cflag_value) + \
+            ' ./' + bin_name + \
+            ' ' + input_file + \
+            ' > ' + output_filename + '\n'
+        batch_string += srun_string
+    batch_string += 'rm -rf plt*\n'
+    batch_string += 'rm -rf chk*\n'
+    batch_string += 'rm -rf lab_frame_data\n'
+    batch_file = 'slurm'
+    f_exe = open(batch_file,'w')
+    f_exe.write(batch_string)
+    f_exe.close()
+    os.system('chmod 700 ' + bin_name)
+    os.system(config_command + 'sbatch ' + batch_file + ' >> ' + cwd + 'log_jobids_tmp.txt')
+    return 0
+
+def run_batch(run_name, res_dir, bin_name, config_command, architecture='knl',\
+              Cname='knl', n_node=1, n_mpi=1, n_omp=1):
+    # Clean res_dir
+    if os.path.exists(res_dir):
+        shutil.rmtree(res_dir)
+    os.makedirs(res_dir)
+    # Copy files to res_dir
+    cwd = os.environ['WARPX'] + '/Tools/performance_tests/'
+    bin_dir = cwd + 'Bin/'
+    shutil.copy(bin_dir + bin_name, res_dir)
+    shutil.copyfile(cwd + run_name, res_dir + 'inputs')
+    os.chdir(res_dir)
+    batch_string = ''
+    batch_string += '#!/bin/bash\n'
+    batch_string += '#SBATCH --job-name=' + run_name + str(n_node) + str(n_mpi) + str(n_omp) + '\n'
+    batch_string += '#SBATCH --time=00:20:00\n'
+    batch_string += '#SBATCH -C ' + Cname + '\n'
+    batch_string += '#SBATCH -N ' + str(n_node) + '\n'
+    batch_string += '#SBATCH -q regular\n'
+    batch_string += '#SBATCH -e error.txt\n'
+    batch_string += '#SBATCH --account=m2852\n'
+    batch_string += 'export OMP_NUM_THREADS=' + str(n_omp) + '\n'
+    if architecture == 'cpu':
+        cflag_value = max(1, int(32/n_mpi) * 2) # Follow NERSC directives
+        batch_string += 'srun --cpu_bind=cores '+ \
+            ' -n ' + str(n_node*n_mpi) + \
+            ' -c ' + str(cflag_value) + \
+            ' ./' + bin_name + ' inputs > perf_output.txt'
+    elif architecture == 'knl':
+        # number of logical cores per MPI process
+        cflag_value = max(1, int(64/n_mpi) * 4) # Follow NERSC directives
+        batch_string += 'srun --cpu_bind=cores ' + \
+            ' -n ' + str(n_node*n_mpi) + \
+            ' -c ' + str(cflag_value) + \
+            ' ./' + bin_name + ' inputs > perf_output.txt\n'
+    batch_file = 'slurm'
+    f_exe = open(batch_file,'w')
+    f_exe.write(batch_string)
+    f_exe.close()
+    os.system('chmod 700 ' + bin_name)
+    os.system(config_command + 'sbatch ' + batch_file + ' >> ' + cwd + 'log_jobids_tmp.txt')
+    return 0
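A hypothetical call sketch for run_batch_nnode() above, inferred from the indices the function actually reads (test_list entries are [input_file, n_node, n_mpi, n_omp]; the results path and binary name are placeholders, and $WARPX must point at the repository root since the function copies inputs from $WARPX/Tools/performance_tests/):

    # Hypothetical usage sketch (paths and binary name are placeholders).
    from functions_perftest import run_batch_nnode

    test_list = [
        ['automated_test_1_uniform_rest_32ppc', 1, 16, 8],
        ['automated_test_2_uniform_rest_1ppc',  1, 16, 8],
    ]
    run_batch_nnode(test_list, res_dir='/path/to/results/',
                    bin_name='main3d.ex', config_command='',
                    architecture='knl', Cname='knl', n_node=1)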
+# Read output file and return init time and 1-step time
+def read_run_perf(filename, n_steps):
+    timing_list = []
+    # Search inclusive time to get simulation step time
+    partition_limit = 'NCalls Incl. Min Incl. Avg Incl. Max Max %'
+    with open(filename) as file_handler:
+        output_text = file_handler.read()
+    # Get total simulation time
+    line_match_totaltime = re.search('TinyProfiler total time across processes.*', output_text)
+    total_time = float(line_match_totaltime.group(0).split()[8])
+    search_area = output_text.partition(partition_limit)[2]
+    line_match_looptime = re.search('\nWarpX::Evolve().*', search_area)
+    time_wo_initialization = float(line_match_looptime.group(0).split()[3])
+    timing_list += [str(total_time - time_wo_initialization)]
+    timing_list += [str(time_wo_initialization/n_steps)]
+    partition_limit1 = 'NCalls Excl. Min Excl. Avg Excl. Max Max %'
+    partition_limit2 = 'NCalls Incl. Min Incl. Avg Incl. Max Max %'
+    file_handler.close()
+    with open(filename) as file_handler:
+        output_text = file_handler.read()
+    # Search EXCLUSIVE routine timings
+    search_area = output_text.partition(partition_limit1)[2].partition(partition_limit2)[0]
+    pattern_list = ['\nParticleContainer::Redistribute().*',\
+                    '\nFabArray::FillBoundary().*',\
+                    '\nFabArray::ParallelCopy().*',\
+                    '\nPICSAR::CurrentDeposition.*',\
+                    '\nPICSAR::FieldGather.*',\
+                    '\nPICSAR::ParticlePush.*',\
+                    '\nPPC::Evolve::Copy.*',\
+                    '\nWarpX::EvolveEM().*',\
+                    'Checkpoint().*',\
+                    'WriteParticles().*',\
+                    '\nVisMF::Write(FabArray).*',\
+                    '\nWriteMultiLevelPlotfile().*',\
+                    '\nParticleContainer::RedistributeMPI().*']
+    for pattern in pattern_list:
+        timing = '0'
+        line_match = re.search(pattern, search_area)
+        if line_match is not None:
+            timing = [str(float(line_match.group(0).split()[3])/n_steps)]
+        timing_list += timing
+    return timing_list
+
+# Write time into logfile
+def write_perf_logfile(log_file, log_line):
+    f_log = open(log_file, 'a')
+    f_log.write(log_line)
+    f_log.close()
+    return 0
+
+def get_nsteps(run_name):
+    with open(run_name) as file_handler:
+        run_name_text = file_handler.read()
+    line_match_nsteps = re.search('\nmax_step.*', run_name_text)
+    nsteps = float(line_match_nsteps.group(0).split()[2])
+    return nsteps
+
+
+# Run a performance test in an interactive allocation
+# def run_interactive(run_name, res_dir, n_node=1, n_mpi=1, n_omp=1):
+#     # Clean res_dir #
+#     if os.path.exists(res_dir):
+#         shutil.rmtree(res_dir)
+#     os.makedirs(res_dir)
+#     # Copy files to res_dir #
+#     shutil.copyfile(bin_dir + bin_name, res_dir + bin_name)
+#     shutil.copyfile(cwd + run_name, res_dir + 'inputs')
+#     os.chdir(res_dir)
+#     if args.architecture == 'cpu':
+#         cflag_value = max(1, int(32/n_mpi) * 2) # Follow NERSC directives #
+#         exec_command = 'export OMP_NUM_THREADS=' + str(n_omp) + ';' +\
+#                        'srun --cpu_bind=cores ' + \
+#                        ' -n ' + str(n_node*n_mpi) + \
+#                        ' -c ' + str(cflag_value) + \
+#                        ' ./' + bin_name + ' inputs > perf_output.txt'
+#     elif args.architecture == 'knl':
+#         # number of logical cores per MPI process #
+#         cflag_value = max(1,int(68/n_mpi) * 4) # Follow NERSC directives #
+#         exec_command = 'export OMP_NUM_THREADS=' + str(n_omp) + ';' +\
+#                        'srun --cpu_bind=cores ' + \
+#                        ' -n ' + str(n_node*n_mpi) + \
+#                        ' -c ' + str(cflag_value) + \
+#                        ' ./' + bin_name + ' inputs > perf_output.txt'
+#     os.system('chmod 700 ' + bin_name)
+#     os.system(config_command + exec_command)
+#     return 0
diff --git a/Tools/performance_tests/performance_log.txt b/Tools/performance_tests/performance_log.txt
index cb38025d9..543d257a0 100644
--- a/Tools/performance_tests/performance_log.txt
+++ b/Tools/performance_tests/performance_log.txt
@@ -1,37 +1,33 @@
diff --git a/Tools/performance_tests/performance_log.txt b/Tools/performance_tests/performance_log.txt
index cb38025d9..543d257a0 100644
--- a/Tools/performance_tests/performance_log.txt
+++ b/Tools/performance_tests/performance_log.txt
@@ -1,37 +1,33 @@
-## year month day run_name compiler architecture n_node n_mpi n_omp time_initialization(s) time_one_iteration(s)
-2017 10 13 uniform_plasma intel knl 1 1 1 1.88 0.8257
-2017 10 13 uniform_plasma intel knl 1 1 1 1.87 0.8229
-2017 10 13 uniform_plasma intel knl 1 1 1 1.87 0.8244
-2017 10 13 uniform_plasma intel knl 1 1 2 1.87 0.4372
-2017 10 13 uniform_plasma intel knl 1 1 2 1.89 0.4374
-2017 10 13 uniform_plasma intel knl 1 1 2 1.9 0.4366
-2017 10 13 uniform_plasma intel knl 2 2 1 0.54 0.2152
-2017 10 13 uniform_plasma intel knl 2 2 1 0.6 0.2163
-2017 10 13 uniform_plasma intel knl 2 2 1 0.6 0.217
-2017 10 13 uniform_plasma gnu knl 1 1 1 1.17 0.8062
-2017 10 13 uniform_plasma gnu knl 1 1 1 1.39 0.8062
-2017 10 13 uniform_plasma gnu knl 1 1 1 1.4 0.8067
-2017 10 13 uniform_plasma gnu knl 1 1 2 1.07 0.4271
-2017 10 13 uniform_plasma gnu knl 1 1 2 1.02 0.4249
-2017 10 13 uniform_plasma gnu knl 1 1 2 1.06 0.425
-2017 10 13 uniform_plasma gnu knl 2 2 1 0.75 0.2119
-2017 10 13 uniform_plasma gnu knl 2 2 1 0.86 0.2111
-2017 10 13 uniform_plasma gnu knl 2 2 1 0.71 0.2121
-2017 10 13 uniform_plasma gnu cpu 1 1 1 0.66 0.1667
-2017 10 13 uniform_plasma gnu cpu 1 1 1 0.71 0.1653
-2017 10 13 uniform_plasma gnu cpu 1 1 1 0.8 0.1667
-2017 10 13 uniform_plasma gnu cpu 1 1 2 0.579 0.09701
-2017 10 13 uniform_plasma gnu cpu 1 1 2 0.629 0.09651
-2017 10 13 uniform_plasma gnu cpu 1 1 2 0.892 0.09718
-2017 10 13 uniform_plasma gnu cpu 2 2 1 0.466 0.04317
-2017 10 13 uniform_plasma gnu cpu 2 2 1 0.535 0.04414
-2017 10 13 uniform_plasma gnu cpu 2 2 1 0.542 0.04404
-2017 10 15 uniform_plasma gnu knl 1 8 16 0.94 0.1971
-2017 10 15 uniform_plasma gnu knl 1 8 16 0.76 0.1795
-2017 10 15 uniform_plasma gnu knl 1 8 16 1.07 0.1799
-2017 10 15 uniform_plasma gnu knl 1 4 32 1.17 0.2019
-2017 10 15 uniform_plasma gnu knl 1 4 32 1.09 0.2055
-2017 10 15 uniform_plasma gnu knl 1 4 32 1.4 0.1926
-2017 10 15 uniform_plasma gnu knl 2 4 32 0.97 0.1313
-2017 10 15 uniform_plasma gnu knl 2 4 32 1.05 0.1402
-2017 10 15 uniform_plasma gnu knl 2 4 32 1.07 0.1429
+## year month day run_name compiler architecture n_node n_mpi n_omp time_initialization time_one_iteration Redistribute FillBoundary ParallelCopy CurrentDeposition FieldGather ParticlePush Copy EvolveEM Checkpoint WriteParticles Write_FabArray WriteMultiLevelPlotfile(unit: second) RedistributeMPI
+2018 01 31 automated_test_1_uniform_rest_32ppc intel knl 1 16 8 3.14 0.3986 0.1713 0.01719 0.01615 0.06987 0.03636 0.01901 0.01999 0.003602 0 0 0 0 0.007262
+2018 01 31 automated_test_1_uniform_rest_32ppc intel knl 1 16 8 3.39 0.4009 0.1712 0.01676 0.01583 0.07061 0.03684 0.01926 0.02011 0.003687 0 0 0 0 0.007841
+2018 01 31 automated_test_1_uniform_rest_32ppc intel knl 1 16 8 2.91 0.4024 0.1716 0.01826 0.01918 0.0703 0.0363 0.01912 0.01989 0.003017 0 0 0 0 0.007256
+2018 01 31 automated_test_1_uniform_rest_32ppc intel knl 1 16 8 3.21 0.3997 0.1717 0.01706 0.0162 0.07026 0.03655 0.01928 0.01999 0.003687 0 0 0 0 0.006799
+2018 01 31 automated_test_2_uniform_rest_1ppc intel knl 1 16 8 0.89 0.4779 0.04441 0.1143 0.09117 0.1072 0.01254 0.003702 0.004217 0.01247 0 0 0 0 0.003441
+2018 01 31 automated_test_2_uniform_rest_1ppc intel knl 1 16 8 1.58 0.4626 0.04424 0.1048 0.0851 0.1073 0.01259 0.003767 0.004282 0.01311 0 0 0 0 0.002798
+2018 01 31 automated_test_2_uniform_rest_1ppc intel knl 1 16 8 1.63 0.4616 0.04441 0.1033 0.08398 0.1079 0.01312 0.003802 0.004224 0.01278 0 0 0 0 0.003188
+2018 01 31 automated_test_2_uniform_rest_1ppc intel knl 1 16 8 1.72 0.461 0.04419 0.1038 0.08424 0.1074 0.01257 0.003799 0.0043 0.01318 0 0 0 0 0.002816
+2018 01 31 automated_test_1_uniform_rest_32ppc intel knl 1 16 8 3.32 0.3986 0.1712 0.01804 0.01697 0.06999 0.03615 0.01842 0.01896 0.003445 0 0 0 0 0.00738
+2018 01 31 automated_test_1_uniform_rest_32ppc intel knl 1 16 8 3.17 0.3974 0.1711 0.01722 0.01587 0.07016 0.03642 0.01844 0.01902 0.003431 0 0 0 0 0.007332
+2018 01 31 automated_test_1_uniform_rest_32ppc intel knl 1 16 8 2.88 0.3946 0.1709 0.01686 0.01562 0.06972 0.03595 0.01848 0.01916 0.003269 0 0 0 0 0.006887
+2018 01 31 automated_test_1_uniform_rest_32ppc intel knl 1 16 8 2.95 0.4094 0.1708 0.01761 0.01632 0.07001 0.03651 0.01863 0.01906 0.003314 0 0 0 0 0.01898
+2018 01 31 automated_test_2_uniform_rest_1ppc intel knl 1 16 8 1.3 0.4787 0.04447 0.1139 0.09124 0.108 0.01287 0.003811 0.004205 0.01249 0 0 0 0 0.003045
+2018 01 31 automated_test_2_uniform_rest_1ppc intel knl 1 16 8 3.16 0.4578 0.04412 0.1015 0.08339 0.1078 0.01301 0.003919 0.004182 0.0125 0 0 0 0 0.002701
+2018 01 31 automated_test_2_uniform_rest_1ppc intel knl 1 16 8 2.78 0.4679 0.04418 0.1035 0.08456 0.1079 0.01303 0.003902 0.004214 0.0127 0 0 0 0 0.009118
+2018 01 31 automated_test_2_uniform_rest_1ppc intel knl 1 16 8 1.12 0.4613 0.04425 0.1043 0.08517 0.1073 0.01242 0.003797 0.004221 0.01239 0 0 0 0 0.003665
+2018 01 31 automated_test_3_uniform_drift_4ppc intel knl 1 16 8 0.48 0.1237 0.03056 0.01622 0.01468 0.02039 0.005016 0.003737 0.002632 0.00326 0 0 0 0 0.006871
+2018 01 31 automated_test_3_uniform_drift_4ppc intel knl 1 16 8 0.79 0.1287 0.0308 0.01706 0.01715 0.02042 0.005452 0.003636 0.002797 0.003143 0 0 0 0 0.007324
+2018 01 31 automated_test_3_uniform_drift_4ppc intel knl 1 16 8 0.9 0.1296 0.03084 0.01711 0.01731 0.02053 0.005379 0.003641 0.002843 0.003137 0 0 0 0 0.008151
+2018 01 31 automated_test_3_uniform_drift_4ppc intel knl 1 16 8 0.9 0.1323 0.03081 0.01703 0.01736 0.02065 0.005339 0.003638 0.002751 0.004008 0 0 0 0 0.01015
+2018 01 31 automated_test_4_labdiags_2ppc intel knl 1 16 8 0.85 0.2896 0.03832 0.06449 0.07493 0.003507 0.002987 0.0001515 0.0001762 0.007921 0.0371 0.001537 0 0.0004387 0.03832
+2018 01 31 automated_test_4_labdiags_2ppc intel knl 1 16 8 1.12 0.2895 0.03845 0.06423 0.07481 0.003489 0.002994 0.000152 0.0001779 0.00834 0.0357 0.001545 0 0.0005249 0.03845
+2018 01 31 automated_test_4_labdiags_2ppc intel knl 1 16 8 0.76 0.3243 0.03804 0.0646 0.07462 0.003483 0.002991 0.0001508 0.0001769 0.008051 0.05983 0.001565 0 0.005392 0.03804
+2018 01 31 automated_test_4_labdiags_2ppc intel knl 1 16 8 0.74 0.3143 0.03941 0.06478 0.07547 0.003486 0.003007 0.0001518 0.0001808 0.007845 0.05079 0.001543 0 0.0007033 0.03941
+2018 01 31 automated_test_5_loadimbalance intel knl 1 16 8 9.2 0.3845 0.08558 0.1042 0.1332 0 0 0 0 0.01226 0 0 0 0 0.08558
+2018 01 31 automated_test_5_loadimbalance intel knl 1 16 8 9.19 0.3864 0.085 0.1051 0.134 0 0 0 0 0.01202 0 0 0 0 0.085
+2018 01 31 automated_test_5_loadimbalance intel knl 1 16 8 8.98 0.3912 0.08665 0.1061 0.1356 0 0 0 0 0.01193 0 0 0 0 0.08665
+2018 01 31 automated_test_5_loadimbalance intel knl 1 16 8 9.03 0.3826 0.08484 0.1031 0.1329 0 0 0 0 0.01205 0 0 0 0 0.08484
+2018 01 31 automated_test_6_output_2ppc intel knl 1 16 8 3.6 1.086 0.0898 0.1311 0.09441 0.1345 0.027 0.008783 0.009792 0.02151 0.08454 0.04962 0 0.0008218 0.005303
+2018 01 31 automated_test_6_output_2ppc intel knl 1 16 8 4.7 1.136 0.09059 0.1437 0.09535 0.1358 0.02915 0.009238 0.01002 0.02315 0.09088 0.05006 0 0.01081 0.005381
+2018 01 31 automated_test_6_output_2ppc intel knl 1 16 8 4.0 1.132 0.09145 0.1377 0.09592 0.1365 0.02817 0.009353 0.0103 0.02447 0.066 0.05309 0 0.02047 0.009196
+2018 01 31 automated_test_6_output_2ppc intel knl 1 16 8 3.8 1.135 0.09088 0.1308 0.09623 0.135 0.02762 0.008839 0.009758 0.02561 0.1144 0.04874 0 0.0008693 0.008112
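Each entry in the log is one flat whitespace-separated row: three date columns, three text labels (run name, compiler, architecture), and 18 numeric columns in the order given by the header (n_node through RedistributeMPI). Assuming that layout, the numeric block can be read back with numpy, as the plotting code in run_alltests_1node.py below does:

    import numpy as np

    # Columns 0-2: year month day; columns 3-5: run_name compiler architecture (text)
    dates = np.loadtxt('performance_log.txt', usecols=range(0, 3))
    # Columns 6-23: n_node n_mpi n_omp time_initialization time_one_iteration ...
    data = np.loadtxt('performance_log.txt', usecols=range(6, 24))
    step_time = data[:, 4]  # time_one_iteration, i.e. column 10 of the file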
diff --git a/Tools/performance_tests/run_alltests.py b/Tools/performance_tests/run_alltests.py
index 7c02481fb..440da363d 100644
--- a/Tools/performance_tests/run_alltests.py
+++ b/Tools/performance_tests/run_alltests.py
@@ -1,5 +1,6 @@
import os, sys, shutil
import argparse, re, time
+from functions_perftest import *

# This script runs automated performance tests for WarpX.
# It runs tests in list test_list defined below, and write
@@ -9,7 +10,7 @@ import argparse, re, time
# Before running performance tests, make sure you have the latest version
# of performance_log.txt
# A typical execution reads:
-# > python run_alltests.py --no-recompile --compiler=gnu --architecture=cpu --mode=run --no-commit --log_file='my_performance_log.txt'
+# > python run_alltests.py --no-recompile --compiler=gnu --architecture=cpu --mode=run --log_file='my_performance_log.txt'
# These are default values, and will give the same result as
# > python run_alltests.py
# To add a new test item, extend the test_list with a line like
@@ -33,9 +34,53 @@ import argparse, re, time
# write data into the performance log file
# push file performance_log.txt on the repo

+# Define the list of tests to run
+# -------------------------------
+# each element of test_list contains
+# [str runname, int n_node, int n_mpi PER NODE, int n_omp]
+test_list = []
+n_repeat = 3
+basename1 = 'uniform_t0.01_'
+
+test_list.extend([[basename1 + '128', 1, 16, 8]]*n_repeat)
+test_list.extend([[basename1 + '128', 1, 32, 16]]*n_repeat)
+
+# test_list.extend([[basename1 + '128', 1, 16, 8]]*n_repeat)
+# test_list.extend([[basename1 + '256', 8, 16, 8]]*n_repeat)
+# test_list.extend([[basename1 + '512', 64, 16, 8]]*n_repeat)
+# test_list.extend([[basename1 + '1024', 512, 16, 8]]*n_repeat)
+# test_list.extend([[basename1 + '2048', 4096, 16, 8]]*n_repeat)
+
+
+# test_list.extend([['uniform_t0.01_direct1_1ppc_128', 1, 16, 8]]*n_repeat)
+# test_list.extend([['uniform_t0.01_direct3_1ppc_128', 1, 16, 8]]*n_repeat)
+# test_list.extend([['uniform_t0.01_esirk1_1ppc_128', 1, 16, 8]]*n_repeat)
+# test_list.extend([['uniform_t0.01_esirk3_1ppc_128', 1, 16, 8]]*n_repeat)
+
+# test_list.extend([['uniform_t0.01_direct1_1ppc_256', 8, 16, 8]]*n_repeat)
+# test_list.extend([['uniform_t0.01_direct3_1ppc_256', 8, 16, 8]]*n_repeat)
+# test_list.extend([['uniform_t0.01_esirk1_1ppc_256', 8, 16, 8]]*n_repeat)
+# test_list.extend([['uniform_t0.01_esirk3_1ppc_256', 8, 16, 8]]*n_repeat)
+
+# test_list.extend([['uniform_t0.01_direct1_1ppc_512', 64, 16, 8]]*n_repeat)
+# test_list.extend([['uniform_t0.01_direct3_1ppc_512', 64, 16, 8]]*n_repeat)
+# test_list.extend([['uniform_t0.01_esirk1_1ppc_512', 64, 16, 8]]*n_repeat)
+# test_list.extend([['uniform_t0.01_esirk3_1ppc_512', 64, 16, 8]]*n_repeat)
+
+# test_list.extend([['uniform_t0.01_direct1_1ppc_1024', 512, 16, 8]]*n_repeat)
+# test_list.extend([['uniform_t0.01_direct3_1ppc_1024', 512, 16, 8]]*n_repeat)
+# test_list.extend([['uniform_t0.01_esirk1_1ppc_1024', 512, 16, 8]]*n_repeat)
+# test_list.extend([['uniform_t0.01_esirk3_1ppc_1024', 512, 16, 8]]*n_repeat)
+
+# test_list.extend([['uniform_t0.01_direct1_1ppc_2048', 4096, 16, 8]]*n_repeat)
+# test_list.extend([['uniform_t0.01_direct3_1ppc_2048', 4096, 16, 8]]*n_repeat)
+# test_list.extend([['uniform_t0.01_esirk1_1ppc_2048', 4096, 16, 8]]*n_repeat)
+# test_list.extend([['uniform_t0.01_esirk3_1ppc_2048', 4096, 16, 8]]*n_repeat)
+
+n_tests = len(test_list)
+
# Read command-line arguments
# ---------------------------
-
# Create parser and read arguments
parser = argparse.ArgumentParser(
    description='Run performance tests and write results in files')
@@ -115,136 +160,6 @@ if args.recompile == True:
# Define functions to run a test and analyse results
# --------------------------------------------------
-
-# Run a performance test in an interactive allocation
-def run_interactive(run_name, res_dir, n_node=1, n_mpi=1, n_omp=1):
-    # Clean res_dir
-    if os.path.exists(res_dir):
-        shutil.rmtree(res_dir)
-    os.makedirs(res_dir)
-    # Copy files to res_dir
-    shutil.copyfile(bin_dir + bin_name, res_dir + bin_name)
-    shutil.copyfile(cwd + run_name, res_dir + 'inputs')
-    os.chdir(res_dir)
-    if args.architecture == 'cpu':
-        cflag_value = max(1, int(32/n_mpi) * 2) # Follow NERSC directives
-        exec_command = 'export OMP_NUM_THREADS=' + str(n_omp) + ';' +\
-                       'srun --cpu_bind=cores ' + \
-                       ' -n ' + str(n_node*n_mpi) + \
-                       ' -c ' + str(cflag_value) + \
-                       ' ./' + bin_name + ' inputs > perf_output.txt'
-    elif args.architecture == 'knl':
-        # number of logical cores per MPI process
-        cflag_value = max(1,int(68/n_mpi) * 4) # Follow NERSC directives
-        exec_command = 'export OMP_NUM_THREADS=' + str(n_omp) + ';' +\
-                       'srun --cpu_bind=cores ' + \
-                       ' -n ' + str(n_node*n_mpi) + \
-                       ' -c ' + str(cflag_value) + \
-                       ' ./' + bin_name + ' inputs > perf_output.txt'
-    os.system('chmod 700 ' + bin_name)
-    os.system(config_command + exec_command)
-    return 0
-
-def run_batch(run_name, res_dir, n_node=1, n_mpi=1, n_omp=1):
-    # Clean res_dir
-    if os.path.exists(res_dir):
-        shutil.rmtree(res_dir)
-    os.makedirs(res_dir)
-    # Copy files to res_dir
-    shutil.copyfile(bin_dir + bin_name, res_dir + bin_name)
-    shutil.copyfile(cwd + run_name, res_dir + 'inputs')
-    os.chdir(res_dir)
-    batch_string = ''
-    batch_string += '#!/bin/bash\n'
-    batch_string += '#SBATCH --job-name=' + run_name + str(n_node) + str(n_mpi) + str(n_omp) + '\n'
-    batch_string += '#SBATCH --time=00:30:00\n'
-    batch_string += '#SBATCH -C ' + module_Cname[args.architecture] + '\n'
-    batch_string += '#SBATCH -N ' + str(n_node) + '\n'
-    batch_string += '#SBATCH --partition=regular\n'
-    batch_string += '#SBATCH --qos=normal\n'
-    batch_string += '#SBATCH -e error.txt\n'
-    batch_string += '#SBATCH --account=m2852\n'
-    batch_string += 'export OMP_NUM_THREADS=' + str(n_omp) + '\n'
-    if args.architecture == 'cpu':
-        cflag_value = max(1, int(32/n_mpi) * 2) # Follow NERSC directives
-        batch_string += 'srun --cpu_bind=cores '+ \
-                        ' -n ' + str(n_node*n_mpi) + \
-                        ' -c ' + str(cflag_value) + \
-                        ' ./' + bin_name + ' inputs > perf_output.txt'
-    elif args.architecture == 'knl':
-        # number of logical cores per MPI process
-        cflag_value = max(1, int(64/n_mpi) * 4) # Follow NERSC directives
-        batch_string += 'srun --cpu_bind=cores ' + \
-                        ' -n ' + str(n_node*n_mpi) + \
-                        ' -c ' + str(cflag_value) + \
-                        ' ./' + bin_name + ' inputs > perf_output.txt\n'
-    batch_file = 'slurm'
-    f_exe = open(batch_file,'w')
-    f_exe.write(batch_string)
-    f_exe.close()
-    os.system('chmod 700 ' + bin_name)
-    os.system(config_command + 'sbatch ' + batch_file + ' >> ' + cwd + 'log_jobids_tmp.txt')
-    return 0
-
-# Read output file and return init time and 1-step time
-def read_run_perf(filename):
-    timing_list = []
-    # Search inclusive time to get simulation step time
-    partition_limit = 'NCalls Incl. Min Incl. Avg Incl. Max Max %'
-    with open(filename) as file_handler:
-        output_text = file_handler.read()
-    # Get total simulation time
-    line_match_totaltime = re.search('TinyProfiler total time across processes.*', output_text)
-    total_time = float(line_match_totaltime.group(0).split()[8])
-    search_area = output_text.partition(partition_limit)[2]
-    line_match_looptime = re.search('\nWarpX::Evolve().*', search_area)
-    time_wo_initialization = float(line_match_looptime.group(0).split()[3])
-    timing_list += [str(total_time - time_wo_initialization)]
-    timing_list += [str(time_wo_initialization/n_steps)]
-    # Search exclusive time to get routines timing
-    partition_limit1 = 'NCalls Excl. Min Excl. Avg Excl. Max Max %'
-    partition_limit2 = 'NCalls Incl. Min Incl. Avg Incl. Max Max %'
-    file_handler.close()
-    with open(filename) as file_handler:
-        output_text = file_handler.read()
-    search_area = output_text.partition(partition_limit1)[2].partition(partition_limit2)[0]
-    pattern_list = ['\nParticleContainer::Redistribute().*',\
-                    '\nFabArray::FillBoundary().*',\
-                    '\nFabArray::ParallelCopy().*',\
-                    '\nPICSAR::CurrentDeposition.*',\
-                    '\nPICSAR::FieldGather.*',\
-                    '\nPICSAR::ParticlePush.*',\
-                    '\nPPC::Evolve::Copy.*',\
-                    '\nWarpX::EvolveEM().*',\
-                    'NArrayInt>::Checkpoint().*',\
-                    'NArrayInt>::WriteParticles().*',\
-                    '\nVisMF::Write_FabArray.*',\
-                    '\nWriteMultiLevelPlotfile().*']
-    for pattern in pattern_list:
-        timing = '0'
-        line_match = re.search(pattern, search_area)
-        if line_match is not None:
-            timing = [str(float(line_match.group(0).split()[3])/n_steps)]
-        timing_list += timing
-    return timing_list
-
-# Write time into logfile
-def write_perf_logfile(log_file):
-    log_line = ' '.join([year, month, day, run_name, args.compiler,\
-                         args.architecture, str(n_node), str(n_mpi),\
-                         str(n_omp)] + timing_list + ['\n'])
-    f_log = open(log_file, 'a')
-    f_log.write(log_line)
-    f_log.close()
-    return 0
-
-def get_nsteps(runname):
-    with open(runname) as file_handler:
-        runname_text = file_handler.read()
-    line_match_nsteps = re.search('\nmax_step.*', runname_text)
-    nsteps = float(line_match_nsteps.group(0).split()[2])
-    return nsteps
-
def process_analysis():
    dependencies = ''
    f_log = open(cwd + 'log_jobids_tmp.txt','r')
@@ -254,18 +169,19 @@ def process_analysis():
        dependencies += line.split()[3] + ':'
    batch_string = ''
    batch_string += '#!/bin/bash\n'
-    batch_string += '#SBATCH --job-name=perftests_read\n'
+    batch_string += '#SBATCH --job-name=warpx_read\n'
    batch_string += '#SBATCH --time=00:05:00\n'
    batch_string += '#SBATCH -C ' + module_Cname[args.architecture] + '\n'
    batch_string += '#SBATCH -N 1\n'
    batch_string += '#SBATCH -S 4\n'
-    batch_string += '#SBATCH --partition=regular\n'
-    batch_string += '#SBATCH --qos=normal\n'
+    batch_string += '#SBATCH -q regular\n'
    batch_string += '#SBATCH -e read_error.txt\n'
    batch_string += '#SBATCH -o read_output.txt\n'
    batch_string += '#SBATCH --mail-type=end\n'
    batch_string += '#SBATCH --account=m2852\n'
-    batch_string += 'python ' + __file__ + ' --no-recompile --compiler=' + args.compiler + ' --architecture=' + args.architecture + ' --mode=read' + ' --log_file=' + log_file
+    batch_string += 'python ' + __file__ + ' --no-recompile --compiler=' + \
+                    args.compiler + ' --architecture=' + args.architecture + \
+                    ' --mode=read' + ' --log_file=' + log_file
    if args.commit == True:
        batch_string += ' --commit'
    batch_string += '\n'
@@ -279,20 +195,6 @@ def process_analysis():
# Loop over the tests and return run time + details
# -------------------------------------------------
-
-# each element of test_list contains
-# [str runname, int n_node, int n_mpi PER NODE, int n_omp]
-
-test_list = []
-n_repeat = 1
-filename1 = 'uniform_plasma'
-
-test_list.extend([[filename1, 1, 8, 16]]*3)
-test_list.extend([[filename1, 1, 4, 32]]*3)
-test_list.extend([[filename1, 2, 4, 32]]*3)
-
-n_tests = len(test_list)
-
if args.mode == 'run':
    # Remove file log_jobids_tmp.txt if exists.
    # This file contains the jobid of every perf test
@@ -308,13 +210,14 @@ if args.mode == 'run':
        n_omp = current_run[3]
        n_steps = get_nsteps(cwd + run_name)
        res_dir = res_dir_base
-        res_dir += '_'.join([year, month, day, run_name, args.compiler,\
+        res_dir += '_'.join([run_name, args.compiler,\
                             args.architecture, str(n_node), str(n_mpi),\
-                             str(n_omp)]) + '/'
+                             str(n_omp), str(count)]) + '/'
        # Run the simulation.
        # If you are currently in an interactive session and want to run interactive,
        # just replace run_batch with run_interactive
-        run_batch(run_name, res_dir, n_node=n_node, n_mpi=n_mpi, n_omp=n_omp)
+        run_batch(run_name, res_dir, bin_name, config_command, architecture=args.architecture, \
+                  Cname=module_Cname[args.architecture], n_node=n_node, n_mpi=n_mpi, n_omp=n_omp)
    os.chdir(cwd)
    process_analysis()
@@ -326,7 +229,8 @@ if args.mode == 'read':
        'FillBoundary ParallelCopy CurrentDeposition FieldGather '+\
        'ParticlePush Copy EvolveEM Checkpoint '+\
        'WriteParticles Write_FabArray '+\
-        'WriteMultiLevelPlotfile(unit: second)\n'
+        'WriteMultiLevelPlotfile '+\
+        'RedistributeMPI(unit: second)\n'
    f_log = open(log_dir + log_file, 'a')
    f_log.write(log_line)
    f_log.close()
@@ -340,13 +244,20 @@ if args.mode == 'read':
        n_steps = get_nsteps(cwd + run_name)
        print('n_steps = ' + str(n_steps))
        res_dir = res_dir_base
-        res_dir += '_'.join([year, month, day, run_name, args.compiler,\
+        res_dir += '_'.join([run_name, args.compiler,\
                             args.architecture, str(n_node), str(n_mpi),\
-                             str(n_omp)]) + '/'
-        # Read performance data from the output file
-        timing_list = read_run_perf(res_dir + 'perf_output.txt')
+                             str(n_omp), str(count)]) + '/'
+#        res_dir += '_'.join([year, month, '25', run_name, args.compiler,\
+#                             args.architecture, str(n_node), str(n_mpi), \
+#                             str(n_omp)]) + '/'
+        # Read performance data from the output file
+        output_filename = 'perf_output.txt'
+        timing_list = read_run_perf(res_dir + output_filename, n_steps)
        # Write performance data to the performance log file
-        write_perf_logfile(log_dir + log_file)
+        log_line = ' '.join([year, month, day, run_name, args.compiler,\
+                             args.architecture, str(n_node), str(n_mpi),\
+                             str(n_omp)] + timing_list + ['\n'])
+        write_perf_logfile(log_dir + log_file, log_line)

    # Store test parameters for record
    dir_record_base = './perf_warpx_record/'
@@ -363,6 +274,21 @@ if args.mode == 'read':
    for count, current_run in enumerate(test_list):
        shutil.copy(current_run[0], dir_record)

+    for count, current_run in enumerate(test_list):
+        run_name = current_run[0]
+        n_node = current_run[1]
+        n_mpi = current_run[2]
+        n_omp = current_run[3]
+        res_dir = res_dir_base
+        res_dir += '_'.join([run_name, args.compiler,\
+                             args.architecture, str(n_node), str(n_mpi),\
+                             str(n_omp), str(count)]) + '/'
+        res_dir_arch = res_dir_base
+        res_dir_arch += '_'.join([year, month, day, run_name, args.compiler,\
+                                  args.architecture, str(n_node), str(n_mpi), \
+                                  str(n_omp), str(count)]) + '/'
+        os.rename(res_dir, res_dir_arch)
+
    # Commit results to the Repo
    if args.commit == True:
        os.system('git add ' + log_dir + log_file + ';'\
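The job-chaining logic in process_analysis deserves a note: each sbatch submission in run mode appends its 'Submitted batch job <jobid>' line to log_jobids_tmp.txt, and the read-mode job is then submitted with an afterok dependency on all collected ids, so the analysis starts only once every performance run has completed successfully. A condensed sketch of that chaining (job ids fabricated):

    import os

    # One 'Submitted batch job 1234567' line per performance run
    with open('log_jobids_tmp.txt') as f:
        job_ids = [line.split()[3] for line in f if line.strip()]
    # Builds e.g. 'sbatch --dependency afterok:1234567:1234568 slurm_perfread'
    os.system('sbatch --dependency afterok:' + ':'.join(job_ids) + ' slurm_perfread')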
diff --git a/Tools/performance_tests/run_alltests_1node.py b/Tools/performance_tests/run_alltests_1node.py
new file mode 100644
index 000000000..4c6849c3b
--- /dev/null
+++ b/Tools/performance_tests/run_alltests_1node.py
@@ -0,0 +1,333 @@
+import os, sys, shutil
+import argparse, re, time
+from functions_perftest import *
+
+# This script runs automated performance tests for WarpX.
+# It runs tests in list test_list defined below, and write
+# results in file performance_log.txt in warpx/performance_tests/
+
+# ---- User's manual ----
+# Before running performance tests, make sure you have the latest version
+# of performance_log.txt
+
+# ---- Running a custom set of performance tests ----
+# > python run_alltests_1node.py --no-recompile --compiler=intel
+# > --architecture=knl --mode=run --input_file=uniform_plasma
+# > --n_node=1 --log_file='my_performance_log.txt'
+
+# ---- Running the pre-defined automated tests ----
+# Compile and run:
+# > python run_alltests_1node.py --automated --recompile
+# Just run:
+# > python run_alltests_1node.py --automated
+
+# To add a new test item, extend the test_list with a line like
+# test_list.extend([['my_input_file', n_node, n_mpi, n_omp]]*n_repeat)
+# - my_input_file must be in warpx/performance_tests
+
+# ---- Developer's manual ----
+# This script can run in two modes:
+# - 'run' mode: for each test item, a batch job is executed.
+#     create folder '$SCRATCH/performance_warpx/'
+#     recompile the code if option --recompile is used
+#     loop over test_list and submit one batch script per item
+#     Submit a batch job that executes the script in read mode
+#     This last job runs once all others are completed
+# - 'read' mode: Get performance data from all test items
+#     create performance log file if it does not exist
+#     loop over test_file
+#         read initialization time and step time
+#         write data into the performance log file
+#         push file performance_log.txt on the repo
+
+# Read command-line arguments
+# ---------------------------
+# Create parser and read arguments
+parser = argparse.ArgumentParser(
+    description='Run performance tests and write results in files')
+parser.add_argument('--recompile', dest='recompile', action='store_true', default=False)
+parser.add_argument('--no-recompile', dest='recompile', action='store_false', default=False)
+parser.add_argument('--commit', dest='commit', action='store_true', default=False)
+parser.add_argument( '--compiler', choices=['gnu', 'intel'], default='intel',
+    help='which compiler to use')
+parser.add_argument( '--architecture', choices=['cpu', 'knl'], default='knl',
+    help='which architecture to cross-compile for NERSC machines')
+parser.add_argument( '--mode', choices=['run', 'read'], default='run',
+    help='whether to run perftests or read their perf output. run calls read')
+parser.add_argument( '--log_file', dest = 'log_file', default='my_performance_log.txt',
+    help='name of log file where data will be written. ignored if option --commit is used')
+parser.add_argument('--n_node', dest='n_node', default=1, help='number of nodes for the runs')
+parser.add_argument('--input_file', dest='input_file', default='input_file.pixr',
+    type=str, help='input file to run')
+parser.add_argument('--automated', dest='automated', action='store_true', default=False,
+    help='Use to run the automated test list')
+
+args = parser.parse_args()
+log_file = args.log_file
+do_commit = args.commit
+run_name = args.input_file
+
+# list of tests to run and analyse.
+# Note: This is overwritten if --automated is used
+# each element of test_list contains
+# [str input_file, int n_node, int n_mpi PER NODE, int n_omp]
+test_list = []
+n_repeat = 2
+filename1 = args.input_file
+test_list.extend([[filename1, 1, 128, 1]]*n_repeat)
+test_list.extend([[filename1, 1, 64, 2]]*n_repeat)
+# test_list.extend([[filename1, 1, 32, 4]]*n_repeat)
+# test_list.extend([[filename1, 1, 16, 8]]*n_repeat)
+# test_list.extend([[filename1, 1, 8, 16]]*n_repeat)
+# test_list.extend([[filename1, 1, 4, 32]]*n_repeat)
+# test_list.extend([[filename1, 1, 2, 64]]*n_repeat)
+# test_list.extend([[filename1, 1, 1, 128]]*n_repeat)
+
+# Nothing should be changed after this line
+# if flag --automated is used, test_list and do_commit are
+# overwritten
+
+if args.automated == True:
+    test_list = []
+    n_repeat = 4
+    test_list.extend([['automated_test_1_uniform_rest_32ppc', 1, 16, 8]]*n_repeat)
+    test_list.extend([['automated_test_2_uniform_rest_1ppc', 1, 16, 8]]*n_repeat)
+    test_list.extend([['automated_test_3_uniform_drift_4ppc', 1, 16, 8]]*n_repeat)
+    test_list.extend([['automated_test_4_labdiags_2ppc', 1, 16, 8]]*n_repeat)
+    test_list.extend([['automated_test_5_loadimbalance', 1, 16, 8]]*n_repeat)
+    test_list.extend([['automated_test_6_output_2ppc', 1, 16, 8]]*n_repeat)
+    do_commit = False
+    run_name = 'automated_tests'
+
+n_tests = len(test_list)
+if do_commit == True:
+    log_file = 'performance_log.txt'
+
+# Dictionaries
+# compiler names. Used for WarpX executable name
+compiler_name = {'intel': 'intel', 'gnu': 'gcc'}
+# architecture. Used for WarpX executable name
+module_name = {'cpu': 'haswell', 'knl': 'mic-knl'}
+# architecture. Used in batch scripts
+module_Cname = {'cpu': 'haswell', 'knl': 'knl,quad,cache'}
+# Define environment variables
+cwd = os.getcwd() + '/'
+res_dir_base = os.environ['SCRATCH'] + '/performance_warpx/'
+bin_dir = cwd + 'Bin/'
+bin_name = 'perf_tests3d.' + args.compiler + '.' + module_name[args.architecture] + '.TPROF.MPI.OMP.ex'
+log_dir = cwd
+
+day = time.strftime('%d')
+month = time.strftime('%m')
+year = time.strftime('%Y')
+n_node = int(args.n_node)
+
+# Initialize tests
+# ----------------
+if args.mode == 'run':
+# Set default options for compilation and execution
+    config_command = ''
+    config_command += 'module unload darshan;'
+    config_command += 'module load craype-hugepages4M;'
+    if args.architecture == 'knl':
+        if args.compiler == 'intel':
+            config_command += 'module unload PrgEnv-gnu;'
+            config_command += 'module load PrgEnv-intel;'
+        elif args.compiler == 'gnu':
+            config_command += 'module unload PrgEnv-intel;'
+            config_command += 'module load PrgEnv-gnu;'
+        config_command += 'module unload craype-haswell;'
+        config_command += 'module load craype-mic-knl;'
+    elif args.architecture == 'cpu':
+        if args.compiler == 'intel':
+            config_command += 'module unload PrgEnv-gnu;'
+            config_command += 'module load PrgEnv-intel;'
+        elif args.compiler == 'gnu':
+            config_command += 'module unload PrgEnv-intel;'
+            config_command += 'module load PrgEnv-gnu;'
+        config_command += 'module unload craype-mic-knl;'
+        config_command += 'module load craype-haswell;'
+    # Create main result directory if it does not exist
+    if not os.path.exists(res_dir_base):
+        os.mkdir(res_dir_base)
+
+# Recompile if requested
+if args.recompile == True:
+    with open(cwd + 'GNUmakefile_perftest') as makefile_handler:
+        makefile_text = makefile_handler.read()
+    makefile_text = re.sub('\nCOMP.*', '\nCOMP=%s' %compiler_name[args.compiler], makefile_text)
+    with open(cwd + 'GNUmakefile_perftest', 'w') as makefile_handler:
+        makefile_handler.write( makefile_text )
+    os.system(config_command + " make -f GNUmakefile_perftest realclean ; " + " rm -r tmp_build_dir *.mod; make -j 8 -f GNUmakefile_perftest")
+
+# This function runs a batch script with dependencies to perform the analysis
+# when performance runs are done.
+def process_analysis():
+    dependencies = ''
+    f_log = open(cwd + 'log_jobids_tmp.txt','r')
+    line = f_log.readline()
+    print(line)
+    dependencies += line.split()[3] + ':'
+    batch_string = ''
+    batch_string += '#!/bin/bash\n'
+    batch_string += '#SBATCH --job-name=warpx_1node_read\n'
+    batch_string += '#SBATCH --time=00:05:00\n'
+    batch_string += '#SBATCH -C haswell\n'
+    batch_string += '#SBATCH -N 1\n'
+    batch_string += '#SBATCH -S 4\n'
+    batch_string += '#SBATCH -q regular\n'
+    batch_string += '#SBATCH -e read_error.txt\n'
+    batch_string += '#SBATCH -o read_output.txt\n'
+    batch_string += '#SBATCH --mail-type=end\n'
+    batch_string += '#SBATCH --account=m2852\n'
+    batch_string += 'python ' + __file__ + ' --no-recompile --compiler=' + \
+                    args.compiler + ' --architecture=' + args.architecture + \
+                    ' --mode=read' + ' --log_file=' + log_file + \
+                    ' --input_file=' + args.input_file
+    if do_commit == True:
+        batch_string += ' --commit'
+    if args.automated == True:
+        batch_string += ' --automated'
+    batch_string += '\n'
+    batch_file = 'slurm_perfread'
+    f_exe = open(batch_file,'w')
+    f_exe.write(batch_string)
+    f_exe.close()
+    os.system('chmod 700 ' + batch_file)
+    os.system('sbatch --dependency afterok:' + dependencies[0:-1] + ' ' + batch_file)
+    return 0
+
+# Loop over the tests and return run time + details
+# -------------------------------------------------
+if args.mode == 'run':
+    # Remove file log_jobids_tmp.txt if exists.
+    # This file contains the jobid of every perf test
+    # It is used to manage the analysis script dependencies
+    if os.path.isfile(cwd + 'log_jobids_tmp.txt'):
+        os.remove(cwd + 'log_jobids_tmp.txt')
+    res_dir = res_dir_base
+    res_dir += '_'.join([run_name, args.compiler,\
+                         args.architecture, str(n_node)]) + '/'
+    # Run the simulation.
+    run_batch_nnode(test_list, res_dir, bin_name, config_command,\
+                    architecture=args.architecture, Cname=module_Cname[args.architecture], \
+                    n_node=n_node)
+    os.chdir(cwd)
+    process_analysis()
+
+if args.mode == 'read':
+    # Create log_file for performance tests if it does not exist
+    if not os.path.isfile(log_dir + log_file):
+        log_line = '## year month day input_file compiler architecture n_node n_mpi ' +\
+                   'n_omp time_initialization time_one_iteration Redistribute '+\
+                   'FillBoundary ParallelCopy CurrentDeposition FieldGather '+\
+                   'ParticlePush Copy EvolveEM Checkpoint '+\
+                   'WriteParticles Write_FabArray '+\
+                   'WriteMultiLevelPlotfile(unit: second) '+\
+                   'RedistributeMPI\n'
+        f_log = open(log_dir + log_file, 'a')
+        f_log.write(log_line)
+        f_log.close()
+    for count, current_run in enumerate(test_list):
+        # Results folder
+        print('read ' + str(current_run))
+        input_file = current_run[0]
+        # Do not read n_node = current_run[1], it is an external parameter
+        n_mpi = current_run[2]
+        n_omp = current_run[3]
+        n_steps = get_nsteps(cwd + input_file)
+        print('n_steps = ' + str(n_steps))
+        res_dir = res_dir_base
+        res_dir += '_'.join([run_name, args.compiler,\
+                             args.architecture, str(n_node)]) + '/'
+        # Read performance data from the output file
+        output_filename = 'out_' + '_'.join([input_file, str(n_node), str(n_mpi), str(n_omp), str(count)]) + '.txt'
+        timing_list = read_run_perf(res_dir + output_filename, n_steps)
+        # Write performance data to the performance log file
+        log_line = ' '.join([year, month, day, input_file, args.compiler,\
+                             args.architecture, str(n_node), str(n_mpi),\
+                             str(n_omp)] + timing_list + ['\n'])
+        write_perf_logfile(log_dir + log_file, log_line)
+
+    # Store test parameters for record
+    dir_record_base = './perf_warpx_record/'
+    if not os.path.exists(dir_record_base):
+        os.mkdir(dir_record_base)
+    count = 0
+    dir_record = dir_record_base + '_'.join([year, month, day]) + '_0'
+    while os.path.exists(dir_record):
+        count += 1
+        dir_record = dir_record[:-1] + str(count)
+    os.mkdir(dir_record)
+    shutil.copy(__file__, dir_record)
+    shutil.copy(log_dir + log_file, dir_record)
+    for count, current_run in enumerate(test_list):
+        shutil.copy(current_run[0], dir_record)
+
+    # Rename directory with precise date for archive purpose
+    res_dir_arch = res_dir_base
+    res_dir_arch += '_'.join([year, month, day, run_name, args.compiler,\
+                              args.architecture, str(n_node)]) + '/'
+    os.rename(res_dir, res_dir_arch)
+
+    # Commit results to the Repo
+    if do_commit == True:
+        os.system('git add ' + log_dir + log_file + ';'\
+                  'git commit -m "performance tests";'\
+                  'git push -u origin master')
+
+    # Plot file
+    import numpy as np
+    import matplotlib
+    matplotlib.use('Agg')
+    import matplotlib.pyplot as plt
+    filename0 = 'performance_log'
+    filename = filename0 + '.txt'
+    fontsize = 14
+    matplotlib.rcParams.update({'font.size': fontsize})
+    nsteps = 100.
+    nrepeat = 4
+    legends = [ 'n_node', 'n_mpi', 'n_omp', 'time_initialization', 'time_one_iteration', \
+                'Redistribute', 'FillBoundary', 'ParallelCopy', 'CurrentDeposition', \
+                'FieldGather', 'ParticlePush', 'Copy', 'EvolveEM', 'Checkpoint', \
+                'WriteParticles', 'Write_FabArray', 'WriteMultiLevelPlotfile', \
+                'RedistributeMPI']
+    date = np.loadtxt( filename, usecols = np.arange(0, 3 ))
+    data = np.loadtxt( filename, usecols = np.arange(6, 6+len(legends)) )
+    # Read run name
+    with open(filename) as f:
+        namelist_tmp = zip(*[line.split() for line in f])[3]
+    # Remove first line = comments
+    namelist = list(namelist_tmp[1:])
+    selector_list = ['automated_test_1_uniform_rest_32ppc',\
+                     'automated_test_2_uniform_rest_1ppc',\
+                     'automated_test_3_uniform_drift_4ppc',\
+                     'automated_test_4_labdiags_2ppc',\
+                     'automated_test_5_loadimbalance',\
+                     'automated_test_6_output_2ppc']
+    selector_string = selector_list[0]
+    selector = [idx for idx in range(len(namelist)) if selector_string in namelist[idx]]
+    lin_date = date[:,0]+date[:,1]/12.+date[:,2]/366.
+    unique_lin_date = np.unique(lin_date)
+    my_xticks = unique_lin_date
+#    cmap = plt.get_cmap("tab20")
+    cycle = plt.rcParams['axes.prop_cycle'].by_key()['color']
+    for selector_string in selector_list:
+        selector = [idx for idx in range(len(namelist)) if selector_string in namelist[idx]]
+        plt.figure(num=0, figsize=(8,4))
+        plt.clf()
+        plt.title('warpx ' + selector_string)
+        for i in np.arange(data.shape[1]):
+            icolors = i-3
+            if i>3 and (data[selector,i] > 5./100*data[selector,4]).any():
+                plt.plot(lin_date[selector], data[selector,i],'+', ms=6, \
+                         mew=2, label=legends[i] )
+#                plt.plot(lin_date[selector], data[selector,i],'+', ms=6, \
+#                         mew=2, label=legends[i], color=cmap(i) )
+        plt.xlabel('date')
+        plt.ylabel('time/step (s)')
+        plt.grid()
+        plt.legend(loc='best')
+        plt.legend(bbox_to_anchor=(1.1, 1.05))
+        plt.savefig( selector_string + '.pdf', bbox_inches='tight')
+        plt.savefig( selector_string + '.png', bbox_inches='tight')
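A worked example of the cpus-per-task arithmetic used by all of these scripts, which appears to follow the NERSC directive of binding each MPI rank to whole physical cores (64 cores with 4 hardware threads each on KNL, 32 cores with 2 threads each on Haswell). For the automated tests, which run 16 ranks on one node:

    n_mpi = 16
    cflag_knl = max(1, int(64/n_mpi) * 4)  # 4 cores x 4 threads = 16 logical CPUs per rank
    cflag_cpu = max(1, int(32/n_mpi) * 2)  # 2 cores x 2 threads = 4 logical CPUs per rank
    # so the KNL batch scripts contain: srun --cpu_bind=cores -n 16 -c 16 ./<bin_name> inputs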
It returns a dictionary of numpy arrays, where each key corresponds to one of the data fields ("Ex", "By,", etc... ). These values are cell-centered. ''' - hdrs = glob(snapshot + "/Level_0/buffer?????_H") + hdrs = glob(snapshot + "/Level_0/buffer??????_H") hdrs.sort() boxes, file_names, offsets, header = _read_header(hdrs[0]) dom_lo, dom_hi = _combine_boxes(boxes) domain_size = dom_hi - dom_lo + 1 - + space_dim = len(dom_lo) if space_dim == 2: direction = 1 @@ -69,14 +69,14 @@ def read_lab_snapshot(snapshot): buffer_data = _read_buffer(snapshot, hdrs[0]) buffer_size = buffer_data['Bx'].shape[direction] - + data = {} for i in range(header.ncomp): if space_dim == 3: data[_component_names[i]] = np.zeros((domain_size[0], domain_size[1], buffer_size*len(hdrs))) elif space_dim == 2: data[_component_names[i]] = np.zeros((domain_size[0], buffer_size*len(hdrs))) - + for i, hdr in enumerate(hdrs): buffer_data = _read_buffer(snapshot, hdr) if data is None: @@ -87,7 +87,7 @@ def read_lab_snapshot(snapshot): data[k][:,:,buffer_size*i:buffer_size*(i+1)] = v[:,:,:] elif space_dim == 2: data[k][:,buffer_size*i:buffer_size*(i+1)] = v[:,:] - + return data @@ -103,7 +103,7 @@ def _string_to_numpy_array(s): def _line_to_numpy_arrays(line): lo_corner = _string_to_numpy_array(line[0][1:]) hi_corner = _string_to_numpy_array(line[1][:]) - node_type = _string_to_numpy_array(line[2][:-1]) + node_type = _string_to_numpy_array(line[2][:-1]) return lo_corner, hi_corner, node_type @@ -152,39 +152,39 @@ def _combine_boxes(boxes): def _read_field(raw_file, field_name): - header_file = raw_file + field + "_H" + header_file = raw_file + field_name + "_H" boxes, file_names, offsets, header = _read_header(header_file) ng = header.nghost - lo, hi = _combine_boxes(boxes) - data = np.zeros(hi - lo + 1) + dom_lo, dom_hi = _combine_boxes(boxes) + data = np.zeros(dom_hi - dom_lo + 1) for box, fn, offset in zip(boxes, file_names, offsets): - lo = box[0] - hi = box[1] + lo = box[0] - dom_lo + hi = box[1] - dom_lo shape = hi - lo + 1 with open(raw_file + fn, "rb") as f: f.seek(offset) f.readline() # always skip the first line arr = np.fromfile(f, 'float64', np.product(shape)) arr = arr.reshape(shape, order='F') - data[[slice(l,h+1) for l, h in zip(lo+ng, hi+ng)]] = arr + data[[slice(l,h+1) for l, h in zip(lo, hi)]] = arr return data def _read_buffer(snapshot, header_fn): - + boxes, file_names, offsets, header = _read_header(header_fn) ng = header.nghost dom_lo, dom_hi = _combine_boxes(boxes) - + all_data = {} for i in range(header.ncomp): all_data[_component_names[i]] = np.zeros(dom_hi - dom_lo + 1) - + for box, fn, offset in zip(boxes, file_names, offsets): lo = box[0] - dom_lo hi = box[1] - dom_lo @@ -213,7 +213,7 @@ if __name__ == "__main__": for level in range(2): for name, vals in data[level].items(): print(level, name, vals.shape, vals.min(), vals.max()) - + # make a projection along the z-axis of the 'jx' field for level 0 level = 0 plt.pcolormesh(data[level]['jx'].sum(axis=2)) @@ -223,4 +223,3 @@ if __name__ == "__main__": level = 1 plt.pcolormesh(data[level]['Bx_cp'].sum(axis=0)) plt.savefig('Bx_cp') - |