diff options
Diffstat (limited to '')
35 files changed, 178 insertions, 117 deletions
diff --git a/Docs/source/running_cpp/parameters.rst b/Docs/source/running_cpp/parameters.rst index 0121e7bf1..936dd262a 100644 --- a/Docs/source/running_cpp/parameters.rst +++ b/Docs/source/running_cpp/parameters.rst @@ -974,6 +974,10 @@ Numerics and algorithms value here will make the simulation unphysical, but will allow QED effects to become more apparent. Note that this option will only have an effect if the warpx.use_Hybrid_QED flag is also triggered. + * ``warpx.do_device_synchronize_before_profile`` (`bool`) optional (default `1`) + When running in an accelerated platform, whether to call a deviceSynchronize around profiling regions. + This allows the profiler to give meaningful timers, but (hardly) slows down the simulation. + * ``warpx.sort_int`` (`int`) optional (defaults: ``-1`` on CPU; ``4`` on GPU) If ``<=0``, do not sort particles. If ``>0``, sort particles by bin every ``sort_int`` iteration. It is turned on on GPUs for performance reasons (to improve memory locality). diff --git a/Source/BoundaryConditions/PML.H b/Source/BoundaryConditions/PML.H index b8ed0ff7a..67ef80dfe 100644 --- a/Source/BoundaryConditions/PML.H +++ b/Source/BoundaryConditions/PML.H @@ -11,6 +11,8 @@ #ifndef WARPX_PML_H_ #define WARPX_PML_H_ +#include "WarpXProfilerWrapper.H" + #include <AMReX_MultiFab.H> #include <AMReX_Geometry.H> diff --git a/Source/BoundaryConditions/PML.cpp b/Source/BoundaryConditions/PML.cpp index 0ec247ac4..615262cae 100644 --- a/Source/BoundaryConditions/PML.cpp +++ b/Source/BoundaryConditions/PML.cpp @@ -783,7 +783,7 @@ void PML::Exchange (MultiFab& pml, MultiFab& reg, const Geometry& geom, int do_pml_in_domain) { - BL_PROFILE("PML::Exchange"); + WARPX_PROFILE("PML::Exchange"); const IntVect& ngr = reg.nGrowVect(); const IntVect& ngp = pml.nGrowVect(); diff --git a/Source/BoundaryConditions/WarpXEvolvePML.cpp b/Source/BoundaryConditions/WarpXEvolvePML.cpp index 24442e54e..0c140b4cc 100644 --- a/Source/BoundaryConditions/WarpXEvolvePML.cpp +++ b/Source/BoundaryConditions/WarpXEvolvePML.cpp @@ -43,7 +43,7 @@ WarpX::DampPML (int lev, PatchType patch_type) { if (!do_pml) return; - BL_PROFILE("WarpX::DampPML()"); + WARPX_PROFILE("WarpX::DampPML()"); if (pml[lev]->ok()) { @@ -165,7 +165,7 @@ WarpX::DampJPML (int lev, PatchType patch_type) if (!do_pml) return; if (!do_pml_j_damping) return; - BL_PROFILE("WarpX::DampJPML()"); + WARPX_PROFILE("WarpX::DampJPML()"); if (pml[lev]->ok()) { diff --git a/Source/Diagnostics/BackTransformedDiagnostic.cpp b/Source/Diagnostics/BackTransformedDiagnostic.cpp index e458ee167..f4baea328 100644 --- a/Source/Diagnostics/BackTransformedDiagnostic.cpp +++ b/Source/Diagnostics/BackTransformedDiagnostic.cpp @@ -33,7 +33,7 @@ namespace Should be run only by the root process. */ void output_create(const std::string& file_path) { - BL_PROFILE("output_create"); + WARPX_PROFILE("output_create"); hid_t file = H5Fcreate(file_path.c_str(), H5F_ACC_TRUNC, H5P_DEFAULT, H5P_DEFAULT); if (file < 0) { amrex::Abort("Error: could not create file at " + file_path); @@ -84,7 +84,7 @@ namespace void output_write_metadata(const std::string& file_path, const int istep, const Real time, const Real dt) { - BL_PROFILE("output_write_metadata"); + WARPX_PROFILE("output_write_metadata"); hid_t file = H5Fopen(file_path.c_str(), H5F_ACC_RDWR, H5P_DEFAULT); write_string_attribute(file, "software", "warpx"); @@ -120,7 +120,7 @@ namespace void output_create_field(const std::string& file_path, const std::string& field_path, const unsigned nx, const unsigned ny, const unsigned nz) { - BL_PROFILE("output_create_field"); + WARPX_PROFILE("output_create_field"); // Open the output. hid_t file = H5Fopen(file_path.c_str(), H5F_ACC_RDWR, H5P_DEFAULT); @@ -180,7 +180,7 @@ namespace long output_resize_particle_field(const std::string& file_path, const std::string& field_path, const long num_to_add) { - BL_PROFILE("output_resize_particle_field"); + WARPX_PROFILE("output_resize_particle_field"); // Open the output. hid_t file = H5Fopen(file_path.c_str(), H5F_ACC_RDWR, H5P_DEFAULT); @@ -219,7 +219,7 @@ namespace void output_write_particle_field(const std::string& file_path, const std::string& field_path, const Real* data_ptr, const long count, const long index) { - BL_PROFILE("output_write_particle_field"); + WARPX_PROFILE("output_write_particle_field"); MPI_Comm comm = MPI_COMM_WORLD; MPI_Info info = MPI_INFO_NULL; @@ -299,7 +299,7 @@ namespace */ void output_create_particle_field(const std::string& file_path, const std::string& field_path) { - BL_PROFILE("output_create_particle_field"); + WARPX_PROFILE("output_create_particle_field"); MPI_Comm comm = MPI_COMM_WORLD; MPI_Info info = MPI_INFO_NULL; @@ -350,7 +350,7 @@ namespace const int lo_x, const int lo_y, const int lo_z) { - BL_PROFILE("output_write_field"); + WARPX_PROFILE("output_write_field"); MPI_Comm comm = MPI_COMM_WORLD; MPI_Info info = MPI_INFO_NULL; @@ -539,7 +539,7 @@ BackTransformedDiagnostic(Real zmin_lab, Real zmax_lab, Real v_window_lab, { - BL_PROFILE("BackTransformedDiagnostic::BackTransformedDiagnostic"); + WARPX_PROFILE("BackTransformedDiagnostic::BackTransformedDiagnostic"); AMREX_ALWAYS_ASSERT(WarpX::do_back_transformed_fields or WarpX::do_back_transformed_particles); @@ -685,7 +685,7 @@ BackTransformedDiagnostic(Real zmin_lab, Real zmax_lab, Real v_window_lab, void BackTransformedDiagnostic::Flush(const Geometry& geom) { - BL_PROFILE("BackTransformedDiagnostic::Flush"); + WARPX_PROFILE("BackTransformedDiagnostic::Flush"); VisMF::Header::Version current_version = VisMF::GetHeaderVersion(); VisMF::SetHeaderVersion(amrex::VisMF::Header::NoFabHeader_v1); @@ -774,7 +774,7 @@ writeLabFrameData(const MultiFab* cell_centered_data, const MultiParticleContainer& mypc, const Geometry& geom, const Real t_boost, const Real dt) { - BL_PROFILE("BackTransformedDiagnostic::writeLabFrameData"); + WARPX_PROFILE("BackTransformedDiagnostic::writeLabFrameData"); VisMF::Header::Version current_version = VisMF::GetHeaderVersion(); VisMF::SetHeaderVersion(amrex::VisMF::Header::NoFabHeader_v1); @@ -1004,7 +1004,7 @@ BackTransformedDiagnostic:: writeParticleData(const WarpXParticleContainer::DiagnosticParticleData& pdata, const std::string& name, const int i_lab) { - BL_PROFILE("BackTransformedDiagnostic::writeParticleData"); + WARPX_PROFILE("BackTransformedDiagnostic::writeParticleData"); std::string field_name; std::ofstream ofs; @@ -1053,7 +1053,7 @@ void BackTransformedDiagnostic:: writeMetaData () { - BL_PROFILE("BackTransformedDiagnostic::writeMetaData"); + WARPX_PROFILE("BackTransformedDiagnostic::writeMetaData"); if (ParallelDescriptor::IOProcessor()) { const std::string fullpath = WarpX::lab_data_directory + "/snapshots"; diff --git a/Source/Diagnostics/ElectrostaticIO.cpp b/Source/Diagnostics/ElectrostaticIO.cpp index dfe04123d..a5b63c506 100644 --- a/Source/Diagnostics/ElectrostaticIO.cpp +++ b/Source/Diagnostics/ElectrostaticIO.cpp @@ -20,7 +20,7 @@ WritePlotFileES (const amrex::Vector<std::unique_ptr<amrex::MultiFab> >& rho, const amrex::Vector<std::unique_ptr<amrex::MultiFab> >& phi, const amrex::Vector<std::array<std::unique_ptr<amrex::MultiFab>, 3> >& E) { - BL_PROFILE("WarpX::WritePlotFileES()"); + WARPX_PROFILE("WarpX::WritePlotFileES()"); VisMF::Header::Version current_version = VisMF::GetHeaderVersion(); VisMF::SetHeaderVersion(plotfile_headerversion); diff --git a/Source/Diagnostics/FieldIO.cpp b/Source/Diagnostics/FieldIO.cpp index c2248d3e2..b0c55053f 100644 --- a/Source/Diagnostics/FieldIO.cpp +++ b/Source/Diagnostics/FieldIO.cpp @@ -125,7 +125,7 @@ WriteOpenPMDFields( const std::string& filename, const MultiFab& mf, const Geometry& geom, const int iteration, const double time ) { - BL_PROFILE("WriteOpenPMDFields()"); + WARPX_PROFILE("WriteOpenPMDFields()"); const int ncomp = mf.nComp(); diff --git a/Source/Diagnostics/ParticleIO.cpp b/Source/Diagnostics/ParticleIO.cpp index ca9e86fdd..7b4d8897a 100644 --- a/Source/Diagnostics/ParticleIO.cpp +++ b/Source/Diagnostics/ParticleIO.cpp @@ -169,7 +169,7 @@ MultiParticleContainer::WriteHeader (std::ostream& os) const void PhysicalParticleContainer::ConvertUnits(ConvertDirection convert_direction) { - BL_PROFILE("PPC::ConvertUnits()"); + WARPX_PROFILE("PPC::ConvertUnits()"); // Compute conversion factor Real factor = 1; diff --git a/Source/Diagnostics/WarpXIO.cpp b/Source/Diagnostics/WarpXIO.cpp index 58fdccf22..adf9b409c 100644 --- a/Source/Diagnostics/WarpXIO.cpp +++ b/Source/Diagnostics/WarpXIO.cpp @@ -120,7 +120,7 @@ WarpX::WriteWarpXHeader(const std::string& name) const void WarpX::WriteCheckPointFile() const { - BL_PROFILE("WarpX::WriteCheckPointFile()"); + WARPX_PROFILE("WarpX::WriteCheckPointFile()"); VisMF::Header::Version current_version = VisMF::GetHeaderVersion(); VisMF::SetHeaderVersion(checkpoint_headerversion); @@ -203,7 +203,7 @@ WarpX::WriteCheckPointFile() const void WarpX::InitFromCheckpoint () { - BL_PROFILE("WarpX::InitFromCheckpoint()"); + WARPX_PROFILE("WarpX::InitFromCheckpoint()"); amrex::Print() << " Restart from checkpoint " << restart_chkfile << "\n"; @@ -421,7 +421,7 @@ WarpX::InitFromCheckpoint () std::unique_ptr<MultiFab> WarpX::GetCellCenteredData() { - BL_PROFILE("WarpX::GetCellCenteredData"); + WARPX_PROFILE("WarpX::GetCellCenteredData"); const int ng = 1; const int nc = 10; @@ -461,7 +461,7 @@ void WarpX::UpdateInSitu () const { #if defined(BL_USE_SENSEI_INSITU) || defined(AMREX_USE_ASCENT) - BL_PROFILE("WarpX::UpdateInSitu()"); + WARPX_PROFILE("WarpX::UpdateInSitu()"); // Average the fields from the simulation to the cell centers const int ngrow = 1; @@ -535,7 +535,7 @@ WarpX::prepareFields( void WarpX::WriteOpenPMDFile () const { - BL_PROFILE("WarpX::WriteOpenPMDFile()"); + WARPX_PROFILE("WarpX::WriteOpenPMDFile()"); #ifdef WARPX_USE_OPENPMD const auto step = istep[0]; @@ -559,7 +559,7 @@ WarpX::WriteOpenPMDFile () const void WarpX::WritePlotFile () const { - BL_PROFILE("WarpX::WritePlotFile()"); + WARPX_PROFILE("WarpX::WritePlotFile()"); const auto step = istep[0]; const std::string& plotfilename = amrex::Concatenate(plot_file,step); diff --git a/Source/Diagnostics/WarpXOpenPMD.cpp b/Source/Diagnostics/WarpXOpenPMD.cpp index cacf6367d..2d3e5940b 100644 --- a/Source/Diagnostics/WarpXOpenPMD.cpp +++ b/Source/Diagnostics/WarpXOpenPMD.cpp @@ -204,7 +204,7 @@ WarpXOpenPMDPlot::Init(openPMD::AccessType accessType) void WarpXOpenPMDPlot::WriteOpenPMDParticles(const std::unique_ptr<MultiParticleContainer>& mpc) { - BL_PROFILE("WarpXOpenPMDPlot::WriteOpenPMDParticles()"); + WARPX_PROFILE("WarpXOpenPMDPlot::WriteOpenPMDParticles()"); std::vector<std::string> species_names = mpc->GetSpeciesNames(); for (unsigned i = 0, n = species_names.size(); i < n; ++i) { @@ -541,7 +541,7 @@ WarpXOpenPMDPlot::WriteOpenPMDFields( //const std::string& filename, const double time ) const { //This is AMReX's tiny profiler. Possibly will apply it later - BL_PROFILE("WarpXOpenPMDPlot::WriteOpenPMDFields()"); + WARPX_PROFILE("WarpXOpenPMDPlot::WriteOpenPMDFields()"); AMREX_ALWAYS_ASSERT_WITH_MESSAGE(m_Series != nullptr, "openPMD series must be initialized"); diff --git a/Source/Evolve/WarpXEvolveEM.cpp b/Source/Evolve/WarpXEvolveEM.cpp index f4cb61add..d17ec8694 100644 --- a/Source/Evolve/WarpXEvolveEM.cpp +++ b/Source/Evolve/WarpXEvolveEM.cpp @@ -30,7 +30,7 @@ using namespace amrex; void WarpX::EvolveEM (int numsteps) { - BL_PROFILE("WarpX::EvolveEM()"); + WARPX_PROFILE("WarpX::EvolveEM()"); Real cur_time = t_new[0]; static int last_plot_file_step = 0; diff --git a/Source/Evolve/WarpXEvolveES.cpp b/Source/Evolve/WarpXEvolveES.cpp index 77e037154..ba2ab8ce5 100644 --- a/Source/Evolve/WarpXEvolveES.cpp +++ b/Source/Evolve/WarpXEvolveES.cpp @@ -21,7 +21,7 @@ WarpX::EvolveES (int numsteps) { amrex::Print() << "Running in electrostatic mode \n"; - BL_PROFILE("WarpX::EvolveES()"); + WARPX_PROFILE("WarpX::EvolveES()"); Real cur_time = t_new[0]; static int last_plot_file_step = 0; static int last_check_file_step = 0; diff --git a/Source/FieldSolver/PicsarHybridSpectralSolver/PicsarHybridSpectralSolver.cpp b/Source/FieldSolver/PicsarHybridSpectralSolver/PicsarHybridSpectralSolver.cpp index cb6b7794f..978129cf2 100644 --- a/Source/FieldSolver/PicsarHybridSpectralSolver/PicsarHybridSpectralSolver.cpp +++ b/Source/FieldSolver/PicsarHybridSpectralSolver/PicsarHybridSpectralSolver.cpp @@ -376,12 +376,12 @@ WarpX::FreeFFT (int lev) void WarpX::PushPSATD_hybridFFT (int lev, amrex::Real /* dt */) { - BL_PROFILE_VAR_NS("WarpXFFT::CopyDualGrid", blp_copy); - BL_PROFILE_VAR_NS("PICSAR::FftPushEB", blp_push_eb); + WARPX_PROFILE_VAR_NS("WarpXFFT::CopyDualGrid", blp_copy); + WARPX_PROFILE_VAR_NS("PICSAR::FftPushEB", blp_push_eb); auto period_fp = geom[lev].periodicity(); - BL_PROFILE_VAR_START(blp_copy); + WARPX_PROFILE_VAR_START(blp_copy); Efield_fp_fft[lev][0]->ParallelCopy(*Efield_fp[lev][0], 0, 0, 1, Efield_fp[lev][0]->nGrow(), 0, period_fp); Efield_fp_fft[lev][1]->ParallelCopy(*Efield_fp[lev][1], 0, 0, 1, Efield_fp[lev][1]->nGrow(), 0, period_fp); Efield_fp_fft[lev][2]->ParallelCopy(*Efield_fp[lev][2], 0, 0, 1, Efield_fp[lev][2]->nGrow(), 0, period_fp); @@ -392,9 +392,9 @@ WarpX::PushPSATD_hybridFFT (int lev, amrex::Real /* dt */) current_fp_fft[lev][1]->ParallelCopy(*current_fp[lev][1], 0, 0, 1, current_fp[lev][1]->nGrow(), 0, period_fp); current_fp_fft[lev][2]->ParallelCopy(*current_fp[lev][2], 0, 0, 1, current_fp[lev][2]->nGrow(), 0, period_fp); rho_fp_fft[lev]->ParallelCopy(*rho_fp[lev], 0, 0, 2, rho_fp[lev]->nGrow(), 0, period_fp); - BL_PROFILE_VAR_STOP(blp_copy); + WARPX_PROFILE_VAR_STOP(blp_copy); - BL_PROFILE_VAR_START(blp_push_eb); + WARPX_PROFILE_VAR_START(blp_push_eb); if (Efield_fp_fft[lev][0]->local_size() == 1) //Only one FFT patch on this MPI { @@ -435,16 +435,16 @@ WarpX::PushPSATD_hybridFFT (int lev, amrex::Real /* dt */) { amrex::Abort("WarpX::PushPSATD: TODO"); } - BL_PROFILE_VAR_STOP(blp_push_eb); + WARPX_PROFILE_VAR_STOP(blp_push_eb); - BL_PROFILE_VAR_START(blp_copy); + WARPX_PROFILE_VAR_START(blp_copy); CopyDataFromFFTToValid(*Efield_fp[lev][0], *Efield_fp_fft[lev][0], ba_valid_fp_fft[lev], geom[lev]); CopyDataFromFFTToValid(*Efield_fp[lev][1], *Efield_fp_fft[lev][1], ba_valid_fp_fft[lev], geom[lev]); CopyDataFromFFTToValid(*Efield_fp[lev][2], *Efield_fp_fft[lev][2], ba_valid_fp_fft[lev], geom[lev]); CopyDataFromFFTToValid(*Bfield_fp[lev][0], *Bfield_fp_fft[lev][0], ba_valid_fp_fft[lev], geom[lev]); CopyDataFromFFTToValid(*Bfield_fp[lev][1], *Bfield_fp_fft[lev][1], ba_valid_fp_fft[lev], geom[lev]); CopyDataFromFFTToValid(*Bfield_fp[lev][2], *Bfield_fp_fft[lev][2], ba_valid_fp_fft[lev], geom[lev]); - BL_PROFILE_VAR_STOP(blp_copy); + WARPX_PROFILE_VAR_STOP(blp_copy); if (lev > 0) { diff --git a/Source/FieldSolver/SpectralSolver/SpectralSolver.H b/Source/FieldSolver/SpectralSolver/SpectralSolver.H index 65f975682..2f6428fc6 100644 --- a/Source/FieldSolver/SpectralSolver/SpectralSolver.H +++ b/Source/FieldSolver/SpectralSolver/SpectralSolver.H @@ -40,10 +40,7 @@ class SpectralSolver * (in the spectral field specified by `field_index`) */ void ForwardTransform( const amrex::MultiFab& mf, const int field_index, - const int i_comp=0 ){ - BL_PROFILE("SpectralSolver::ForwardTransform"); - field_data.ForwardTransform( mf, field_index, i_comp ); - }; + const int i_comp=0 ); /** * \brief Transform spectral field specified by `field_index` back to @@ -51,21 +48,12 @@ class SpectralSolver */ void BackwardTransform( amrex::MultiFab& mf, const int field_index, - const int i_comp=0 ){ - BL_PROFILE("SpectralSolver::BackwardTransform"); - field_data.BackwardTransform( mf, field_index, i_comp ); - }; + const int i_comp=0 ); /** * \brief Update the fields in spectral space, over one timestep */ - void pushSpectralFields(){ - BL_PROFILE("SpectralSolver::pushSpectralFields"); - // Virtual function: the actual function used here depends - // on the sub-class of `SpectralBaseAlgorithm` that was - // initialized in the constructor of `SpectralSolver` - algorithm->pushSpectralFields( field_data ); - }; + void pushSpectralFields(); private: SpectralFieldData field_data; // Store field in spectral space diff --git a/Source/FieldSolver/SpectralSolver/SpectralSolver.cpp b/Source/FieldSolver/SpectralSolver/SpectralSolver.cpp index c24a7af69..9df981528 100644 --- a/Source/FieldSolver/SpectralSolver/SpectralSolver.cpp +++ b/Source/FieldSolver/SpectralSolver/SpectralSolver.cpp @@ -9,7 +9,8 @@ #include <PsatdAlgorithm.H> #include <GalileanAlgorithm.H> #include <PMLPsatdAlgorithm.H> - +#include "WarpX.H" +#include "WarpXProfilerWrapper.H" /* \brief Initialize the spectral Maxwell solver * @@ -62,4 +63,31 @@ SpectralSolver::SpectralSolver( field_data = SpectralFieldData( realspace_ba, k_space, dm, algorithm->getRequiredNumberOfFields() ); -}; +} + +void +SpectralSolver::ForwardTransform( const amrex::MultiFab& mf, + const int field_index, + const int i_comp ) +{ + WARPX_PROFILE("SpectralSolver::ForwardTransform"); + field_data.ForwardTransform( mf, field_index, i_comp ); +} + +void +SpectralSolver::BackwardTransform( amrex::MultiFab& mf, + const int field_index, + const int i_comp ) +{ + WARPX_PROFILE("SpectralSolver::BackwardTransform"); + field_data.BackwardTransform( mf, field_index, i_comp ); +} + +void +SpectralSolver::pushSpectralFields(){ + WARPX_PROFILE("SpectralSolver::pushSpectralFields"); + // Virtual function: the actual function used here depends + // on the sub-class of `SpectralBaseAlgorithm` that was + // initialized in the constructor of `SpectralSolver` + algorithm->pushSpectralFields( field_data ); +} diff --git a/Source/FieldSolver/WarpXPushFieldsEM.cpp b/Source/FieldSolver/WarpXPushFieldsEM.cpp index 74db3cac4..dd9a41254 100644 --- a/Source/FieldSolver/WarpXPushFieldsEM.cpp +++ b/Source/FieldSolver/WarpXPushFieldsEM.cpp @@ -107,7 +107,7 @@ WarpX::EvolveB (amrex::Real a_dt) void WarpX::EvolveB (int lev, amrex::Real a_dt) { - BL_PROFILE("WarpX::EvolveB()"); + WARPX_PROFILE("WarpX::EvolveB()"); EvolveB(lev, PatchType::fine, a_dt); if (lev > 0) { @@ -208,7 +208,7 @@ WarpX::EvolveE (amrex::Real a_dt) void WarpX::EvolveE (int lev, amrex::Real a_dt) { - BL_PROFILE("WarpX::EvolveE()"); + WARPX_PROFILE("WarpX::EvolveE()"); EvolveE(lev, PatchType::fine, a_dt); if (lev > 0) { @@ -529,7 +529,7 @@ WarpX::EvolveF (int lev, PatchType patch_type, amrex::Real a_dt, DtType a_dt_typ { if (!do_dive_cleaning) return; - BL_PROFILE("WarpX::EvolveF()"); + WARPX_PROFILE("WarpX::EvolveF()"); static constexpr Real mu_c2 = PhysConst::mu0*PhysConst::c*PhysConst::c; diff --git a/Source/FieldSolver/WarpX_QED_Field_Pushers.cpp b/Source/FieldSolver/WarpX_QED_Field_Pushers.cpp index ef297e351..3e1034c99 100644 --- a/Source/FieldSolver/WarpX_QED_Field_Pushers.cpp +++ b/Source/FieldSolver/WarpX_QED_Field_Pushers.cpp @@ -48,7 +48,7 @@ WarpX::Hybrid_QED_Push (amrex::Vector<amrex::Real> dt) void WarpX::Hybrid_QED_Push (int lev, Real a_dt) { - BL_PROFILE("WarpX::Hybrid_QED_Push()"); + WARPX_PROFILE("WarpX::Hybrid_QED_Push()"); Hybrid_QED_Push(lev, PatchType::fine, a_dt); if (lev > 0) { diff --git a/Source/Filter/BilinearFilter.cpp b/Source/Filter/BilinearFilter.cpp index ba7bc30f5..67067bdc1 100644 --- a/Source/Filter/BilinearFilter.cpp +++ b/Source/Filter/BilinearFilter.cpp @@ -51,7 +51,7 @@ namespace { } void BilinearFilter::ComputeStencils(){ - BL_PROFILE("BilinearFilter::ComputeStencils()"); + WARPX_PROFILE("BilinearFilter::ComputeStencils()"); stencil_length_each_dir = npass_each_dir; stencil_length_each_dir += 1.; #if (AMREX_SPACEDIM == 3) diff --git a/Source/Filter/Filter.cpp b/Source/Filter/Filter.cpp index dbe13747e..ed1aa383b 100644 --- a/Source/Filter/Filter.cpp +++ b/Source/Filter/Filter.cpp @@ -26,7 +26,7 @@ using namespace amrex; void Filter::ApplyStencil (MultiFab& dstmf, const MultiFab& srcmf, int scomp, int dcomp, int ncomp) { - BL_PROFILE("BilinearFilter::ApplyStencil(MultiFab)"); + WARPX_PROFILE("BilinearFilter::ApplyStencil(MultiFab)"); ncomp = std::min(ncomp, srcmf.nComp()); for (MFIter mfi(dstmf); mfi.isValid(); ++mfi) @@ -69,7 +69,7 @@ void Filter::ApplyStencil (FArrayBox& dstfab, const FArrayBox& srcfab, const Box& tbx, int scomp, int dcomp, int ncomp) { - BL_PROFILE("BilinearFilter::ApplyStencil(FArrayBox)"); + WARPX_PROFILE("BilinearFilter::ApplyStencil(FArrayBox)"); ncomp = std::min(ncomp, srcfab.nComp()); const auto& src = srcfab.array(); const auto& dst = dstfab.array(); @@ -153,7 +153,7 @@ void Filter::DoFilter (const Box& tbx, void Filter::ApplyStencil (amrex::MultiFab& dstmf, const amrex::MultiFab& srcmf, int scomp, int dcomp, int ncomp) { - BL_PROFILE("BilinearFilter::ApplyStencil()"); + WARPX_PROFILE("BilinearFilter::ApplyStencil()"); ncomp = std::min(ncomp, srcmf.nComp()); #ifdef _OPENMP #pragma omp parallel @@ -189,7 +189,7 @@ void Filter::ApplyStencil (amrex::FArrayBox& dstfab, const amrex::FArrayBox& srcfab, const amrex::Box& tbx, int scomp, int dcomp, int ncomp) { - BL_PROFILE("BilinearFilter::ApplyStencil(FArrayBox)"); + WARPX_PROFILE("BilinearFilter::ApplyStencil(FArrayBox)"); ncomp = std::min(ncomp, srcfab.nComp()); FArrayBox tmpfab; const Box& gbx = amrex::grow(tbx,stencil_length_each_dir-1); diff --git a/Source/Initialization/WarpXInitData.cpp b/Source/Initialization/WarpXInitData.cpp index 66100eb16..c82244d63 100644 --- a/Source/Initialization/WarpXInitData.cpp +++ b/Source/Initialization/WarpXInitData.cpp @@ -26,7 +26,7 @@ using namespace amrex; void WarpX::InitData () { - BL_PROFILE("WarpX::InitData()"); + WARPX_PROFILE("WarpX::InitData()"); if (restart_chkfile.empty()) { diff --git a/Source/Laser/LaserParticleContainer.cpp b/Source/Laser/LaserParticleContainer.cpp index 1d0857a45..8c4273e1f 100644 --- a/Source/Laser/LaserParticleContainer.cpp +++ b/Source/Laser/LaserParticleContainer.cpp @@ -380,11 +380,11 @@ LaserParticleContainer::Evolve (int lev, const MultiFab*, const MultiFab*, const MultiFab*, Real t, Real dt, DtType a_dt_type) { - BL_PROFILE("Laser::Evolve()"); - BL_PROFILE_VAR_NS("Laser::Evolve::Copy", blp_copy); - BL_PROFILE_VAR_NS("Laser::ParticlePush", blp_pp); - BL_PROFILE_VAR_NS("Laser::CurrentDepo", blp_cd); - BL_PROFILE_VAR_NS("Laser::Evolve::Accumulate", blp_accumulate); + WARPX_PROFILE("Laser::Evolve()"); + WARPX_PROFILE_VAR_NS("Laser::Evolve::Copy", blp_copy); + WARPX_PROFILE_VAR_NS("Laser::ParticlePush", blp_pp); + WARPX_PROFILE_VAR_NS("Laser::CurrentDepo", blp_cd); + WARPX_PROFILE_VAR_NS("Laser::Evolve::Accumulate", blp_accumulate); Real t_lab = t; if (WarpX::gamma_boost > 1) { @@ -445,7 +445,7 @@ LaserParticleContainer::Evolve (int lev, // // Particle Push // - BL_PROFILE_VAR_START(blp_pp); + WARPX_PROFILE_VAR_START(blp_pp); // Find the coordinates of the particles in the emission plane calculate_laser_plane_coordinates(pti, np, plane_Xp.dataPtr(), @@ -461,7 +461,7 @@ LaserParticleContainer::Evolve (int lev, update_laser_particle(pti, np, uxp.dataPtr(), uyp.dataPtr(), uzp.dataPtr(), wp.dataPtr(), amplitude_E.dataPtr(), dt); - BL_PROFILE_VAR_STOP(blp_pp); + WARPX_PROFILE_VAR_STOP(blp_pp); // // Current Deposition diff --git a/Source/Parallelization/WarpXComm.cpp b/Source/Parallelization/WarpXComm.cpp index fcc7343a6..550fdc825 100644 --- a/Source/Parallelization/WarpXComm.cpp +++ b/Source/Parallelization/WarpXComm.cpp @@ -59,7 +59,7 @@ WarpX::ExchangeWithPmlF (int lev) void WarpX::UpdateAuxilaryData () { - BL_PROFILE("UpdateAuxilaryData()"); + WARPX_PROFILE("UpdateAuxilaryData()"); if (Bfield_aux[0][0]->ixType() == Bfield_fp[0][0]->ixType()) { UpdateAuxilaryDataSameType(); @@ -550,7 +550,7 @@ WarpX::FillBoundaryAux (int lev, IntVect ng) void WarpX::SyncCurrent () { - BL_PROFILE("SyncCurrent()"); + WARPX_PROFILE("SyncCurrent()"); // Restrict fine patch current onto the coarse patch, before // summing the guard cells of the fine patch @@ -585,7 +585,7 @@ interpolateCurrentFineToCoarse ( std::array< amrex::MultiFab const *, 3 > const std::array< amrex::MultiFab *, 3 > const & coarse, int const refinement_ratio) { - BL_PROFILE("interpolateCurrentFineToCoarse()"); + WARPX_PROFILE("interpolateCurrentFineToCoarse()"); BL_ASSERT(refinement_ratio == 2); const IntVect& ng = (fine[0]->nGrowVect() + 1) / refinement_ratio; // add equivalent no. of guards to coarse patch @@ -617,7 +617,7 @@ interpolateCurrentFineToCoarse ( std::array< amrex::MultiFab const *, 3 > const void WarpX::SyncRho () { - BL_PROFILE("SyncRho()"); + WARPX_PROFILE("SyncRho()"); if (!rho_fp[0]) return; const int ncomp = rho_fp[0]->nComp(); @@ -643,7 +643,7 @@ WarpX::SyncRho () void interpolateDensityFineToCoarse (const MultiFab& fine, MultiFab& coarse, int const refinement_ratio) { - BL_PROFILE("interpolateDensityFineToCoarse()"); + WARPX_PROFILE("interpolateDensityFineToCoarse()"); BL_ASSERT(refinement_ratio == 2); const IntVect& ng = (fine.nGrowVect() + 1) / refinement_ratio; // add equivalent no. of guards to coarse patch const int nc = fine.nComp(); diff --git a/Source/Parallelization/WarpXRegrid.cpp b/Source/Parallelization/WarpXRegrid.cpp index 54166e8ce..7b762606f 100644 --- a/Source/Parallelization/WarpXRegrid.cpp +++ b/Source/Parallelization/WarpXRegrid.cpp @@ -14,8 +14,8 @@ using namespace amrex; void WarpX::LoadBalance () { - BL_PROFILE_REGION("LoadBalance"); - BL_PROFILE("WarpX::LoadBalance()"); + WARPX_PROFILE_REGION("LoadBalance"); + WARPX_PROFILE("WarpX::LoadBalance()"); AMREX_ALWAYS_ASSERT(costs[0] != nullptr); diff --git a/Source/Particles/MultiParticleContainer.cpp b/Source/Particles/MultiParticleContainer.cpp index 28bcbd220..ebb79787e 100644 --- a/Source/Particles/MultiParticleContainer.cpp +++ b/Source/Particles/MultiParticleContainer.cpp @@ -494,7 +494,7 @@ MultiParticleContainer Vector<WarpXParticleContainer::DiagnosticParticleData>& parts) const { - BL_PROFILE("MultiParticleContainer::GetLabFrameData"); + WARPX_PROFILE("MultiParticleContainer::GetLabFrameData"); // Loop over particle species for (int i = 0; i < nspecies_back_transformed_diagnostics; ++i){ @@ -639,7 +639,7 @@ MultiParticleContainer::getSpeciesID (std::string product_str) void MultiParticleContainer::doFieldIonization () { - BL_PROFILE("MPC::doFieldIonization"); + WARPX_PROFILE("MPC::doFieldIonization"); // Loop over all species. // Ionized particles in pc_source create particles in pc_product @@ -684,7 +684,7 @@ MultiParticleContainer::doFieldIonization () void MultiParticleContainer::doCoulombCollisions () { - BL_PROFILE("MPC::doCoulombCollisions"); + WARPX_PROFILE("MPC::doCoulombCollisions"); for (int i = 0; i < ncollisions; ++i) { diff --git a/Source/Particles/PhysicalParticleContainer.cpp b/Source/Particles/PhysicalParticleContainer.cpp index dd0693cb0..35a52107b 100644 --- a/Source/Particles/PhysicalParticleContainer.cpp +++ b/Source/Particles/PhysicalParticleContainer.cpp @@ -314,7 +314,7 @@ PhysicalParticleContainer::CheckAndAddParticle(Real x, Real y, Real z, void PhysicalParticleContainer::AddParticles (int lev) { - BL_PROFILE("PhysicalParticleContainer::AddParticles()"); + WARPX_PROFILE("PhysicalParticleContainer::AddParticles()"); if (plasma_injector->add_single_particle) { AddNParticles(lev, 1, @@ -361,7 +361,7 @@ PhysicalParticleContainer::AddParticles (int lev) void PhysicalParticleContainer::AddPlasma (int lev, RealBox part_realbox) { - BL_PROFILE("PhysicalParticleContainer::AddPlasma"); + WARPX_PROFILE("PhysicalParticleContainer::AddPlasma"); // If no part_realbox is provided, initialize particles in the whole domain const Geometry& geom = Geom(lev); @@ -924,7 +924,7 @@ PhysicalParticleContainer::EvolveES (const Vector<std::array<std::unique_ptr<Mul Vector<std::unique_ptr<MultiFab> >& rho, Real t, Real dt) { - BL_PROFILE("PPC::EvolveES()"); + WARPX_PROFILE("PPC::EvolveES()"); int num_levels = rho.size(); for (int lev = 0; lev < num_levels; ++lev) { @@ -1111,11 +1111,11 @@ PhysicalParticleContainer::Evolve (int lev, const MultiFab* cBx, const MultiFab* cBy, const MultiFab* cBz, Real t, Real dt, DtType a_dt_type) { - BL_PROFILE("PPC::Evolve()"); - BL_PROFILE_VAR_NS("PPC::Evolve::Copy", blp_copy); - BL_PROFILE_VAR_NS("PPC::FieldGather", blp_fg); - BL_PROFILE_VAR_NS("PPC::EvolveOpticalDepth", blp_ppc_qed_ev); - BL_PROFILE_VAR_NS("PPC::ParticlePush", blp_ppc_pp); + WARPX_PROFILE("PPC::Evolve()"); + WARPX_PROFILE_VAR_NS("PPC::Evolve::Copy", blp_copy); + WARPX_PROFILE_VAR_NS("PPC::FieldGather", blp_fg); + WARPX_PROFILE_VAR_NS("PPC::EvolveOpticalDepth", blp_ppc_qed_ev); + WARPX_PROFILE_VAR_NS("PPC::ParticlePush", blp_ppc_pp); const std::array<Real,3>& dx = WarpX::CellSize(lev); const std::array<Real,3>& cdx = WarpX::CellSize(std::max(lev-1,0)); @@ -1246,7 +1246,7 @@ PhysicalParticleContainer::Evolve (int lev, // // Field Gather of Aux Data (i.e., the full solution) // - BL_PROFILE_VAR_START(blp_fg); + WARPX_PROFILE_VAR_START(blp_fg); FieldGather(pti, Exp, Eyp, Ezp, Bxp, Byp, Bzp, exfab, eyfab, ezfab, bxfab, byfab, bzfab, Ex.nGrow(), e_is_nodal, @@ -1289,23 +1289,23 @@ PhysicalParticleContainer::Evolve (int lev, lev, lev-1); } - BL_PROFILE_VAR_STOP(blp_fg); + WARPX_PROFILE_VAR_STOP(blp_fg); #ifdef WARPX_QED // //Evolve Optical Depth // - BL_PROFILE_VAR_START(blp_ppc_qed_ev); + WARPX_PROFILE_VAR_START(blp_ppc_qed_ev); EvolveOpticalDepth(pti, dt); - BL_PROFILE_VAR_STOP(blp_ppc_qed_ev); + WARPX_PROFILE_VAR_STOP(blp_ppc_qed_ev); #endif // // Particle Push // - BL_PROFILE_VAR_START(blp_ppc_pp); + WARPX_PROFILE_VAR_START(blp_ppc_pp); PushPX(pti, dt, a_dt_type); - BL_PROFILE_VAR_STOP(blp_ppc_pp); + WARPX_PROFILE_VAR_STOP(blp_ppc_pp); // // Current Deposition @@ -1797,7 +1797,7 @@ PhysicalParticleContainer::PushP (int lev, Real dt, const MultiFab& Ex, const MultiFab& Ey, const MultiFab& Ez, const MultiFab& Bx, const MultiFab& By, const MultiFab& Bz) { - BL_PROFILE("PhysicalParticleContainer::PushP"); + WARPX_PROFILE("PhysicalParticleContainer::PushP"); if (do_not_push) return; @@ -1950,7 +1950,7 @@ void PhysicalParticleContainer::GetParticleSlice(const int direction, const Real const Real t_lab, const Real dt, DiagnosticParticles& diagnostic_particles) { - BL_PROFILE("PhysicalParticleContainer::GetParticleSlice"); + WARPX_PROFILE("PhysicalParticleContainer::GetParticleSlice"); // Assume that the boost in the positive z direction. #if (AMREX_SPACEDIM == 2) @@ -2331,7 +2331,7 @@ void PhysicalParticleContainer::InitIonizationModule () IonizationFilterFunc PhysicalParticleContainer::getIonizationFunc () { - BL_PROFILE("PPC::getIonizationFunc"); + WARPX_PROFILE("PPC::getIonizationFunc"); return IonizationFilterFunc{ionization_energies.dataPtr(), adk_prefactor.dataPtr(), diff --git a/Source/Particles/RigidInjectedParticleContainer.cpp b/Source/Particles/RigidInjectedParticleContainer.cpp index 88259b3a4..c3152049a 100644 --- a/Source/Particles/RigidInjectedParticleContainer.cpp +++ b/Source/Particles/RigidInjectedParticleContainer.cpp @@ -381,7 +381,7 @@ RigidInjectedParticleContainer::PushP (int lev, Real dt, const MultiFab& Ex, const MultiFab& Ey, const MultiFab& Ez, const MultiFab& Bx, const MultiFab& By, const MultiFab& Bz) { - BL_PROFILE("RigidInjectedParticleContainer::PushP"); + WARPX_PROFILE("RigidInjectedParticleContainer::PushP"); if (do_not_push) return; diff --git a/Source/Particles/Sorting/Partition.cpp b/Source/Particles/Sorting/Partition.cpp index c25c24d5d..3c0ad7965 100644 --- a/Source/Particles/Sorting/Partition.cpp +++ b/Source/Particles/Sorting/Partition.cpp @@ -46,7 +46,7 @@ PhysicalParticleContainer::PartitionParticlesInBuffers( iMultiFab const* gather_masks, RealVector& uxp, RealVector& uyp, RealVector& uzp, RealVector& wp) { - BL_PROFILE("PPC::Evolve::partition"); + WARPX_PROFILE("PPC::Evolve::partition"); // Initialize temporary arrays Gpu::DeviceVector<int> inexflag; diff --git a/Source/Particles/WarpXParticleContainer.cpp b/Source/Particles/WarpXParticleContainer.cpp index 59c38dcac..d77e35b6b 100644 --- a/Source/Particles/WarpXParticleContainer.cpp +++ b/Source/Particles/WarpXParticleContainer.cpp @@ -237,8 +237,8 @@ WarpXParticleContainer::DepositCurrent(WarpXParIter& pti, const std::array<Real,3>& dx = WarpX::CellSize(std::max(depos_lev,0)); Real q = this->charge; - BL_PROFILE_VAR_NS("PPC::Evolve::Accumulate", blp_accumulate); - BL_PROFILE_VAR_NS("PPC::CurrentDeposition", blp_deposit); + WARPX_PROFILE_VAR_NS("PPC::Evolve::Accumulate", blp_accumulate); + WARPX_PROFILE_VAR_NS("PPC::CurrentDeposition", blp_deposit); // Get tile box where current is deposited. @@ -313,7 +313,7 @@ WarpXParticleContainer::DepositCurrent(WarpXParIter& pti, } } - BL_PROFILE_VAR_START(blp_deposit); + WARPX_PROFILE_VAR_START(blp_deposit); if (WarpX::current_deposition_algo == CurrentDepositionAlgo::Esirkepov) { if (WarpX::nox == 1){ doEsirkepovDepositionShapeN<1>( @@ -355,16 +355,16 @@ WarpXParticleContainer::DepositCurrent(WarpXParIter& pti, xyzmin, lo, q); } } - BL_PROFILE_VAR_STOP(blp_deposit); + WARPX_PROFILE_VAR_STOP(blp_deposit); #ifndef AMREX_USE_GPU - BL_PROFILE_VAR_START(blp_accumulate); + WARPX_PROFILE_VAR_START(blp_accumulate); // CPU, tiling: atomicAdd local_jx into jx // (same for jx and jz) (*jx)[pti].atomicAdd(local_jx[thread_num], tbx, tbx, 0, 0, jx->nComp()); (*jy)[pti].atomicAdd(local_jy[thread_num], tby, tby, 0, 0, jy->nComp()); (*jz)[pti].atomicAdd(local_jz[thread_num], tbz, tbz, 0, 0, jz->nComp()); - BL_PROFILE_VAR_STOP(blp_accumulate); + WARPX_PROFILE_VAR_STOP(blp_accumulate); #endif } @@ -407,8 +407,8 @@ WarpXParticleContainer::DepositCharge (WarpXParIter& pti, RealVector& wp, const std::array<Real,3>& dx = WarpX::CellSize(std::max(depos_lev,0)); const Real q = this->charge; - BL_PROFILE_VAR_NS("PPC::ChargeDeposition", blp_ppc_chd); - BL_PROFILE_VAR_NS("PPC::Evolve::Accumulate", blp_accumulate); + WARPX_PROFILE_VAR_NS("PPC::ChargeDeposition", blp_ppc_chd); + WARPX_PROFILE_VAR_NS("PPC::Evolve::Accumulate", blp_accumulate); // Get tile box where charge is deposited. // The tile box is different when depositing in the buffers (depos_lev<lev) @@ -465,7 +465,7 @@ WarpXParticleContainer::DepositCharge (WarpXParIter& pti, RealVector& wp, // Indices of the lower bound const Dim3 lo = lbound(tilebox); - BL_PROFILE_VAR_START(blp_ppc_chd); + WARPX_PROFILE_VAR_START(blp_ppc_chd); if (WarpX::nox == 1){ doChargeDepositionShapeN<1>(GetPosition, wp.dataPtr()+offset, ion_lev, rho_arr, np_to_depose, dx, xyzmin, lo, q); @@ -476,14 +476,14 @@ WarpXParticleContainer::DepositCharge (WarpXParIter& pti, RealVector& wp, doChargeDepositionShapeN<3>(GetPosition, wp.dataPtr()+offset, ion_lev, rho_arr, np_to_depose, dx, xyzmin, lo, q); } - BL_PROFILE_VAR_STOP(blp_ppc_chd); + WARPX_PROFILE_VAR_STOP(blp_ppc_chd); #ifndef AMREX_USE_GPU - BL_PROFILE_VAR_START(blp_accumulate); + WARPX_PROFILE_VAR_START(blp_accumulate); (*rho)[pti].atomicAdd(local_rho[thread_num], tb, tb, 0, icomp*nc, nc); - BL_PROFILE_VAR_STOP(blp_accumulate); + WARPX_PROFILE_VAR_STOP(blp_accumulate); #endif } @@ -717,7 +717,7 @@ WarpXParticleContainer::PushX (amrex::Real dt) void WarpXParticleContainer::PushX (int lev, amrex::Real dt) { - BL_PROFILE("WPC::PushX()"); + WARPX_PROFILE("WPC::PushX()"); if (do_not_push) return; diff --git a/Source/Utils/Make.package b/Source/Utils/Make.package index 7e814ba89..022ff286a 100644 --- a/Source/Utils/Make.package +++ b/Source/Utils/Make.package @@ -12,6 +12,7 @@ CEXE_sources += WarpXAlgorithmSelection.cpp CEXE_headers += NCIGodfreyTables.H CEXE_headers += WarpX_Complex.H CEXE_headers += IonizationEnergiesTable.H +CEXE_headers += WarpXProfilerWrapper.H INCLUDE_LOCATIONS += $(WARPX_HOME)/Source/Utils VPATH_LOCATIONS += $(WARPX_HOME)/Source/Utils diff --git a/Source/Utils/WarpXMovingWindow.cpp b/Source/Utils/WarpXMovingWindow.cpp index 8f1666b2f..d7bdaf45f 100644 --- a/Source/Utils/WarpXMovingWindow.cpp +++ b/Source/Utils/WarpXMovingWindow.cpp @@ -235,7 +235,7 @@ WarpX::shiftMF (MultiFab& mf, const Geometry& geom, int num_shift, int dir, IntVect ng_extra, amrex::Real external_field, bool useparser, ParserWrapper<3> *field_parser) { - BL_PROFILE("WarpX::shiftMF()"); + WARPX_PROFILE("WarpX::shiftMF()"); const BoxArray& ba = mf.boxArray(); const DistributionMapping& dm = mf.DistributionMap(); const int nc = mf.nComp(); diff --git a/Source/Utils/WarpXProfilerWrapper.H b/Source/Utils/WarpXProfilerWrapper.H new file mode 100644 index 000000000..6d18e22ba --- /dev/null +++ b/Source/Utils/WarpXProfilerWrapper.H @@ -0,0 +1,27 @@ +/* Copyright 2020 Axel Huebl, Maxence Thevenet + * + * This file is part of WarpX. + * + * License: BSD-3-Clause-LBNL + */ + +#ifndef WARPX_PROFILERWRAPPER_H_ +#define WARPX_PROFILERWRAPPER_H_ + +#include "AMReX_BLProfiler.H" +#include "AMReX_GpuDevice.H" + +static void doDeviceSynchronize(int do_device_synchronize) +{ + if ( do_device_synchronize ) + amrex::Gpu::synchronize(); +} + +#define WARPX_PROFILE(fname) doDeviceSynchronize(WarpX::do_device_synchronize_before_profile); BL_PROFILE(fname) +#define WARPX_PROFILE_VAR(fname, vname) doDeviceSynchronize(WarpX::do_device_synchronize_before_profile); BL_PROFILE_VAR(fname, vname) +#define WARPX_PROFILE_VAR_NS(fname, vname) doDeviceSynchronize(WarpX::do_device_synchronize_before_profile); BL_PROFILE_VAR_NS(fname, vname) +#define WARPX_PROFILE_VAR_START(vname) doDeviceSynchronize(WarpX::do_device_synchronize_before_profile); BL_PROFILE_VAR_START(vname) +#define WARPX_PROFILE_VAR_STOP(vname) doDeviceSynchronize(WarpX::do_device_synchronize_before_profile); BL_PROFILE_VAR_STOP(vname) +#define WARPX_PROFILE_REGION(rname) doDeviceSynchronize(WarpX::do_device_synchronize_before_profile); BL_PROFILE_REGION(rname) + +#endif // WARPX_PROFILERWRAPPER_H_ diff --git a/Source/Utils/WarpXUtil.cpp b/Source/Utils/WarpXUtil.cpp index 1805bb4ed..63cf72aa2 100644 --- a/Source/Utils/WarpXUtil.cpp +++ b/Source/Utils/WarpXUtil.cpp @@ -126,7 +126,7 @@ void ConvertLabParamsToBoost() * zmin and zmax. */ void NullifyMF(amrex::MultiFab& mf, int lev, amrex::Real zmin, amrex::Real zmax){ - BL_PROFILE("WarpX::NullifyMF()"); + WARPX_PROFILE("WarpX::NullifyMF()"); #ifdef _OPENMP #pragma omp parallel if (Gpu::notInLaunchRegion()) #endif diff --git a/Source/WarpX.H b/Source/WarpX.H index 9b456f663..0f11e6040 100644 --- a/Source/WarpX.H +++ b/Source/WarpX.H @@ -173,6 +173,7 @@ public: static int do_subcycling; + static bool do_device_synchronize_before_profile; static bool safe_guard_cells; // buffers diff --git a/Source/WarpX.cpp b/Source/WarpX.cpp index 46f5ab731..d3f419d98 100644 --- a/Source/WarpX.cpp +++ b/Source/WarpX.cpp @@ -15,6 +15,7 @@ #include <WarpXUtil.H> #include <WarpXAlgorithmSelection.H> #include <WarpX_FDTD.H> +#include "WarpXProfilerWrapper.H" #include <AMReX_ParmParse.H> #include <AMReX_MultiFabUtil.H> @@ -144,6 +145,12 @@ int WarpX::n_current_deposition_buffer = -1; int WarpX::do_nodal = false; +#ifdef AMREX_USE_GPU +bool WarpX::do_device_synchronize_before_profile = true; +#else +bool WarpX::do_device_synchronize_before_profile = false; +#endif + WarpX* WarpX::m_instance = nullptr; WarpX& @@ -376,6 +383,8 @@ WarpX::ReadParameters () ReadBoostedFrameParameters(gamma_boost, beta_boost, boost_direction); + pp.query("do_device_synchronize_before_profile", do_device_synchronize_before_profile); + // pp.query returns 1 if argument zmax_plasma_to_compute_max_step is // specified by the user, 0 otherwise. do_compute_max_step_from_zmax = @@ -1109,7 +1118,7 @@ WarpX::RefRatio (int lev) void WarpX::Evolve (int numsteps) { - BL_PROFILE_REGION("WarpX::Evolve()"); + WARPX_PROFILE_REGION("WarpX::Evolve()"); #ifdef WARPX_DO_ELECTROSTATIC if (do_electrostatic) { diff --git a/Source/main.cpp b/Source/main.cpp index fc705bdf0..13a26e615 100644 --- a/Source/main.cpp +++ b/Source/main.cpp @@ -8,6 +8,7 @@ */ #include <WarpX.H> #include <WarpXUtil.H> +#include "WarpXProfilerWrapper.H" #include <AMReX.H> #include <AMReX_ParmParse.H> @@ -34,7 +35,7 @@ int main(int argc, char* argv[]) ConvertLabParamsToBoost(); - BL_PROFILE_VAR("main()", pmain); + WARPX_PROFILE_VAR("main()", pmain); const Real strt_total = amrex::second(); @@ -55,7 +56,7 @@ int main(int argc, char* argv[]) } } - BL_PROFILE_VAR_STOP(pmain); + WARPX_PROFILE_VAR_STOP(pmain); amrex::Finalize(); #if defined AMREX_USE_MPI |