35 files changed, 178 insertions, 117 deletions
diff --git a/Docs/source/running_cpp/parameters.rst b/Docs/source/running_cpp/parameters.rst
index 0121e7bf1..936dd262a 100644
--- a/Docs/source/running_cpp/parameters.rst
+++ b/Docs/source/running_cpp/parameters.rst
@@ -974,6 +974,10 @@ Numerics and algorithms
      value here will make the simulation unphysical, but will allow QED effects to become more apparent.
      Note that this option will only have an effect if the warpx.use_Hybrid_QED flag is also triggered.
 
+ * ``warpx.do_device_synchronize_before_profile`` (`bool`) optional (default `1`)
+    When running on an accelerated platform, whether to call a deviceSynchronize around profiling regions.
+    This allows the profiler to report meaningful timings, but slows down the simulation (only slightly).
+
  * ``warpx.sort_int`` (`int`) optional (defaults: ``-1`` on CPU; ``4`` on GPU)
      If ``<=0``, do not sort particles. If ``>0``, sort particles by bin every ``sort_int`` iteration.
      It is turned on on GPUs for performance reasons (to improve memory locality).
diff --git a/Source/BoundaryConditions/PML.H b/Source/BoundaryConditions/PML.H
index b8ed0ff7a..67ef80dfe 100644
--- a/Source/BoundaryConditions/PML.H
+++ b/Source/BoundaryConditions/PML.H
@@ -11,6 +11,8 @@
 #ifndef WARPX_PML_H_
 #define WARPX_PML_H_
 
+#include "WarpXProfilerWrapper.H"
+
 #include <AMReX_MultiFab.H>
 #include <AMReX_Geometry.H>
 
diff --git a/Source/BoundaryConditions/PML.cpp b/Source/BoundaryConditions/PML.cpp
index 0ec247ac4..615262cae 100644
--- a/Source/BoundaryConditions/PML.cpp
+++ b/Source/BoundaryConditions/PML.cpp
@@ -783,7 +783,7 @@ void
 PML::Exchange (MultiFab& pml, MultiFab& reg, const Geometry& geom,
                 int do_pml_in_domain)
 {
-    BL_PROFILE("PML::Exchange");
+    WARPX_PROFILE("PML::Exchange");
 
     const IntVect& ngr = reg.nGrowVect();
     const IntVect& ngp = pml.nGrowVect();
diff --git a/Source/BoundaryConditions/WarpXEvolvePML.cpp b/Source/BoundaryConditions/WarpXEvolvePML.cpp
index 24442e54e..0c140b4cc 100644
--- a/Source/BoundaryConditions/WarpXEvolvePML.cpp
+++ b/Source/BoundaryConditions/WarpXEvolvePML.cpp
@@ -43,7 +43,7 @@ WarpX::DampPML (int lev, PatchType patch_type)
 {
     if (!do_pml) return;
 
-    BL_PROFILE("WarpX::DampPML()");
+    WARPX_PROFILE("WarpX::DampPML()");
 
     if (pml[lev]->ok())
     {
@@ -165,7 +165,7 @@ WarpX::DampJPML (int lev, PatchType patch_type)
     if (!do_pml) return;
     if (!do_pml_j_damping) return;
 
-    BL_PROFILE("WarpX::DampJPML()");
+    WARPX_PROFILE("WarpX::DampJPML()");
 
     if (pml[lev]->ok())
     {
diff --git a/Source/Diagnostics/BackTransformedDiagnostic.cpp b/Source/Diagnostics/BackTransformedDiagnostic.cpp
index e458ee167..f4baea328 100644
--- a/Source/Diagnostics/BackTransformedDiagnostic.cpp
+++ b/Source/Diagnostics/BackTransformedDiagnostic.cpp
@@ -33,7 +33,7 @@ namespace
       Should be run only by the root process.
     */
     void output_create(const std::string& file_path) {
-        BL_PROFILE("output_create");
+        WARPX_PROFILE("output_create");
         hid_t file = H5Fcreate(file_path.c_str(), H5F_ACC_TRUNC, H5P_DEFAULT, H5P_DEFAULT);
         if (file < 0) {
             amrex::Abort("Error: could not create file at " + file_path);
@@ -84,7 +84,7 @@ namespace
     void output_write_metadata(const std::string& file_path,
                                const int istep, const Real time, const Real dt)
     {
-        BL_PROFILE("output_write_metadata");
+        WARPX_PROFILE("output_write_metadata");
         hid_t file = H5Fopen(file_path.c_str(), H5F_ACC_RDWR, H5P_DEFAULT);
 
         write_string_attribute(file, "software", "warpx");
@@ -120,7 +120,7 @@ namespace
     void output_create_field(const std::string& file_path, const std::string& field_path,
                              const unsigned nx, const unsigned ny, const unsigned nz)
     {
-        BL_PROFILE("output_create_field");
+        WARPX_PROFILE("output_create_field");
 
         // Open the output.
         hid_t file = H5Fopen(file_path.c_str(), H5F_ACC_RDWR, H5P_DEFAULT);
@@ -180,7 +180,7 @@ namespace
     long output_resize_particle_field(const std::string& file_path, const std::string& field_path,
                                       const long num_to_add)
     {
-        BL_PROFILE("output_resize_particle_field");
+        WARPX_PROFILE("output_resize_particle_field");
 
         // Open the output.
         hid_t file = H5Fopen(file_path.c_str(), H5F_ACC_RDWR, H5P_DEFAULT);
@@ -219,7 +219,7 @@ namespace
     void output_write_particle_field(const std::string& file_path, const std::string& field_path,
                                      const Real* data_ptr, const long count, const long index)
     {
-        BL_PROFILE("output_write_particle_field");
+        WARPX_PROFILE("output_write_particle_field");
 
         MPI_Comm comm = MPI_COMM_WORLD;
         MPI_Info info = MPI_INFO_NULL;
@@ -299,7 +299,7 @@ namespace
     */
     void output_create_particle_field(const std::string& file_path, const std::string& field_path)
     {
-        BL_PROFILE("output_create_particle_field");
+        WARPX_PROFILE("output_create_particle_field");
 
         MPI_Comm comm = MPI_COMM_WORLD;
         MPI_Info info = MPI_INFO_NULL;
@@ -350,7 +350,7 @@ namespace
                             const int lo_x, const int lo_y, const int lo_z)
     {
 
-        BL_PROFILE("output_write_field");
+        WARPX_PROFILE("output_write_field");
 
         MPI_Comm comm = MPI_COMM_WORLD;
         MPI_Info info = MPI_INFO_NULL;
@@ -539,7 +539,7 @@ BackTransformedDiagnostic(Real zmin_lab, Real zmax_lab, Real v_window_lab,
 {
 
 
-    BL_PROFILE("BackTransformedDiagnostic::BackTransformedDiagnostic");
+    WARPX_PROFILE("BackTransformedDiagnostic::BackTransformedDiagnostic");
 
     AMREX_ALWAYS_ASSERT(WarpX::do_back_transformed_fields or
                         WarpX::do_back_transformed_particles);
@@ -685,7 +685,7 @@ BackTransformedDiagnostic(Real zmin_lab, Real zmax_lab, Real v_window_lab,
 
 void BackTransformedDiagnostic::Flush(const Geometry& geom)
 {
-    BL_PROFILE("BackTransformedDiagnostic::Flush");
+    WARPX_PROFILE("BackTransformedDiagnostic::Flush");
 
     VisMF::Header::Version current_version = VisMF::GetHeaderVersion();
     VisMF::SetHeaderVersion(amrex::VisMF::Header::NoFabHeader_v1);
@@ -774,7 +774,7 @@ writeLabFrameData(const MultiFab* cell_centered_data,
                   const MultiParticleContainer& mypc,
                   const Geometry& geom, const Real t_boost, const Real dt) {
 
-    BL_PROFILE("BackTransformedDiagnostic::writeLabFrameData");
+    WARPX_PROFILE("BackTransformedDiagnostic::writeLabFrameData");
     VisMF::Header::Version current_version = VisMF::GetHeaderVersion();
     VisMF::SetHeaderVersion(amrex::VisMF::Header::NoFabHeader_v1);
 
@@ -1004,7 +1004,7 @@ BackTransformedDiagnostic::
 writeParticleData(const WarpXParticleContainer::DiagnosticParticleData& pdata,
                   const std::string& name, const int i_lab)
 {
-    BL_PROFILE("BackTransformedDiagnostic::writeParticleData");
+    WARPX_PROFILE("BackTransformedDiagnostic::writeParticleData");
 
     std::string field_name;
     std::ofstream ofs;
@@ -1053,7 +1053,7 @@ void
 BackTransformedDiagnostic::
 writeMetaData ()
 {
-    BL_PROFILE("BackTransformedDiagnostic::writeMetaData");
+    WARPX_PROFILE("BackTransformedDiagnostic::writeMetaData");
 
     if (ParallelDescriptor::IOProcessor()) {
         const std::string fullpath = WarpX::lab_data_directory + "/snapshots";
diff --git a/Source/Diagnostics/ElectrostaticIO.cpp b/Source/Diagnostics/ElectrostaticIO.cpp
index dfe04123d..a5b63c506 100644
--- a/Source/Diagnostics/ElectrostaticIO.cpp
+++ b/Source/Diagnostics/ElectrostaticIO.cpp
@@ -20,7 +20,7 @@ WritePlotFileES (const amrex::Vector<std::unique_ptr<amrex::MultiFab> >& rho,
                  const amrex::Vector<std::unique_ptr<amrex::MultiFab> >& phi,
                  const amrex::Vector<std::array<std::unique_ptr<amrex::MultiFab>, 3> >& E)
 {
-    BL_PROFILE("WarpX::WritePlotFileES()");
+    WARPX_PROFILE("WarpX::WritePlotFileES()");
 
     VisMF::Header::Version current_version = VisMF::GetHeaderVersion();
     VisMF::SetHeaderVersion(plotfile_headerversion);
diff --git a/Source/Diagnostics/FieldIO.cpp b/Source/Diagnostics/FieldIO.cpp
index c2248d3e2..b0c55053f 100644
--- a/Source/Diagnostics/FieldIO.cpp
+++ b/Source/Diagnostics/FieldIO.cpp
@@ -125,7 +125,7 @@ WriteOpenPMDFields( const std::string& filename,
                   const MultiFab& mf, const Geometry& geom,
                   const int iteration, const double time )
 {
-  BL_PROFILE("WriteOpenPMDFields()");
+  WARPX_PROFILE("WriteOpenPMDFields()");
 
   const int ncomp = mf.nComp();
 
diff --git a/Source/Diagnostics/ParticleIO.cpp b/Source/Diagnostics/ParticleIO.cpp
index ca9e86fdd..7b4d8897a 100644
--- a/Source/Diagnostics/ParticleIO.cpp
+++ b/Source/Diagnostics/ParticleIO.cpp
@@ -169,7 +169,7 @@ MultiParticleContainer::WriteHeader (std::ostream& os) const
 void
 PhysicalParticleContainer::ConvertUnits(ConvertDirection convert_direction)
 {
-    BL_PROFILE("PPC::ConvertUnits()");
+    WARPX_PROFILE("PPC::ConvertUnits()");
 
     // Compute conversion factor
     Real factor = 1;
diff --git a/Source/Diagnostics/WarpXIO.cpp b/Source/Diagnostics/WarpXIO.cpp
index 58fdccf22..adf9b409c 100644
--- a/Source/Diagnostics/WarpXIO.cpp
+++ b/Source/Diagnostics/WarpXIO.cpp
@@ -120,7 +120,7 @@ WarpX::WriteWarpXHeader(const std::string& name) const
 void
 WarpX::WriteCheckPointFile() const
 {
-    BL_PROFILE("WarpX::WriteCheckPointFile()");
+    WARPX_PROFILE("WarpX::WriteCheckPointFile()");
 
     VisMF::Header::Version current_version = VisMF::GetHeaderVersion();
     VisMF::SetHeaderVersion(checkpoint_headerversion);
@@ -203,7 +203,7 @@ WarpX::WriteCheckPointFile() const
 void
 WarpX::InitFromCheckpoint ()
 {
-    BL_PROFILE("WarpX::InitFromCheckpoint()");
+    WARPX_PROFILE("WarpX::InitFromCheckpoint()");
 
     amrex::Print() << "  Restart from checkpoint " << restart_chkfile << "\n";
 
@@ -421,7 +421,7 @@ WarpX::InitFromCheckpoint ()
 std::unique_ptr<MultiFab>
 WarpX::GetCellCenteredData() {
 
-    BL_PROFILE("WarpX::GetCellCenteredData");
+    WARPX_PROFILE("WarpX::GetCellCenteredData");
 
     const int ng =  1;
     const int nc = 10;
@@ -461,7 +461,7 @@ void
 WarpX::UpdateInSitu () const
 {
 #if defined(BL_USE_SENSEI_INSITU) || defined(AMREX_USE_ASCENT)
-    BL_PROFILE("WarpX::UpdateInSitu()");
+    WARPX_PROFILE("WarpX::UpdateInSitu()");
 
     // Average the fields from the simulation to the cell centers
     const int ngrow = 1;
@@ -535,7 +535,7 @@ WarpX::prepareFields(
 void
 WarpX::WriteOpenPMDFile () const
 {
-    BL_PROFILE("WarpX::WriteOpenPMDFile()");
+    WARPX_PROFILE("WarpX::WriteOpenPMDFile()");
 
 #ifdef WARPX_USE_OPENPMD
     const auto step = istep[0];
@@ -559,7 +559,7 @@ WarpX::WriteOpenPMDFile () const
 void
 WarpX::WritePlotFile () const
 {
-    BL_PROFILE("WarpX::WritePlotFile()");
+    WARPX_PROFILE("WarpX::WritePlotFile()");
 
     const auto step = istep[0];
     const std::string& plotfilename = amrex::Concatenate(plot_file,step);
diff --git a/Source/Diagnostics/WarpXOpenPMD.cpp b/Source/Diagnostics/WarpXOpenPMD.cpp
index cacf6367d..2d3e5940b 100644
--- a/Source/Diagnostics/WarpXOpenPMD.cpp
+++ b/Source/Diagnostics/WarpXOpenPMD.cpp
@@ -204,7 +204,7 @@ WarpXOpenPMDPlot::Init(openPMD::AccessType accessType)
 void
 WarpXOpenPMDPlot::WriteOpenPMDParticles(const std::unique_ptr<MultiParticleContainer>& mpc)
 {
-  BL_PROFILE("WarpXOpenPMDPlot::WriteOpenPMDParticles()");
+  WARPX_PROFILE("WarpXOpenPMDPlot::WriteOpenPMDParticles()");
   std::vector<std::string> species_names = mpc->GetSpeciesNames();
 
   for (unsigned i = 0, n = species_names.size(); i < n; ++i) {
@@ -541,7 +541,7 @@ WarpXOpenPMDPlot::WriteOpenPMDFields( //const std::string& filename,
                       const double time ) const
 {
   //This is AMReX's tiny profiler. Possibly will apply it later
-  BL_PROFILE("WarpXOpenPMDPlot::WriteOpenPMDFields()");
+  WARPX_PROFILE("WarpXOpenPMDPlot::WriteOpenPMDFields()");
 
   AMREX_ALWAYS_ASSERT_WITH_MESSAGE(m_Series != nullptr, "openPMD series must be initialized");
 
diff --git a/Source/Evolve/WarpXEvolveEM.cpp b/Source/Evolve/WarpXEvolveEM.cpp
index f4cb61add..d17ec8694 100644
--- a/Source/Evolve/WarpXEvolveEM.cpp
+++ b/Source/Evolve/WarpXEvolveEM.cpp
@@ -30,7 +30,7 @@ using namespace amrex;
 void
 WarpX::EvolveEM (int numsteps)
 {
-    BL_PROFILE("WarpX::EvolveEM()");
+    WARPX_PROFILE("WarpX::EvolveEM()");
 
     Real cur_time = t_new[0];
     static int last_plot_file_step = 0;
diff --git a/Source/Evolve/WarpXEvolveES.cpp b/Source/Evolve/WarpXEvolveES.cpp
index 77e037154..ba2ab8ce5 100644
--- a/Source/Evolve/WarpXEvolveES.cpp
+++ b/Source/Evolve/WarpXEvolveES.cpp
@@ -21,7 +21,7 @@ WarpX::EvolveES (int numsteps) {
 
     amrex::Print() << "Running in electrostatic mode \n";
 
-    BL_PROFILE("WarpX::EvolveES()");
+    WARPX_PROFILE("WarpX::EvolveES()");
     Real cur_time = t_new[0];
     static int last_plot_file_step = 0;
     static int last_check_file_step = 0;
diff --git a/Source/FieldSolver/PicsarHybridSpectralSolver/PicsarHybridSpectralSolver.cpp b/Source/FieldSolver/PicsarHybridSpectralSolver/PicsarHybridSpectralSolver.cpp
index cb6b7794f..978129cf2 100644
--- a/Source/FieldSolver/PicsarHybridSpectralSolver/PicsarHybridSpectralSolver.cpp
+++ b/Source/FieldSolver/PicsarHybridSpectralSolver/PicsarHybridSpectralSolver.cpp
@@ -376,12 +376,12 @@ WarpX::FreeFFT (int lev)
 void
 WarpX::PushPSATD_hybridFFT (int lev, amrex::Real /* dt */)
 {
-    BL_PROFILE_VAR_NS("WarpXFFT::CopyDualGrid", blp_copy);
-    BL_PROFILE_VAR_NS("PICSAR::FftPushEB", blp_push_eb);
+    WARPX_PROFILE_VAR_NS("WarpXFFT::CopyDualGrid", blp_copy);
+    WARPX_PROFILE_VAR_NS("PICSAR::FftPushEB", blp_push_eb);
 
     auto period_fp = geom[lev].periodicity();
 
-    BL_PROFILE_VAR_START(blp_copy);
+    WARPX_PROFILE_VAR_START(blp_copy);
     Efield_fp_fft[lev][0]->ParallelCopy(*Efield_fp[lev][0], 0, 0, 1, Efield_fp[lev][0]->nGrow(), 0, period_fp);
     Efield_fp_fft[lev][1]->ParallelCopy(*Efield_fp[lev][1], 0, 0, 1, Efield_fp[lev][1]->nGrow(), 0, period_fp);
     Efield_fp_fft[lev][2]->ParallelCopy(*Efield_fp[lev][2], 0, 0, 1, Efield_fp[lev][2]->nGrow(), 0, period_fp);
@@ -392,9 +392,9 @@ WarpX::PushPSATD_hybridFFT (int lev, amrex::Real /* dt */)
     current_fp_fft[lev][1]->ParallelCopy(*current_fp[lev][1], 0, 0, 1, current_fp[lev][1]->nGrow(), 0, period_fp);
     current_fp_fft[lev][2]->ParallelCopy(*current_fp[lev][2], 0, 0, 1, current_fp[lev][2]->nGrow(), 0, period_fp);
     rho_fp_fft[lev]->ParallelCopy(*rho_fp[lev], 0, 0, 2, rho_fp[lev]->nGrow(), 0, period_fp);
-    BL_PROFILE_VAR_STOP(blp_copy);
+    WARPX_PROFILE_VAR_STOP(blp_copy);
 
-    BL_PROFILE_VAR_START(blp_push_eb);
+    WARPX_PROFILE_VAR_START(blp_push_eb);
     if (Efield_fp_fft[lev][0]->local_size() == 1)
        //Only one FFT patch on this MPI
     {
@@ -435,16 +435,16 @@ WarpX::PushPSATD_hybridFFT (int lev, amrex::Real /* dt */)
     {
         amrex::Abort("WarpX::PushPSATD: TODO");
     }
-    BL_PROFILE_VAR_STOP(blp_push_eb);
+    WARPX_PROFILE_VAR_STOP(blp_push_eb);
 
-    BL_PROFILE_VAR_START(blp_copy);
+    WARPX_PROFILE_VAR_START(blp_copy);
     CopyDataFromFFTToValid(*Efield_fp[lev][0], *Efield_fp_fft[lev][0], ba_valid_fp_fft[lev], geom[lev]);
     CopyDataFromFFTToValid(*Efield_fp[lev][1], *Efield_fp_fft[lev][1], ba_valid_fp_fft[lev], geom[lev]);
     CopyDataFromFFTToValid(*Efield_fp[lev][2], *Efield_fp_fft[lev][2], ba_valid_fp_fft[lev], geom[lev]);
     CopyDataFromFFTToValid(*Bfield_fp[lev][0], *Bfield_fp_fft[lev][0], ba_valid_fp_fft[lev], geom[lev]);
     CopyDataFromFFTToValid(*Bfield_fp[lev][1], *Bfield_fp_fft[lev][1], ba_valid_fp_fft[lev], geom[lev]);
     CopyDataFromFFTToValid(*Bfield_fp[lev][2], *Bfield_fp_fft[lev][2], ba_valid_fp_fft[lev], geom[lev]);
-    BL_PROFILE_VAR_STOP(blp_copy);
+    WARPX_PROFILE_VAR_STOP(blp_copy);
 
     if (lev > 0)
     {
diff --git a/Source/FieldSolver/SpectralSolver/SpectralSolver.H b/Source/FieldSolver/SpectralSolver/SpectralSolver.H
index 65f975682..2f6428fc6 100644
--- a/Source/FieldSolver/SpectralSolver/SpectralSolver.H
+++ b/Source/FieldSolver/SpectralSolver/SpectralSolver.H
@@ -40,10 +40,7 @@ class SpectralSolver
          *  (in the spectral field specified by `field_index`) */
         void ForwardTransform( const amrex::MultiFab& mf,
                                const int field_index,
-                               const int i_comp=0 ){
-            BL_PROFILE("SpectralSolver::ForwardTransform");
-            field_data.ForwardTransform( mf, field_index, i_comp );
-        };
+                               const int i_comp=0 );
 
         /**
          * \brief Transform spectral field specified by `field_index` back to
@@ -51,21 +48,12 @@ class SpectralSolver
          */
         void BackwardTransform( amrex::MultiFab& mf,
                                 const int field_index,
-                                const int i_comp=0 ){
-            BL_PROFILE("SpectralSolver::BackwardTransform");
-            field_data.BackwardTransform( mf, field_index, i_comp );
-        };
+                                const int i_comp=0 );
 
         /**
          * \brief Update the fields in spectral space, over one timestep
          */
-        void pushSpectralFields(){
-            BL_PROFILE("SpectralSolver::pushSpectralFields");
-            // Virtual function: the actual function used here depends
-            // on the sub-class of `SpectralBaseAlgorithm` that was
-            // initialized in the constructor of `SpectralSolver`
-            algorithm->pushSpectralFields( field_data );
-        };
+        void pushSpectralFields();
 
     private:
         SpectralFieldData field_data; // Store field in spectral space
diff --git a/Source/FieldSolver/SpectralSolver/SpectralSolver.cpp b/Source/FieldSolver/SpectralSolver/SpectralSolver.cpp
index c24a7af69..9df981528 100644
--- a/Source/FieldSolver/SpectralSolver/SpectralSolver.cpp
+++ b/Source/FieldSolver/SpectralSolver/SpectralSolver.cpp
@@ -9,7 +9,8 @@
 #include <PsatdAlgorithm.H>
 #include <GalileanAlgorithm.H>
 #include <PMLPsatdAlgorithm.H>
-
+#include "WarpX.H"
+#include "WarpXProfilerWrapper.H"
 
 /* \brief Initialize the spectral Maxwell solver
  *
@@ -62,4 +63,31 @@ SpectralSolver::SpectralSolver(
     field_data = SpectralFieldData( realspace_ba, k_space, dm,
             algorithm->getRequiredNumberOfFields() );
 
-};
+}
+
+void
+SpectralSolver::ForwardTransform( const amrex::MultiFab& mf,
+                                  const int field_index,
+                                  const int i_comp )
+{
+    WARPX_PROFILE("SpectralSolver::ForwardTransform");
+    field_data.ForwardTransform( mf, field_index, i_comp );
+}
+
+void
+SpectralSolver::BackwardTransform( amrex::MultiFab& mf,
+                                   const int field_index,
+                                   const int i_comp )
+{
+    WARPX_PROFILE("SpectralSolver::BackwardTransform");
+    field_data.BackwardTransform( mf, field_index, i_comp );
+}
+
+void
+SpectralSolver::pushSpectralFields(){
+    WARPX_PROFILE("SpectralSolver::pushSpectralFields");
+    // Virtual function: the actual function used here depends
+    // on the sub-class of `SpectralBaseAlgorithm` that was
+    // initialized in the constructor of `SpectralSolver`
+    algorithm->pushSpectralFields( field_data );
+}
diff --git a/Source/FieldSolver/WarpXPushFieldsEM.cpp b/Source/FieldSolver/WarpXPushFieldsEM.cpp
index 74db3cac4..dd9a41254 100644
--- a/Source/FieldSolver/WarpXPushFieldsEM.cpp
+++ b/Source/FieldSolver/WarpXPushFieldsEM.cpp
@@ -107,7 +107,7 @@ WarpX::EvolveB (amrex::Real a_dt)
 void
 WarpX::EvolveB (int lev, amrex::Real a_dt)
 {
-    BL_PROFILE("WarpX::EvolveB()");
+    WARPX_PROFILE("WarpX::EvolveB()");
     EvolveB(lev, PatchType::fine, a_dt);
     if (lev > 0)
     {
@@ -208,7 +208,7 @@ WarpX::EvolveE (amrex::Real a_dt)
 void
 WarpX::EvolveE (int lev, amrex::Real a_dt)
 {
-    BL_PROFILE("WarpX::EvolveE()");
+    WARPX_PROFILE("WarpX::EvolveE()");
     EvolveE(lev, PatchType::fine, a_dt);
     if (lev > 0)
     {
@@ -529,7 +529,7 @@ WarpX::EvolveF (int lev, PatchType patch_type, amrex::Real a_dt, DtType a_dt_typ
 {
     if (!do_dive_cleaning) return;
 
-    BL_PROFILE("WarpX::EvolveF()");
+    WARPX_PROFILE("WarpX::EvolveF()");
 
     static constexpr Real mu_c2 = PhysConst::mu0*PhysConst::c*PhysConst::c;
 
diff --git a/Source/FieldSolver/WarpX_QED_Field_Pushers.cpp b/Source/FieldSolver/WarpX_QED_Field_Pushers.cpp
index ef297e351..3e1034c99 100644
--- a/Source/FieldSolver/WarpX_QED_Field_Pushers.cpp
+++ b/Source/FieldSolver/WarpX_QED_Field_Pushers.cpp
@@ -48,7 +48,7 @@ WarpX::Hybrid_QED_Push (amrex::Vector<amrex::Real> dt)
 void
 WarpX::Hybrid_QED_Push (int lev, Real a_dt)
 {
-    BL_PROFILE("WarpX::Hybrid_QED_Push()");
+    WARPX_PROFILE("WarpX::Hybrid_QED_Push()");
     Hybrid_QED_Push(lev, PatchType::fine, a_dt);
     if (lev > 0)
     {
diff --git a/Source/Filter/BilinearFilter.cpp b/Source/Filter/BilinearFilter.cpp
index ba7bc30f5..67067bdc1 100644
--- a/Source/Filter/BilinearFilter.cpp
+++ b/Source/Filter/BilinearFilter.cpp
@@ -51,7 +51,7 @@ namespace {
 }
 
 void BilinearFilter::ComputeStencils(){
-    BL_PROFILE("BilinearFilter::ComputeStencils()");
+    WARPX_PROFILE("BilinearFilter::ComputeStencils()");
     stencil_length_each_dir = npass_each_dir;
     stencil_length_each_dir += 1.;
 #if (AMREX_SPACEDIM == 3)
diff --git a/Source/Filter/Filter.cpp b/Source/Filter/Filter.cpp
index dbe13747e..ed1aa383b 100644
--- a/Source/Filter/Filter.cpp
+++ b/Source/Filter/Filter.cpp
@@ -26,7 +26,7 @@ using namespace amrex;
 void
 Filter::ApplyStencil (MultiFab& dstmf, const MultiFab& srcmf, int scomp, int dcomp, int ncomp)
 {
-    BL_PROFILE("BilinearFilter::ApplyStencil(MultiFab)");
+    WARPX_PROFILE("BilinearFilter::ApplyStencil(MultiFab)");
     ncomp = std::min(ncomp, srcmf.nComp());
 
     for (MFIter mfi(dstmf); mfi.isValid(); ++mfi)
@@ -69,7 +69,7 @@ void
 Filter::ApplyStencil (FArrayBox& dstfab, const FArrayBox& srcfab,
                       const Box& tbx, int scomp, int dcomp, int ncomp)
 {
-    BL_PROFILE("BilinearFilter::ApplyStencil(FArrayBox)");
+    WARPX_PROFILE("BilinearFilter::ApplyStencil(FArrayBox)");
     ncomp = std::min(ncomp, srcfab.nComp());
     const auto& src = srcfab.array();
     const auto& dst = dstfab.array();
@@ -153,7 +153,7 @@ void Filter::DoFilter (const Box& tbx,
 void
 Filter::ApplyStencil (amrex::MultiFab& dstmf, const amrex::MultiFab& srcmf, int scomp, int dcomp, int ncomp)
 {
-    BL_PROFILE("BilinearFilter::ApplyStencil()");
+    WARPX_PROFILE("BilinearFilter::ApplyStencil()");
     ncomp = std::min(ncomp, srcmf.nComp());
 #ifdef _OPENMP
 #pragma omp parallel
@@ -189,7 +189,7 @@ void
 Filter::ApplyStencil (amrex::FArrayBox& dstfab, const amrex::FArrayBox& srcfab,
                       const amrex::Box& tbx, int scomp, int dcomp, int ncomp)
 {
-    BL_PROFILE("BilinearFilter::ApplyStencil(FArrayBox)");
+    WARPX_PROFILE("BilinearFilter::ApplyStencil(FArrayBox)");
     ncomp = std::min(ncomp, srcfab.nComp());
     FArrayBox tmpfab;
     const Box& gbx = amrex::grow(tbx,stencil_length_each_dir-1);
diff --git a/Source/Initialization/WarpXInitData.cpp b/Source/Initialization/WarpXInitData.cpp
index 66100eb16..c82244d63 100644
--- a/Source/Initialization/WarpXInitData.cpp
+++ b/Source/Initialization/WarpXInitData.cpp
@@ -26,7 +26,7 @@ using namespace amrex;
 void
 WarpX::InitData ()
 {
-    BL_PROFILE("WarpX::InitData()");
+    WARPX_PROFILE("WarpX::InitData()");
 
     if (restart_chkfile.empty())
     {
diff --git a/Source/Laser/LaserParticleContainer.cpp b/Source/Laser/LaserParticleContainer.cpp
index 1d0857a45..8c4273e1f 100644
--- a/Source/Laser/LaserParticleContainer.cpp
+++ b/Source/Laser/LaserParticleContainer.cpp
@@ -380,11 +380,11 @@ LaserParticleContainer::Evolve (int lev,
                                 const MultiFab*, const MultiFab*, const MultiFab*,
                                 Real t, Real dt, DtType a_dt_type)
 {
-    BL_PROFILE("Laser::Evolve()");
-    BL_PROFILE_VAR_NS("Laser::Evolve::Copy", blp_copy);
-    BL_PROFILE_VAR_NS("Laser::ParticlePush", blp_pp);
-    BL_PROFILE_VAR_NS("Laser::CurrentDepo", blp_cd);
-    BL_PROFILE_VAR_NS("Laser::Evolve::Accumulate", blp_accumulate);
+    WARPX_PROFILE("Laser::Evolve()");
+    WARPX_PROFILE_VAR_NS("Laser::Evolve::Copy", blp_copy);
+    WARPX_PROFILE_VAR_NS("Laser::ParticlePush", blp_pp);
+    WARPX_PROFILE_VAR_NS("Laser::CurrentDepo", blp_cd);
+    WARPX_PROFILE_VAR_NS("Laser::Evolve::Accumulate", blp_accumulate);
 
     Real t_lab = t;
     if (WarpX::gamma_boost > 1) {
@@ -445,7 +445,7 @@ LaserParticleContainer::Evolve (int lev,
             //
             // Particle Push
             //
-            BL_PROFILE_VAR_START(blp_pp);
+            WARPX_PROFILE_VAR_START(blp_pp);
             // Find the coordinates of the particles in the emission plane
             calculate_laser_plane_coordinates(pti, np,
                                               plane_Xp.dataPtr(),
@@ -461,7 +461,7 @@ LaserParticleContainer::Evolve (int lev,
             update_laser_particle(pti, np, uxp.dataPtr(), uyp.dataPtr(),
                                   uzp.dataPtr(), wp.dataPtr(),
                                   amplitude_E.dataPtr(), dt);
-            BL_PROFILE_VAR_STOP(blp_pp);
+            WARPX_PROFILE_VAR_STOP(blp_pp);
 
             //
             // Current Deposition
diff --git a/Source/Parallelization/WarpXComm.cpp b/Source/Parallelization/WarpXComm.cpp
index fcc7343a6..550fdc825 100644
--- a/Source/Parallelization/WarpXComm.cpp
+++ b/Source/Parallelization/WarpXComm.cpp
@@ -59,7 +59,7 @@ WarpX::ExchangeWithPmlF (int lev)
 void
 WarpX::UpdateAuxilaryData ()
 {
-    BL_PROFILE("UpdateAuxilaryData()");
+    WARPX_PROFILE("UpdateAuxilaryData()");
 
     if (Bfield_aux[0][0]->ixType() == Bfield_fp[0][0]->ixType()) {
         UpdateAuxilaryDataSameType();
@@ -550,7 +550,7 @@ WarpX::FillBoundaryAux (int lev, IntVect ng)
 void
 WarpX::SyncCurrent ()
 {
-    BL_PROFILE("SyncCurrent()");
+    WARPX_PROFILE("SyncCurrent()");
 
     // Restrict fine patch current onto the coarse patch, before
     // summing the guard cells of the fine patch
@@ -585,7 +585,7 @@ interpolateCurrentFineToCoarse ( std::array< amrex::MultiFab const *, 3 > const
                                  std::array< amrex::MultiFab       *, 3 > const & coarse,
                                  int const refinement_ratio)
 {
-    BL_PROFILE("interpolateCurrentFineToCoarse()");
+    WARPX_PROFILE("interpolateCurrentFineToCoarse()");
     BL_ASSERT(refinement_ratio == 2);
     const IntVect& ng = (fine[0]->nGrowVect() + 1) / refinement_ratio; // add equivalent no. of guards to coarse patch
 
@@ -617,7 +617,7 @@ interpolateCurrentFineToCoarse ( std::array< amrex::MultiFab const *, 3 > const
 void
 WarpX::SyncRho ()
 {
-    BL_PROFILE("SyncRho()");
+    WARPX_PROFILE("SyncRho()");
 
     if (!rho_fp[0]) return;
     const int ncomp = rho_fp[0]->nComp();
@@ -643,7 +643,7 @@ WarpX::SyncRho ()
 void
 interpolateDensityFineToCoarse (const MultiFab& fine, MultiFab& coarse, int const refinement_ratio)
 {
-    BL_PROFILE("interpolateDensityFineToCoarse()");
+    WARPX_PROFILE("interpolateDensityFineToCoarse()");
     BL_ASSERT(refinement_ratio == 2);
     const IntVect& ng = (fine.nGrowVect() + 1) / refinement_ratio;  // add equivalent no. of guards to coarse patch
     const int nc = fine.nComp();
diff --git a/Source/Parallelization/WarpXRegrid.cpp b/Source/Parallelization/WarpXRegrid.cpp
index 54166e8ce..7b762606f 100644
--- a/Source/Parallelization/WarpXRegrid.cpp
+++ b/Source/Parallelization/WarpXRegrid.cpp
@@ -14,8 +14,8 @@ using namespace amrex;
 void
 WarpX::LoadBalance ()
 {
-    BL_PROFILE_REGION("LoadBalance");
-    BL_PROFILE("WarpX::LoadBalance()");
+    WARPX_PROFILE_REGION("LoadBalance");
+    WARPX_PROFILE("WarpX::LoadBalance()");
 
     AMREX_ALWAYS_ASSERT(costs[0] != nullptr);
 
diff --git a/Source/Particles/MultiParticleContainer.cpp b/Source/Particles/MultiParticleContainer.cpp
index 28bcbd220..ebb79787e 100644
--- a/Source/Particles/MultiParticleContainer.cpp
+++ b/Source/Particles/MultiParticleContainer.cpp
@@ -494,7 +494,7 @@ MultiParticleContainer
                    Vector<WarpXParticleContainer::DiagnosticParticleData>& parts) const
 {
 
-    BL_PROFILE("MultiParticleContainer::GetLabFrameData");
+    WARPX_PROFILE("MultiParticleContainer::GetLabFrameData");
 
     // Loop over particle species
     for (int i = 0; i < nspecies_back_transformed_diagnostics; ++i){
@@ -639,7 +639,7 @@ MultiParticleContainer::getSpeciesID (std::string product_str)
 void
 MultiParticleContainer::doFieldIonization ()
 {
-    BL_PROFILE("MPC::doFieldIonization");
+    WARPX_PROFILE("MPC::doFieldIonization");
 
     // Loop over all species.
     // Ionized particles in pc_source create particles in pc_product
@@ -684,7 +684,7 @@ MultiParticleContainer::doFieldIonization ()
 void
 MultiParticleContainer::doCoulombCollisions ()
 {
-    BL_PROFILE("MPC::doCoulombCollisions");
+    WARPX_PROFILE("MPC::doCoulombCollisions");
 
     for (int i = 0; i < ncollisions; ++i)
     {
diff --git a/Source/Particles/PhysicalParticleContainer.cpp b/Source/Particles/PhysicalParticleContainer.cpp
index dd0693cb0..35a52107b 100644
--- a/Source/Particles/PhysicalParticleContainer.cpp
+++ b/Source/Particles/PhysicalParticleContainer.cpp
@@ -314,7 +314,7 @@ PhysicalParticleContainer::CheckAndAddParticle(Real x, Real y, Real z,
 void
 PhysicalParticleContainer::AddParticles (int lev)
 {
-    BL_PROFILE("PhysicalParticleContainer::AddParticles()");
+    WARPX_PROFILE("PhysicalParticleContainer::AddParticles()");
 
     if (plasma_injector->add_single_particle) {
         AddNParticles(lev, 1,
@@ -361,7 +361,7 @@ PhysicalParticleContainer::AddParticles (int lev)
 void
 PhysicalParticleContainer::AddPlasma (int lev, RealBox part_realbox)
 {
-    BL_PROFILE("PhysicalParticleContainer::AddPlasma");
+    WARPX_PROFILE("PhysicalParticleContainer::AddPlasma");
 
     // If no part_realbox is provided, initialize particles in the whole domain
     const Geometry& geom = Geom(lev);
@@ -924,7 +924,7 @@ PhysicalParticleContainer::EvolveES (const Vector<std::array<std::unique_ptr<Mul
                                      Vector<std::unique_ptr<MultiFab> >& rho,
                                      Real t, Real dt)
 {
-    BL_PROFILE("PPC::EvolveES()");
+    WARPX_PROFILE("PPC::EvolveES()");
 
     int num_levels = rho.size();
     for (int lev = 0; lev < num_levels; ++lev) {
@@ -1111,11 +1111,11 @@ PhysicalParticleContainer::Evolve (int lev,
                                    const MultiFab* cBx, const MultiFab* cBy, const MultiFab* cBz,
                                    Real t, Real dt, DtType a_dt_type)
 {
-    BL_PROFILE("PPC::Evolve()");
-    BL_PROFILE_VAR_NS("PPC::Evolve::Copy", blp_copy);
-    BL_PROFILE_VAR_NS("PPC::FieldGather", blp_fg);
-    BL_PROFILE_VAR_NS("PPC::EvolveOpticalDepth", blp_ppc_qed_ev);
-    BL_PROFILE_VAR_NS("PPC::ParticlePush", blp_ppc_pp);
+    WARPX_PROFILE("PPC::Evolve()");
+    WARPX_PROFILE_VAR_NS("PPC::Evolve::Copy", blp_copy);
+    WARPX_PROFILE_VAR_NS("PPC::FieldGather", blp_fg);
+    WARPX_PROFILE_VAR_NS("PPC::EvolveOpticalDepth", blp_ppc_qed_ev);
+    WARPX_PROFILE_VAR_NS("PPC::ParticlePush", blp_ppc_pp);
 
     const std::array<Real,3>& dx = WarpX::CellSize(lev);
     const std::array<Real,3>& cdx = WarpX::CellSize(std::max(lev-1,0));
@@ -1246,7 +1246,7 @@ PhysicalParticleContainer::Evolve (int lev,
                 //
                 // Field Gather of Aux Data (i.e., the full solution)
                 //
-                BL_PROFILE_VAR_START(blp_fg);
+                WARPX_PROFILE_VAR_START(blp_fg);
                 FieldGather(pti, Exp, Eyp, Ezp, Bxp, Byp, Bzp,
                             exfab, eyfab, ezfab, bxfab, byfab, bzfab,
                             Ex.nGrow(), e_is_nodal,
@@ -1289,23 +1289,23 @@ PhysicalParticleContainer::Evolve (int lev,
                                 lev, lev-1);
                 }
 
-                BL_PROFILE_VAR_STOP(blp_fg);
+                WARPX_PROFILE_VAR_STOP(blp_fg);
 
 #ifdef WARPX_QED
                 //
                 //Evolve Optical Depth
                 //
-                BL_PROFILE_VAR_START(blp_ppc_qed_ev);
+                WARPX_PROFILE_VAR_START(blp_ppc_qed_ev);
                 EvolveOpticalDepth(pti, dt);
-                BL_PROFILE_VAR_STOP(blp_ppc_qed_ev);
+                WARPX_PROFILE_VAR_STOP(blp_ppc_qed_ev);
 #endif
 
                 //
                 // Particle Push
                 //
-                BL_PROFILE_VAR_START(blp_ppc_pp);
+                WARPX_PROFILE_VAR_START(blp_ppc_pp);
                 PushPX(pti, dt, a_dt_type);
-                BL_PROFILE_VAR_STOP(blp_ppc_pp);
+                WARPX_PROFILE_VAR_STOP(blp_ppc_pp);
 
                 //
                 // Current Deposition
@@ -1797,7 +1797,7 @@ PhysicalParticleContainer::PushP (int lev, Real dt,
                                   const MultiFab& Ex, const MultiFab& Ey, const MultiFab& Ez,
                                   const MultiFab& Bx, const MultiFab& By, const MultiFab& Bz)
 {
-    BL_PROFILE("PhysicalParticleContainer::PushP");
+    WARPX_PROFILE("PhysicalParticleContainer::PushP");
 
     if (do_not_push) return;
 
@@ -1950,7 +1950,7 @@ void PhysicalParticleContainer::GetParticleSlice(const int direction, const Real
                                                  const Real t_lab, const Real dt,
                                                  DiagnosticParticles& diagnostic_particles)
 {
-    BL_PROFILE("PhysicalParticleContainer::GetParticleSlice");
+    WARPX_PROFILE("PhysicalParticleContainer::GetParticleSlice");
 
     // Assume that the boost in the positive z direction.
 #if (AMREX_SPACEDIM == 2)
@@ -2331,7 +2331,7 @@ void PhysicalParticleContainer::InitIonizationModule ()
 IonizationFilterFunc
 PhysicalParticleContainer::getIonizationFunc ()
 {
-    BL_PROFILE("PPC::getIonizationFunc");
+    WARPX_PROFILE("PPC::getIonizationFunc");
 
     return IonizationFilterFunc{ionization_energies.dataPtr(),
                                 adk_prefactor.dataPtr(),
diff --git a/Source/Particles/RigidInjectedParticleContainer.cpp b/Source/Particles/RigidInjectedParticleContainer.cpp
index 88259b3a4..c3152049a 100644
--- a/Source/Particles/RigidInjectedParticleContainer.cpp
+++ b/Source/Particles/RigidInjectedParticleContainer.cpp
@@ -381,7 +381,7 @@ RigidInjectedParticleContainer::PushP (int lev, Real dt,
                                        const MultiFab& Ex, const MultiFab& Ey, const MultiFab& Ez,
                                        const MultiFab& Bx, const MultiFab& By, const MultiFab& Bz)
 {
-    BL_PROFILE("RigidInjectedParticleContainer::PushP");
+    WARPX_PROFILE("RigidInjectedParticleContainer::PushP");
 
     if (do_not_push) return;
 
diff --git a/Source/Particles/Sorting/Partition.cpp b/Source/Particles/Sorting/Partition.cpp
index c25c24d5d..3c0ad7965 100644
--- a/Source/Particles/Sorting/Partition.cpp
+++ b/Source/Particles/Sorting/Partition.cpp
@@ -46,7 +46,7 @@ PhysicalParticleContainer::PartitionParticlesInBuffers(
     iMultiFab const* gather_masks,
     RealVector& uxp, RealVector& uyp, RealVector& uzp, RealVector& wp)
 {
-    BL_PROFILE("PPC::Evolve::partition");
+    WARPX_PROFILE("PPC::Evolve::partition");
 
     // Initialize temporary arrays
     Gpu::DeviceVector<int> inexflag;
diff --git a/Source/Particles/WarpXParticleContainer.cpp b/Source/Particles/WarpXParticleContainer.cpp
index 59c38dcac..d77e35b6b 100644
--- a/Source/Particles/WarpXParticleContainer.cpp
+++ b/Source/Particles/WarpXParticleContainer.cpp
@@ -237,8 +237,8 @@ WarpXParticleContainer::DepositCurrent(WarpXParIter& pti,
     const std::array<Real,3>& dx = WarpX::CellSize(std::max(depos_lev,0));
     Real q = this->charge;
 
-    BL_PROFILE_VAR_NS("PPC::Evolve::Accumulate", blp_accumulate);
-    BL_PROFILE_VAR_NS("PPC::CurrentDeposition", blp_deposit);
+    WARPX_PROFILE_VAR_NS("PPC::Evolve::Accumulate", blp_accumulate);
+    WARPX_PROFILE_VAR_NS("PPC::CurrentDeposition", blp_deposit);
 
 
     // Get tile box where current is deposited.
@@ -313,7 +313,7 @@ WarpXParticleContainer::DepositCurrent(WarpXParIter& pti,
         }
     }
 
-    BL_PROFILE_VAR_START(blp_deposit);
+    WARPX_PROFILE_VAR_START(blp_deposit);
     if (WarpX::current_deposition_algo == CurrentDepositionAlgo::Esirkepov) {
         if        (WarpX::nox == 1){
             doEsirkepovDepositionShapeN<1>(
@@ -355,16 +355,16 @@ WarpXParticleContainer::DepositCurrent(WarpXParIter& pti,
                 xyzmin, lo, q);
         }
     }
-    BL_PROFILE_VAR_STOP(blp_deposit);
+    WARPX_PROFILE_VAR_STOP(blp_deposit);
 
 #ifndef AMREX_USE_GPU
-    BL_PROFILE_VAR_START(blp_accumulate);
+    WARPX_PROFILE_VAR_START(blp_accumulate);
     // CPU, tiling: atomicAdd local_jx into jx
     // (same for jx and jz)
     (*jx)[pti].atomicAdd(local_jx[thread_num], tbx, tbx, 0, 0, jx->nComp());
     (*jy)[pti].atomicAdd(local_jy[thread_num], tby, tby, 0, 0, jy->nComp());
     (*jz)[pti].atomicAdd(local_jz[thread_num], tbz, tbz, 0, 0, jz->nComp());
-    BL_PROFILE_VAR_STOP(blp_accumulate);
+    WARPX_PROFILE_VAR_STOP(blp_accumulate);
 #endif
 }
 
@@ -407,8 +407,8 @@ WarpXParticleContainer::DepositCharge (WarpXParIter& pti, RealVector& wp,
     const std::array<Real,3>& dx = WarpX::CellSize(std::max(depos_lev,0));
     const Real q = this->charge;
 
-    BL_PROFILE_VAR_NS("PPC::ChargeDeposition", blp_ppc_chd);
-    BL_PROFILE_VAR_NS("PPC::Evolve::Accumulate", blp_accumulate);
+    WARPX_PROFILE_VAR_NS("PPC::ChargeDeposition", blp_ppc_chd);
+    WARPX_PROFILE_VAR_NS("PPC::Evolve::Accumulate", blp_accumulate);
 
     // Get tile box where charge is deposited.
     // The tile box is different when depositing in the buffers (depos_lev<lev)
@@ -465,7 +465,7 @@ WarpXParticleContainer::DepositCharge (WarpXParIter& pti, RealVector& wp,
     // Indices of the lower bound
     const Dim3 lo = lbound(tilebox);
 
-    BL_PROFILE_VAR_START(blp_ppc_chd);
+    WARPX_PROFILE_VAR_START(blp_ppc_chd);
     if        (WarpX::nox == 1){
         doChargeDepositionShapeN<1>(GetPosition, wp.dataPtr()+offset, ion_lev,
                                     rho_arr, np_to_depose, dx, xyzmin, lo, q);
@@ -476,14 +476,14 @@ WarpXParticleContainer::DepositCharge (WarpXParIter& pti, RealVector& wp,
         doChargeDepositionShapeN<3>(GetPosition, wp.dataPtr()+offset, ion_lev,
                                     rho_arr, np_to_depose, dx, xyzmin, lo, q);
     }
-    BL_PROFILE_VAR_STOP(blp_ppc_chd);
+    WARPX_PROFILE_VAR_STOP(blp_ppc_chd);
 
 #ifndef AMREX_USE_GPU
-    BL_PROFILE_VAR_START(blp_accumulate);
+    WARPX_PROFILE_VAR_START(blp_accumulate);
 
     (*rho)[pti].atomicAdd(local_rho[thread_num], tb, tb, 0, icomp*nc, nc);
 
-    BL_PROFILE_VAR_STOP(blp_accumulate);
+    WARPX_PROFILE_VAR_STOP(blp_accumulate);
 #endif
 }
 
@@ -717,7 +717,7 @@ WarpXParticleContainer::PushX (amrex::Real dt)
 void
 WarpXParticleContainer::PushX (int lev, amrex::Real dt)
 {
-    BL_PROFILE("WPC::PushX()");
+    WARPX_PROFILE("WPC::PushX()");
 
     if (do_not_push) return;
 
diff --git a/Source/Utils/Make.package b/Source/Utils/Make.package
index 7e814ba89..022ff286a 100644
--- a/Source/Utils/Make.package
+++ b/Source/Utils/Make.package
@@ -12,6 +12,7 @@ CEXE_sources += WarpXAlgorithmSelection.cpp
 CEXE_headers += NCIGodfreyTables.H
 CEXE_headers += WarpX_Complex.H
 CEXE_headers += IonizationEnergiesTable.H
+CEXE_headers += WarpXProfilerWrapper.H
 
 INCLUDE_LOCATIONS += $(WARPX_HOME)/Source/Utils
 VPATH_LOCATIONS   += $(WARPX_HOME)/Source/Utils
diff --git a/Source/Utils/WarpXMovingWindow.cpp b/Source/Utils/WarpXMovingWindow.cpp
index 8f1666b2f..d7bdaf45f 100644
--- a/Source/Utils/WarpXMovingWindow.cpp
+++ b/Source/Utils/WarpXMovingWindow.cpp
@@ -235,7 +235,7 @@ WarpX::shiftMF (MultiFab& mf, const Geometry& geom, int num_shift, int dir,
                 IntVect ng_extra, amrex::Real external_field, bool useparser,
                 ParserWrapper<3> *field_parser)
 {
-    BL_PROFILE("WarpX::shiftMF()");
+    WARPX_PROFILE("WarpX::shiftMF()");
     const BoxArray& ba = mf.boxArray();
     const DistributionMapping& dm = mf.DistributionMap();
     const int nc = mf.nComp();
diff --git a/Source/Utils/WarpXProfilerWrapper.H b/Source/Utils/WarpXProfilerWrapper.H
new file mode 100644
index 000000000..6d18e22ba
--- /dev/null
+++ b/Source/Utils/WarpXProfilerWrapper.H
@@ -0,0 +1,58 @@
+/* Copyright 2020 Axel Huebl, Maxence Thevenet
+ *
+ * This file is part of WarpX.
+ *
+ * License: BSD-3-Clause-LBNL
+ */
+
+#ifndef WARPX_PROFILERWRAPPER_H_
+#define WARPX_PROFILERWRAPPER_H_
+
+#include "AMReX_BLProfiler.H"
+#include "AMReX_GpuDevice.H"
+
+/** \brief Optionally synchronize the device before a profiler call.
+ *
+ * Calls amrex::Gpu::synchronize() when the flag is non-zero and does nothing
+ * otherwise.
+ *
+ * Declared inline (not static): this header is included from many translation
+ * units, and a static function would give each of them a private copy and
+ * trigger unused-function warnings wherever no WARPX_PROFILE* macro is expanded.
+ *
+ * \param do_device_synchronize non-zero to synchronize, zero to skip
+ */
+inline void doDeviceSynchronize(int do_device_synchronize)
+{
+    if ( do_device_synchronize )
+        amrex::Gpu::synchronize();
+}
+
+// Wrappers around the BL_PROFILE* macros of the same name. Each one first calls
+// doDeviceSynchronize(WarpX::do_device_synchronize_before_profile), so the
+// WarpX class must be declared wherever a macro is expanded.
+//
+// The four macros below forward to BL_PROFILE* macros that may declare a
+// variable in the enclosing scope, so they cannot be wrapped in a block. They
+// expand to more than one statement: do not use them as the body of an unbraced
+// if/else/loop.
+#define WARPX_PROFILE(fname) doDeviceSynchronize(WarpX::do_device_synchronize_before_profile); BL_PROFILE(fname)
+#define WARPX_PROFILE_VAR(fname, vname) doDeviceSynchronize(WarpX::do_device_synchronize_before_profile); BL_PROFILE_VAR(fname, vname)
+#define WARPX_PROFILE_VAR_NS(fname, vname) doDeviceSynchronize(WarpX::do_device_synchronize_before_profile); BL_PROFILE_VAR_NS(fname, vname)
+#define WARPX_PROFILE_REGION(rname) doDeviceSynchronize(WarpX::do_device_synchronize_before_profile); BL_PROFILE_REGION(rname)
+
+// START/STOP act on an already-declared profiler variable, so they are wrapped
+// in do { } while (0) and behave as a single statement (safe under an unbraced
+// if/else). A trailing semicolon is required at the call site.
+#define WARPX_PROFILE_VAR_START(vname) \
+    do { \
+        doDeviceSynchronize(WarpX::do_device_synchronize_before_profile); \
+        BL_PROFILE_VAR_START(vname); \
+    } while (0)
+#define WARPX_PROFILE_VAR_STOP(vname) \
+    do { \
+        doDeviceSynchronize(WarpX::do_device_synchronize_before_profile); \
+        BL_PROFILE_VAR_STOP(vname); \
+    } while (0)
+
+#endif // WARPX_PROFILERWRAPPER_H_
diff --git a/Source/Utils/WarpXUtil.cpp b/Source/Utils/WarpXUtil.cpp
index 1805bb4ed..63cf72aa2 100644
--- a/Source/Utils/WarpXUtil.cpp
+++ b/Source/Utils/WarpXUtil.cpp
@@ -126,7 +126,7 @@ void ConvertLabParamsToBoost()
  * zmin and zmax.
  */
 void NullifyMF(amrex::MultiFab& mf, int lev, amrex::Real zmin, amrex::Real zmax){
-    BL_PROFILE("WarpX::NullifyMF()");
+    WARPX_PROFILE("WarpX::NullifyMF()");
 #ifdef _OPENMP
 #pragma omp parallel if (Gpu::notInLaunchRegion())
 #endif
diff --git a/Source/WarpX.H b/Source/WarpX.H
index 9b456f663..0f11e6040 100644
--- a/Source/WarpX.H
+++ b/Source/WarpX.H
@@ -173,6 +173,7 @@ public:
 
     static int do_subcycling;
 
+    static bool do_device_synchronize_before_profile;
     static bool safe_guard_cells;
 
     // buffers
diff --git a/Source/WarpX.cpp b/Source/WarpX.cpp
index 46f5ab731..d3f419d98 100644
--- a/Source/WarpX.cpp
+++ b/Source/WarpX.cpp
@@ -15,6 +15,7 @@
 #include <WarpXUtil.H>
 #include <WarpXAlgorithmSelection.H>
 #include <WarpX_FDTD.H>
+#include "WarpXProfilerWrapper.H"
 
 #include <AMReX_ParmParse.H>
 #include <AMReX_MultiFabUtil.H>
@@ -144,6 +145,12 @@ int WarpX::n_current_deposition_buffer = -1;
 
 int WarpX::do_nodal = false;
 
+#ifdef AMREX_USE_GPU
+bool WarpX::do_device_synchronize_before_profile = true;
+#else
+bool WarpX::do_device_synchronize_before_profile = false;
+#endif
+
 WarpX* WarpX::m_instance = nullptr;
 
 WarpX&
@@ -376,6 +383,8 @@ WarpX::ReadParameters ()
 
         ReadBoostedFrameParameters(gamma_boost, beta_boost, boost_direction);
 
+        pp.query("do_device_synchronize_before_profile", do_device_synchronize_before_profile);
+
         // pp.query returns 1 if argument zmax_plasma_to_compute_max_step is
         // specified by the user, 0 otherwise.
         do_compute_max_step_from_zmax =
@@ -1109,7 +1118,7 @@ WarpX::RefRatio (int lev)
 
 void
 WarpX::Evolve (int numsteps) {
-    BL_PROFILE_REGION("WarpX::Evolve()");
+    WARPX_PROFILE_REGION("WarpX::Evolve()");
 
 #ifdef WARPX_DO_ELECTROSTATIC
     if (do_electrostatic) {
diff --git a/Source/main.cpp b/Source/main.cpp
index fc705bdf0..13a26e615 100644
--- a/Source/main.cpp
+++ b/Source/main.cpp
@@ -8,6 +8,7 @@
  */
 #include <WarpX.H>
 #include <WarpXUtil.H>
+#include "WarpXProfilerWrapper.H"
 
 #include <AMReX.H>
 #include <AMReX_ParmParse.H>
@@ -34,7 +35,7 @@ int main(int argc, char* argv[])
 
     ConvertLabParamsToBoost();
 
-    BL_PROFILE_VAR("main()", pmain);
+    WARPX_PROFILE_VAR("main()", pmain);
 
     const Real strt_total = amrex::second();
 
@@ -55,7 +56,7 @@ int main(int argc, char* argv[])
         }
     }
 
-    BL_PROFILE_VAR_STOP(pmain);
+    WARPX_PROFILE_VAR_STOP(pmain);
 
     amrex::Finalize();
 #if defined AMREX_USE_MPI