7 files changed, 319 insertions, 26 deletions
diff --git a/Source/Diagnostics/ReducedDiags/CMakeLists.txt b/Source/Diagnostics/ReducedDiags/CMakeLists.txt
index 559cac5fb..e823bcffe 100644
--- a/Source/Diagnostics/ReducedDiags/CMakeLists.txt
+++ b/Source/Diagnostics/ReducedDiags/CMakeLists.txt
@@ -7,4 +7,5 @@ target_sources(WarpX
     ParticleEnergy.cpp
     ParticleHistogram.cpp
     ReducedDiags.cpp
+    FieldMaximum.cpp
 )
diff --git a/Source/Diagnostics/ReducedDiags/FieldEnergy.cpp b/Source/Diagnostics/ReducedDiags/FieldEnergy.cpp
index c70711d66..82a633681 100644
--- a/Source/Diagnostics/ReducedDiags/FieldEnergy.cpp
+++ b/Source/Diagnostics/ReducedDiags/FieldEnergy.cpp
@@ -35,8 +35,9 @@ FieldEnergy::FieldEnergy (std::string rd_name)
     pp.query("max_level", nLevel);
     nLevel += 1;
 
+    constexpr int noutputs = 3; // total energy, E-field energy and B-field energy
     // resize data array
-    m_data.resize(3*nLevel,0.0);
+    m_data.resize(noutputs*nLevel,0.0);
 
     if (ParallelDescriptor::IOProcessor())
     {
@@ -51,16 +52,19 @@ FieldEnergy::FieldEnergy (std::string rd_name)
             ofs << "[1]step()";
             ofs << m_sep;
             ofs << "[2]time(s)";
+            constexpr int shift_total = 3;
+            constexpr int shift_E = 4;
+            constexpr int shift_B = 5;
             for (int lev = 0; lev < nLevel; ++lev)
             {
                 ofs << m_sep;
-                ofs << "[" + std::to_string(3+3*lev) + "]";
+                ofs << "[" + std::to_string(shift_total+noutputs*lev) + "]";
                 ofs << "total_lev"+std::to_string(lev)+"(J)";
                 ofs << m_sep;
-                ofs << "[" + std::to_string(4+3*lev) + "]";
+                ofs << "[" + std::to_string(shift_E+noutputs*lev) + "]";
                 ofs << "E_lev"+std::to_string(lev)+"(J)";
                 ofs << m_sep;
-                ofs << "[" + std::to_string(5+3*lev) + "]";
+                ofs << "[" + std::to_string(shift_B+noutputs*lev) + "]";
                 ofs << "B_lev"+std::to_string(lev)+"(J)";
             }
             ofs << std::endl;
@@ -83,7 +87,7 @@ void FieldEnergy::ComputeDiags (int step)
     auto & warpx = WarpX::GetInstance();
 
     // get number of level
-    auto nLevel = warpx.finestLevel() + 1;
+    const auto nLevel = warpx.finestLevel() + 1;
 
     // loop over refinement levels
     for (int lev = 0; lev < nLevel; ++lev)
@@ -106,21 +110,27 @@ void FieldEnergy::ComputeDiags (int step)
 #endif
 
         // compute E squared
-        Real tmpx = Ex.norm2(0,geom.periodicity());
-        Real tmpy = Ey.norm2(0,geom.periodicity());
-        Real tmpz = Ez.norm2(0,geom.periodicity());
-        Real Es = tmpx*tmpx + tmpy*tmpy + tmpz*tmpz;
+        Real const tmpEx = Ex.norm2(0,geom.periodicity());
+        Real const tmpEy = Ey.norm2(0,geom.periodicity());
+        Real const tmpEz = Ez.norm2(0,geom.periodicity());
+        Real const Es = tmpEx*tmpEx + tmpEy*tmpEy + tmpEz*tmpEz;
 
         // compute B squared
-        tmpx = Bx.norm2(0,geom.periodicity());
-        tmpy = By.norm2(0,geom.periodicity());
-        tmpz = Bz.norm2(0,geom.periodicity());
-        Real Bs = tmpx*tmpx + tmpy*tmpy + tmpz*tmpz;
+        Real const tmpBx = Bx.norm2(0,geom.periodicity());
+        Real const tmpBy = By.norm2(0,geom.periodicity());
+        Real const tmpBz = Bz.norm2(0,geom.periodicity());
+        Real const Bs = tmpBx*tmpBx + tmpBy*tmpBy + tmpBz*tmpBz;
+
+        constexpr int noutputs = 3; // total energy, E-field energy and B-field energy
+        constexpr int index_total = 0;
+        constexpr int index_E = 1;
+        constexpr int index_B = 2;
 
         // save data
-        m_data[lev*3+1] = 0.5 * Es * PhysConst::ep0 * dV;
-        m_data[lev*3+2] = 0.5 * Bs / PhysConst::mu0 * dV;
-        m_data[lev*3+0] = m_data[lev*3+1] + m_data[lev*3+2];
+        m_data[lev*noutputs+index_E] = 0.5_rt * Es * PhysConst::ep0 * dV;
+        m_data[lev*noutputs+index_B] = 0.5_rt * Bs / PhysConst::mu0 * dV;
+        m_data[lev*noutputs+index_total] = m_data[lev*noutputs+index_E] +
+                                           m_data[lev*noutputs+index_B];
 
     }
     // end loop over refinement levels
diff --git a/Source/Diagnostics/ReducedDiags/FieldMaximum.H b/Source/Diagnostics/ReducedDiags/FieldMaximum.H
new file mode 100644
index 000000000..31953e280
--- /dev/null
+++ b/Source/Diagnostics/ReducedDiags/FieldMaximum.H
@@ -0,0 +1,30 @@
+/* Copyright 2020 Neil Zaim, Yinjian Zhao
+ *
+ * This file is part of WarpX.
+ *
+ * License: BSD-3-Clause-LBNL
+ */
+
+#ifndef WARPX_DIAGNOSTICS_REDUCEDDIAGS_FIELDMAXIMUM_H_
+#define WARPX_DIAGNOSTICS_REDUCEDDIAGS_FIELDMAXIMUM_H_
+
+#include "ReducedDiags.H"
+
+/**
+ *  This class mainly contains a function that computes the maximum value of each component
+ * of the EM field and of the vector norm of the E and B fields.
+ */
+class FieldMaximum : public ReducedDiags
+{
+public:
+
+    /** constructor
+     *  @param[in] rd_name reduced diags names */
+    FieldMaximum(std::string rd_name);
+
+    /** This function computes the maximum value of Ex, Ey, Ez, |E|, Bx, By, Bz and |B| */
+    virtual void ComputeDiags(int step) override final;
+
+};
+
+#endif // WARPX_DIAGNOSTICS_REDUCEDDIAGS_FIELDMAXIMUM_H_
diff --git a/Source/Diagnostics/ReducedDiags/FieldMaximum.cpp b/Source/Diagnostics/ReducedDiags/FieldMaximum.cpp
new file mode 100644
index 000000000..294f21525
--- /dev/null
+++ b/Source/Diagnostics/ReducedDiags/FieldMaximum.cpp
@@ -0,0 +1,252 @@
+/* Copyright 2020 Neil Zaim, Yinjian Zhao
+ *
+ * This file is part of WarpX.
+ *
+ * License: BSD-3-Clause-LBNL
+ */
+
+#include "FieldMaximum.H"
+#include "WarpX.H"
+#include "Utils/CoarsenIO.H"
+
+using namespace amrex;
+
+// constructor
+FieldMaximum::FieldMaximum (std::string rd_name)
+: ReducedDiags{rd_name}
+{
+
+    // RZ coordinate is not working
+#if (defined WARPX_DIM_RZ)
+    AMREX_ALWAYS_ASSERT_WITH_MESSAGE(false,
+        "FieldMaximum reduced diagnostics does not work for RZ coordinate.");
+#endif
+
+    // read number of levels
+    int nLevel = 0;
+    ParmParse pp("amr");
+    pp.query("max_level", nLevel);
+    nLevel += 1;
+
+    constexpr int noutputs = 8; // total energy, E-field energy and B-field energy
+    // resize data array
+    m_data.resize(noutputs*nLevel,0.0); // max of Ex,Ey,Ez,|E|,Bx,By,Bz and |B|
+
+    if (ParallelDescriptor::IOProcessor())
+    {
+        if ( m_IsNotRestart )
+        {
+            // open file
+            std::ofstream ofs;
+            ofs.open(m_path + m_rd_name + "." + m_extension,
+                std::ofstream::out | std::ofstream::app);
+            // write header row
+            ofs << "#";
+            ofs << "[1]step()";
+            ofs << m_sep;
+            ofs << "[2]time(s)";
+            constexpr int shift_Ex = 3;
+            constexpr int shift_Ey = 4;
+            constexpr int shift_Ez = 5;
+            constexpr int shift_absE = 6;
+            constexpr int shift_Bx = 7;
+            constexpr int shift_By = 8;
+            constexpr int shift_Bz = 9;
+            constexpr int shift_absB = 10;
+            for (int lev = 0; lev < nLevel; ++lev)
+            {
+                ofs << m_sep;
+                ofs << "[" + std::to_string(shift_Ex+noutputs*lev) + "]";
+                ofs << "max_Ex_lev"+std::to_string(lev)+" (V/m)";
+                ofs << m_sep;
+                ofs << "[" + std::to_string(shift_Ey+noutputs*lev) + "]";
+                ofs << "max_Ey_lev"+std::to_string(lev)+" (V/m)";
+                ofs << m_sep;
+                ofs << "[" + std::to_string(shift_Ez+noutputs*lev) + "]";
+                ofs << "max_Ez_lev"+std::to_string(lev)+" (V/m)";
+                ofs << m_sep;
+                ofs << "[" + std::to_string(shift_absE+noutputs*lev) + "]";
+                ofs << "max_|E|_lev"+std::to_string(lev)+" (V/m)";
+                ofs << m_sep;
+                ofs << "[" + std::to_string(shift_Bx+noutputs*lev) + "]";
+                ofs << "max_Bx_lev"+std::to_string(lev)+" (T)";
+                ofs << m_sep;
+                ofs << "[" + std::to_string(shift_By+noutputs*lev) + "]";
+                ofs << "max_By_lev"+std::to_string(lev)+" (T)";
+                ofs << m_sep;
+                ofs << "[" + std::to_string(shift_Bz+noutputs*lev) + "]";
+                ofs << "max_Bz_lev"+std::to_string(lev)+" (T)";
+                ofs << m_sep;
+                ofs << "[" + std::to_string(shift_absB+noutputs*lev) + "]";
+                ofs << "max_|B|_lev"+std::to_string(lev)+" (T)";
+            }
+            ofs << std::endl;
+            // close file
+            ofs.close();
+        }
+    }
+
+}
+// end constructor
+
+// function that computes maximum field values
+void FieldMaximum::ComputeDiags (int step)
+{
+    // Judge if the diags should be done
+    if ( (step+1) % m_freq != 0 ) { return; }
+
+    // get WarpX class object
+    auto & warpx = WarpX::GetInstance();
+
+    // get number of level
+    const auto nLevel = warpx.finestLevel() + 1;
+
+    // loop over refinement levels
+    for (int lev = 0; lev < nLevel; ++lev)
+    {
+
+        // get MultiFab data at lev
+        const MultiFab & Ex = warpx.getEfield(lev,0);
+        const MultiFab & Ey = warpx.getEfield(lev,1);
+        const MultiFab & Ez = warpx.getEfield(lev,2);
+        const MultiFab & Bx = warpx.getBfield(lev,0);
+        const MultiFab & By = warpx.getBfield(lev,1);
+        const MultiFab & Bz = warpx.getBfield(lev,2);
+
+        constexpr int noutputs = 8; // max of Ex,Ey,Ez,|E|,Bx,By,Bz and |B|
+        constexpr int index_Ex = 0;
+        constexpr int index_Ey = 1;
+        constexpr int index_Ez = 2;
+        constexpr int index_absE = 3;
+        constexpr int index_Bx = 4;
+        constexpr int index_By = 5;
+        constexpr int index_Bz = 6;
+        constexpr int index_absB = 7;
+
+        // get Maximums of E field components
+        m_data[lev*noutputs+index_Ex] = Ex.norm0();
+        m_data[lev*noutputs+index_Ey] = Ey.norm0();
+        m_data[lev*noutputs+index_Ez] = Ez.norm0();
+
+        // get Maximums of B field components
+        m_data[lev*noutputs+index_Bx] = Bx.norm0();
+        m_data[lev*noutputs+index_By] = By.norm0();
+        m_data[lev*noutputs+index_Bz] = Bz.norm0();
+
+        // General preparation of interpolation and reduction to compute the maximum value of
+        // |E| and |B|
+        const GpuArray<int,3> cellCenteredtype{AMREX_D_DECL(0,0,0)};
+        const GpuArray<int,3> reduction_coarsening_ratio{AMREX_D_DECL(1,1,1)};
+        constexpr int reduction_comp = 0;
+
+        // Prepare reduction for maximum value of |E|
+        ReduceOps<ReduceOpMax> reduceE_op;
+        ReduceData<Real> reduceE_data(reduceE_op);
+        using ReduceTuple = typename decltype(reduceE_data)::Type;
+
+        // Prepare interpolation of E field components to cell center
+        GpuArray<int,3> Extype;
+        GpuArray<int,3> Eytype;
+        GpuArray<int,3> Eztype;
+        for (int i = 0; i < AMREX_SPACEDIM; ++i){
+            Extype[i] = Ex.ixType()[i];
+            Eytype[i] = Ey.ixType()[i];
+            Eztype[i] = Ez.ixType()[i];
+        }
+#if   (AMREX_SPACEDIM == 2)
+        Extype[2] = 0;
+        Eytype[2] = 0;
+        Eztype[2] = 0;
+#endif
+
+        // MFIter loop to interpolate E field components to cell center and get maximum value
+        // of |E|
+#ifdef _OPENMP
+#pragma omp parallel if (amrex::Gpu::notInLaunchRegion())
+#endif
+        for ( MFIter mfi(Ex, TilingIfNotGPU()); mfi.isValid(); ++mfi )
+        {
+            // Make the box cell centered to avoid including ghost cells in the calculation
+            const Box& box = enclosedCells(mfi.nodaltilebox());
+            const auto& arrEx = Ex[mfi].array();
+            const auto& arrEy = Ey[mfi].array();
+            const auto& arrEz = Ez[mfi].array();
+            reduceE_op.eval(box, reduceE_data,
+            [=] AMREX_GPU_DEVICE (int i, int j, int k) -> ReduceTuple
+            {
+                const Real Ex_interp = CoarsenIO::Interp(arrEx, Extype, cellCenteredtype,
+                                        reduction_coarsening_ratio, i, j, k, reduction_comp);
+                const Real Ey_interp = CoarsenIO::Interp(arrEy, Eytype, cellCenteredtype,
+                                        reduction_coarsening_ratio, i, j, k, reduction_comp);
+                const Real Ez_interp = CoarsenIO::Interp(arrEz, Eztype, cellCenteredtype,
+                                        reduction_coarsening_ratio, i, j, k, reduction_comp);
+                return Ex_interp*Ex_interp + Ey_interp*Ey_interp + Ez_interp*Ez_interp;
+            });
+        }
+
+        Real hv_E = amrex::get<0>(reduceE_data.value()); // highest value of |E|**2
+        // MPI reduce
+        ParallelDescriptor::ReduceRealMax(hv_E);
+
+        m_data[lev*noutputs+index_absE] = std::sqrt(hv_E);
+
+        // Prepare reduction for maximum value of |B|
+        ReduceOps<ReduceOpMax> reduceB_op;
+        ReduceData<Real> reduceB_data(reduceB_op);
+        using ReduceTuple = typename decltype(reduceB_data)::Type;
+
+        // Prepare interpolation of B field components to cell center
+        GpuArray<int,3> Bxtype;
+        GpuArray<int,3> Bytype;
+        GpuArray<int,3> Bztype;
+        for (int i = 0; i < AMREX_SPACEDIM; ++i){
+            Bxtype[i] = Bx.ixType()[i];
+            Bytype[i] = By.ixType()[i];
+            Bztype[i] = Bz.ixType()[i];
+        }
+#if   (AMREX_SPACEDIM == 2)
+        Bxtype[2] = 0;
+        Bytype[2] = 0;
+        Bztype[2] = 0;
+#endif
+
+        // MFIter loop to interpolate B field components to cell center and get maximum value
+        // of |B|
+#ifdef _OPENMP
+#pragma omp parallel if (amrex::Gpu::notInLaunchRegion())
+#endif
+        for ( MFIter mfi(Ex, TilingIfNotGPU()); mfi.isValid(); ++mfi )
+        {
+            // Make the box cell centered to avoid including ghost cells in the calculation
+            const Box& box = enclosedCells(mfi.nodaltilebox());
+            const auto& arrBx = Bx[mfi].array();
+            const auto& arrBy = By[mfi].array();
+            const auto& arrBz = Bz[mfi].array();
+
+            reduceB_op.eval(box, reduceB_data,
+            [=] AMREX_GPU_DEVICE (int i, int j, int k) -> ReduceTuple
+            {
+                const Real Bx_interp = CoarsenIO::Interp(arrBx, Bxtype, cellCenteredtype,
+                                        reduction_coarsening_ratio, i, j, k, reduction_comp);
+                const Real By_interp = CoarsenIO::Interp(arrBy, Bytype, cellCenteredtype,
+                                        reduction_coarsening_ratio, i, j, k, reduction_comp);
+                const Real Bz_interp = CoarsenIO::Interp(arrBz, Bztype, cellCenteredtype,
+                                        reduction_coarsening_ratio, i, j, k, reduction_comp);
+                return Bx_interp*Bx_interp + By_interp*By_interp + Bz_interp*Bz_interp;
+            });
+        }
+
+        Real hv_B = amrex::get<0>(reduceB_data.value()); // highest value of |B|**2
+        // MPI reduce
+        ParallelDescriptor::ReduceRealMax(hv_B);
+
+        m_data[lev*noutputs+index_absB] = std::sqrt(hv_B);
+    }
+    // end loop over refinement levels
+
+    /* m_data now contains up-to-date values for:
+     *  [max(Ex),max(Ey),max(Ez),max(|E|),
+     *   max(Bx),max(By),max(Bz),max(|B|)] */
+
+}
+// end void FieldMaximum::ComputeDiags
diff --git a/Source/Diagnostics/ReducedDiags/Make.package b/Source/Diagnostics/ReducedDiags/Make.package
index 3be9858a8..174d5d89c 100644
--- a/Source/Diagnostics/ReducedDiags/Make.package
+++ b/Source/Diagnostics/ReducedDiags/Make.package
@@ -5,5 +5,6 @@ CEXE_sources += FieldEnergy.cpp
 CEXE_sources += BeamRelevant.cpp
 CEXE_sources += LoadBalanceCosts.cpp
 CEXE_sources += ParticleHistogram.cpp
+CEXE_sources += FieldMaximum.cpp
 
 VPATH_LOCATIONS   += $(WARPX_HOME)/Source/Diagnostics/ReducedDiags
diff --git a/Source/Diagnostics/ReducedDiags/MultiReducedDiags.cpp b/Source/Diagnostics/ReducedDiags/MultiReducedDiags.cpp
index bc8a8fb9a..69ffad6ee 100644
--- a/Source/Diagnostics/ReducedDiags/MultiReducedDiags.cpp
+++ b/Source/Diagnostics/ReducedDiags/MultiReducedDiags.cpp
@@ -10,6 +10,7 @@
 #include "BeamRelevant.H"
 #include "ParticleEnergy.H"
 #include "FieldEnergy.H"
+#include "FieldMaximum.H"
 #include "MultiReducedDiags.H"
 
 #include <AMReX_ParmParse.H>
@@ -54,6 +55,11 @@ MultiReducedDiags::MultiReducedDiags ()
             m_multi_rd[i_rd].reset
                 ( new FieldEnergy(m_rd_names[i_rd]));
         }
+        else if (rd_type.compare("FieldMaximum") == 0)
+        {
+            m_multi_rd[i_rd].reset
+                ( new FieldMaximum(m_rd_names[i_rd]));
+        }
         else if (rd_type.compare("BeamRelevant") == 0)
         {
             m_multi_rd[i_rd].reset
@@ -101,7 +107,6 @@ void MultiReducedDiags::WriteToFile (int step)
     // loop over all reduced diags
     for (int i_rd = 0; i_rd < static_cast<int>(m_rd_names.size()); ++i_rd)
     {
-
         // Judge if the diags should be done
         if ( (step+1) % m_multi_rd[i_rd]->m_freq != 0 ) { continue; }
 
diff --git a/Source/Particles/MultiParticleContainer.cpp b/Source/Particles/MultiParticleContainer.cpp
index 9bcb19b2e..ec4abdea9 100644
--- a/Source/Particles/MultiParticleContainer.cpp
+++ b/Source/Particles/MultiParticleContainer.cpp
@@ -1088,17 +1088,11 @@ MultiParticleContainer::doQEDSchwinger ()
     const MultiFab & By = warpx.getBfield(level_0,1);
     const MultiFab & Bz = warpx.getBfield(level_0,2);
 
-    MFItInfo info;
-    if (TilingIfNotGPU()) {
-        info.EnableTiling();
-    }
 #ifdef _OPENMP
-    info.SetDynamic(WarpX::do_dynamic_scheduling);
-#pragma omp parallel
+#pragma omp parallel if (amrex::Gpu::notInLaunchRegion())
 #endif
-
-    for (MFIter mfi(Ex, info); mfi.isValid(); ++mfi )
-    {
+     for (MFIter mfi(Ex, TilingIfNotGPU()); mfi.isValid(); ++mfi )
+     {
         // Make the box cell centered to avoid creating particles twice on the tile edges
         const Box& box = enclosedCells(mfi.nodaltilebox());