Diffstat (limited to 'Source')
35 files changed, 1394 insertions, 467 deletions
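The FieldIO.cpp changes below add per-azimuthal-mode components to the RZ field output, named by ComponentName(). A minimal standalone sketch of that naming scheme, assuming two azimuthal modes (the main() driver and the example values are illustrative only, not part of the commit):

#include <iostream>
#include <string>

// Mirrors ComponentName() from FieldIO.cpp below: components of mode m are
// named "<field>_<m>_real" and "<field>_<m>_imag".
std::string ComponentName (std::string fieldname, int mode, std::string type)
{
    return fieldname + "_" + std::to_string(mode) + "_" + type;
}

int main ()
{
    int const n_rz_azimuthal_modes = 2; // illustrative value
    // Mode 0 is purely real; each higher mode has a real and an imaginary
    // part, giving 2*n_rz_azimuthal_modes - 1 components per field.
    std::cout << ComponentName("Er", 0, "real") << "\n"; // Er_0_real
    for (int mode = 1; mode < n_rz_azimuthal_modes; mode++) {
        std::cout << ComponentName("Er", mode, "real") << "\n"  // Er_1_real
                  << ComponentName("Er", mode, "imag") << "\n"; // Er_1_imag
    }
    return 0;
}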
diff --git a/Source/Diagnostics/FieldIO.cpp b/Source/Diagnostics/FieldIO.cpp index e1bb8cb54..9c38f1d68 100644 --- a/Source/Diagnostics/FieldIO.cpp +++ b/Source/Diagnostics/FieldIO.cpp @@ -352,6 +352,27 @@ AverageAndPackVectorField( MultiFab& mf_avg, } } +/** \brief Takes all of the components of the three fields and + * averages and packs them into the MultiFab mf_avg. + */ +void +AverageAndPackVectorFieldComponents (MultiFab& mf_avg, + const std::array< std::unique_ptr<MultiFab>, 3 >& vector_field, + const DistributionMapping& dm, + int& dcomp, const int ngrow ) +{ + if (vector_field[0]->nComp() > 1) { + std::array<std::unique_ptr<MultiFab>,3> vector_field_component; + for (int icomp=0 ; icomp < vector_field[0]->nComp() ; icomp++) { + vector_field_component[0].reset(new MultiFab(*vector_field[0], amrex::make_alias, icomp, 1)); + vector_field_component[1].reset(new MultiFab(*vector_field[1], amrex::make_alias, icomp, 1)); + vector_field_component[2].reset(new MultiFab(*vector_field[2], amrex::make_alias, icomp, 1)); + AverageAndPackVectorField(mf_avg, vector_field_component, dm, dcomp, ngrow); + dcomp += 3; + } + } +} + /** \brief Take a MultiFab `scalar_field`, * average it to the cell center, and store the * resulting MultiFab in mf_avg (in the components dcomp) @@ -378,6 +399,74 @@ AverageAndPackScalarField( MultiFab& mf_avg, } } +/** \brief Takes the specified component of the scalar and + * averages and packs it into the MultiFab mf_avg. + */ +void +AverageAndPackScalarFieldComponent (MultiFab& mf_avg, + const MultiFab& scalar_field, + const int icomp, + const int dcomp, const int ngrow ) +{ + MultiFab scalar_field_component(scalar_field, amrex::make_alias, icomp, 1); + AverageAndPackScalarField(mf_avg, scalar_field_component, dcomp, ngrow); +} + +/** \brief Generate mode variable name + */ +std::string +ComponentName(std::string fieldname, int mode, std::string type) +{ + if (type == "real") { + return fieldname + "_" + std::to_string(mode) + "_" + "real"; + } else if (type == "imag") { + return fieldname + "_" + std::to_string(mode) + "_" + "imag"; + } else { + AMREX_ALWAYS_ASSERT( false ); + } + // This line should never be reached + return ""; +} + +/* \brief Copy vector field component data into the MultiFab that will be written out + */ +void +CopyVectorFieldComponentsToMultiFab (int lev, amrex::Vector<MultiFab>& mf_avg, MultiFab& mf_tmp, + int icomp, int& dcomp, int ngrow, + std::string fieldname, Vector<std::string>& varnames) +{ + if (mf_tmp.nComp() > 3) { + if (lev==0) varnames.push_back(ComponentName(fieldname, 0, "real")); + MultiFab::Copy( mf_avg[lev], mf_tmp, 3+icomp, dcomp++, 1, ngrow); + int const nmodes = mf_tmp.nComp()/6; + for (int mode=1 ; mode < nmodes ; mode++) { + if (lev==0) varnames.push_back(ComponentName(fieldname, mode, "real")); + MultiFab::Copy( mf_avg[lev], mf_tmp, 3*2*mode+icomp, dcomp++, 1, ngrow); + if (lev==0) varnames.push_back(ComponentName(fieldname, mode, "imag")); + MultiFab::Copy( mf_avg[lev], mf_tmp, 3*2*mode+3+icomp, dcomp++, 1, ngrow); + } + } +} + +/* \brief Copy scalar field component data into the MultiFab that will be written out + */ +void +CopyScalarFieldComponentsToMultiFab (int lev, amrex::Vector<MultiFab>& mf_avg, MultiFab& mf_tmp, + int& dcomp, int ngrow, int n_rz_azimuthal_modes, + std::string fieldname, Vector<std::string>& varnames) +{ + if (n_rz_azimuthal_modes > 1) { + if (lev==0) varnames.push_back(ComponentName(fieldname, 0, "real")); + AverageAndPackScalarFieldComponent(mf_avg[lev], mf_tmp, 0, dcomp++, ngrow); + for (int mode=1 ; 
mode < n_rz_azimuthal_modes ; mode++) { + if (lev==0) varnames.push_back(ComponentName(fieldname, mode, "real")); + AverageAndPackScalarFieldComponent(mf_avg[lev], mf_tmp, 2*mode-1, dcomp++, ngrow); + if (lev==0) varnames.push_back(ComponentName(fieldname, mode, "imag")); + AverageAndPackScalarFieldComponent(mf_avg[lev], mf_tmp, 2*mode , dcomp++, ngrow); + } + } +} + /** \brief Add variable names to the list. */ void @@ -387,6 +476,15 @@ AddToVarNames (Vector<std::string>& varnames, for(auto coord:coords) varnames.push_back(name+coord+suffix); } +/** \brief Add RZ variable names to the list. + */ +void +AddToVarNamesRZ (Vector<std::string>& varnames, + std::string name, std::string suffix) { + auto coords = {"r", "theta", "z"}; + for(auto coord:coords) varnames.push_back(name+coord+suffix); +} + /** \brief Write the different fields that are meant for output, * into the vector of MultiFab `mf_avg` (one MultiFab per level) * after averaging them to the cell centers. */ @@ -396,11 +494,28 @@ WarpX::AverageAndPackFields ( Vector<std::string>& varnames, amrex::Vector<MultiFab>& mf_avg, const int ngrow) const { // Count how many different fields should be written (ncomp) - const int ncomp = fields_to_plot.size() + int ncomp = fields_to_plot.size() + static_cast<int>(plot_finepatch)*6 + static_cast<int>(plot_crsepatch)*6 + static_cast<int>(costs[0] != nullptr and plot_costs); + // Add in the RZ modes + if (n_rz_azimuthal_modes > 1) { + for (std::string field : fields_to_plot) { + if (is_in_vector({"Ex", "Ey", "Ez", "Bx", "By", "Bz", "jx", "jy", "jz", "rho", "F"}, {field})) { + ncomp += 2*n_rz_azimuthal_modes - 1; + } + } + if (plot_finepatch) { + ncomp += 6*(2*n_rz_azimuthal_modes - 1); + } + } + + int nvecs = 3; + if (n_rz_azimuthal_modes > 1) { + nvecs += 3*(2*n_rz_azimuthal_modes - 1); + } + // Loop over levels of refinement for (int lev = 0; lev <= finest_level; ++lev) { @@ -413,19 +528,25 @@ WarpX::AverageAndPackFields ( Vector<std::string>& varnames, // Build mf_tmp_E if at least one component of E is requested if (is_in_vector(fields_to_plot, {"Ex", "Ey", "Ez"} )){ // Allocate temp MultiFab with 3 components - mf_tmp_E = MultiFab(grids[lev], dmap[lev], 3, ngrow); + mf_tmp_E = MultiFab(grids[lev], dmap[lev], nvecs, ngrow); // Fill MultiFab mf_tmp_E with averaged E AverageAndPackVectorField(mf_tmp_E, Efield_aux[lev], dmap[lev], 0, ngrow); + int dcomp = 3; + AverageAndPackVectorFieldComponents(mf_tmp_E, Efield_aux[lev], dmap[lev], dcomp, ngrow); } // Same for B if (is_in_vector(fields_to_plot, {"Bx", "By", "Bz"} )){ - mf_tmp_B = MultiFab(grids[lev], dmap[lev], 3, ngrow); + mf_tmp_B = MultiFab(grids[lev], dmap[lev], nvecs, ngrow); AverageAndPackVectorField(mf_tmp_B, Bfield_aux[lev], dmap[lev], 0, ngrow); + int dcomp = 3; + AverageAndPackVectorFieldComponents(mf_tmp_B, Bfield_aux[lev], dmap[lev], dcomp, ngrow); } // Same for J if (is_in_vector(fields_to_plot, {"jx", "jy", "jz"} )){ - mf_tmp_J = MultiFab(grids[lev], dmap[lev], 3, ngrow); + mf_tmp_J = MultiFab(grids[lev], dmap[lev], nvecs, ngrow); AverageAndPackVectorField(mf_tmp_J, current_fp[lev], dmap[lev], 0, ngrow); + int dcomp = 3; + AverageAndPackVectorFieldComponents(mf_tmp_J, current_fp[lev], dmap[lev], dcomp, ngrow); } int dcomp; @@ -433,37 +554,51 @@ WarpX::AverageAndPackFields ( Vector<std::string>& varnames, // mf_avg[lev] add the corresponding names to `varnames`. // plot_fine_patch and plot_coarse_patch are treated separately // (after this for loop). 
- for (dcomp=0; dcomp<fields_to_plot.size(); dcomp++){ - std::string fieldname = fields_to_plot[dcomp]; + dcomp = 0; + for (int ifield=0; ifield<fields_to_plot.size(); ifield++){ + std::string fieldname = fields_to_plot[ifield]; if(lev==0) varnames.push_back(fieldname); if (fieldname == "Ex"){ - MultiFab::Copy( mf_avg[lev], mf_tmp_E, 0, dcomp, 1, ngrow); + MultiFab::Copy( mf_avg[lev], mf_tmp_E, 0, dcomp++, 1, ngrow); + CopyVectorFieldComponentsToMultiFab(lev, mf_avg, mf_tmp_E, 0, dcomp, ngrow, "Er", varnames); } else if (fieldname == "Ey"){ - MultiFab::Copy( mf_avg[lev], mf_tmp_E, 1, dcomp, 1, ngrow); + MultiFab::Copy( mf_avg[lev], mf_tmp_E, 1, dcomp++, 1, ngrow); + CopyVectorFieldComponentsToMultiFab(lev, mf_avg, mf_tmp_E, 1, dcomp, ngrow, "Etheta", varnames); } else if (fieldname == "Ez"){ - MultiFab::Copy( mf_avg[lev], mf_tmp_E, 2, dcomp, 1, ngrow); + MultiFab::Copy( mf_avg[lev], mf_tmp_E, 2, dcomp++, 1, ngrow); + CopyVectorFieldComponentsToMultiFab(lev, mf_avg, mf_tmp_E, 2, dcomp, ngrow, "Ez", varnames); } else if (fieldname == "Bx"){ - MultiFab::Copy( mf_avg[lev], mf_tmp_B, 0, dcomp, 1, ngrow); + MultiFab::Copy( mf_avg[lev], mf_tmp_B, 0, dcomp++, 1, ngrow); + CopyVectorFieldComponentsToMultiFab(lev, mf_avg, mf_tmp_B, 0, dcomp, ngrow, "Br", varnames); } else if (fieldname == "By"){ - MultiFab::Copy( mf_avg[lev], mf_tmp_B, 1, dcomp, 1, ngrow); + MultiFab::Copy( mf_avg[lev], mf_tmp_B, 1, dcomp++, 1, ngrow); + CopyVectorFieldComponentsToMultiFab(lev, mf_avg, mf_tmp_B, 1, dcomp, ngrow, "Btheta", varnames); } else if (fieldname == "Bz"){ - MultiFab::Copy( mf_avg[lev], mf_tmp_B, 2, dcomp, 1, ngrow); + MultiFab::Copy( mf_avg[lev], mf_tmp_B, 2, dcomp++, 1, ngrow); + CopyVectorFieldComponentsToMultiFab(lev, mf_avg, mf_tmp_B, 2, dcomp, ngrow, "Bz", varnames); } else if (fieldname == "jx"){ - MultiFab::Copy( mf_avg[lev], mf_tmp_J, 0, dcomp, 1, ngrow); + MultiFab::Copy( mf_avg[lev], mf_tmp_J, 0, dcomp++, 1, ngrow); + CopyVectorFieldComponentsToMultiFab(lev, mf_avg, mf_tmp_J, 0, dcomp, ngrow, "jr", varnames); } else if (fieldname == "jy"){ - MultiFab::Copy( mf_avg[lev], mf_tmp_J, 1, dcomp, 1, ngrow); + MultiFab::Copy( mf_avg[lev], mf_tmp_J, 1, dcomp++, 1, ngrow); + CopyVectorFieldComponentsToMultiFab(lev, mf_avg, mf_tmp_J, 1, dcomp, ngrow, "jtheta", varnames); } else if (fieldname == "jz"){ - MultiFab::Copy( mf_avg[lev], mf_tmp_J, 2, dcomp, 1, ngrow); + MultiFab::Copy( mf_avg[lev], mf_tmp_J, 2, dcomp++, 1, ngrow); + CopyVectorFieldComponentsToMultiFab(lev, mf_avg, mf_tmp_J, 2, dcomp, ngrow, "jz", varnames); } else if (fieldname == "rho"){ - AverageAndPackScalarField( mf_avg[lev], *rho_fp[lev], dcomp, ngrow ); + AverageAndPackScalarField( mf_avg[lev], *rho_fp[lev], dcomp++, ngrow ); + CopyScalarFieldComponentsToMultiFab(lev, mf_avg, *rho_fp[lev], dcomp, ngrow, n_rz_azimuthal_modes, + fieldname, varnames); } else if (fieldname == "F"){ - AverageAndPackScalarField( mf_avg[lev], *F_fp[lev], dcomp, ngrow); + AverageAndPackScalarField( mf_avg[lev], *F_fp[lev], dcomp++, ngrow ); + CopyScalarFieldComponentsToMultiFab(lev, mf_avg, *F_fp[lev], dcomp, ngrow, n_rz_azimuthal_modes, + fieldname, varnames); } else if (fieldname == "part_per_cell") { MultiFab temp_dat(grids[lev],mf_avg[lev].DistributionMap(),1,0); temp_dat.setVal(0); // MultiFab containing number of particles in each cell mypc->Increment(temp_dat, lev); - AverageAndPackScalarField( mf_avg[lev], temp_dat, dcomp, ngrow ); + AverageAndPackScalarField( mf_avg[lev], temp_dat, dcomp++, ngrow ); } else if (fieldname == "part_per_grid"){ const 
Vector<long>& npart_in_grid = mypc->NumberOfParticlesInGrid(lev); // MultiFab containing number of particles per grid @@ -473,7 +608,7 @@ WarpX::AverageAndPackFields ( Vector<std::string>& varnames, #endif for (MFIter mfi(mf_avg[lev]); mfi.isValid(); ++mfi) { (mf_avg[lev])[mfi].setVal(static_cast<Real>(npart_in_grid[mfi.index()]), dcomp); } + dcomp++; } else if (fieldname == "part_per_proc"){ const Vector<long>& npart_in_grid = mypc->NumberOfParticlesInGrid(lev); @@ -486,7 +621,7 @@ WarpX::AverageAndPackFields ( Vector<std::string>& varnames, for (MFIter mfi(mf_avg[lev]); mfi.isValid(); ++mfi) { n_per_proc += npart_in_grid[mfi.index()]; } - mf_avg[lev].setVal(static_cast<Real>(n_per_proc), dcomp,1); + mf_avg[lev].setVal(static_cast<Real>(n_per_proc), dcomp++,1); } else if (fieldname == "proc_number"){ // MultiFab containing the Processor ID #ifdef _OPENMP @@ -494,11 +629,11 @@ WarpX::AverageAndPackFields ( Vector<std::string>& varnames, #endif for (MFIter mfi(mf_avg[lev]); mfi.isValid(); ++mfi) { (mf_avg[lev])[mfi].setVal(static_cast<Real>(ParallelDescriptor::MyProc()), dcomp); } + dcomp++; } else if (fieldname == "divB"){ if (do_nodal) amrex::Abort("TODO: do_nodal && plot divb"); - ComputeDivB(mf_avg[lev], dcomp, + ComputeDivB(mf_avg[lev], dcomp++, {Bfield_aux[lev][0].get(), Bfield_aux[lev][1].get(), Bfield_aux[lev][2].get()}, @@ -512,7 +647,7 @@ WarpX::AverageAndPackFields ( Vector<std::string>& varnames, Efield_aux[lev][1].get(), Efield_aux[lev][2].get()}, WarpX::CellSize(lev) ); - AverageAndPackScalarField( mf_avg[lev], dive, dcomp, ngrow ); + AverageAndPackScalarField( mf_avg[lev], dive, dcomp++, ngrow ); } else { amrex::Abort("unknown field in fields_to_plot: " + fieldname); } @@ -520,11 +655,31 @@ WarpX::AverageAndPackFields ( Vector<std::string>& varnames, if (plot_finepatch) { AverageAndPackVectorField( mf_avg[lev], Efield_fp[lev], dmap[lev], dcomp, ngrow ); - if (lev == 0) AddToVarNames(varnames, "E", "_fp"); dcomp += 3; + AverageAndPackVectorFieldComponents(mf_avg[lev], Efield_fp[lev], dmap[lev], dcomp, ngrow); + if (lev == 0) { + AddToVarNames(varnames, "E", "_fp"); + if (n_rz_azimuthal_modes > 1) { + AddToVarNamesRZ(varnames, "E", ComponentName("_fp", 0, "real")); + for (int mode=1 ; mode < n_rz_azimuthal_modes ; mode++) { + AddToVarNamesRZ(varnames, "E", ComponentName("_fp", mode, "real")); + AddToVarNamesRZ(varnames, "E", ComponentName("_fp", mode, "imag")); + } + } + } AverageAndPackVectorField( mf_avg[lev], Bfield_fp[lev], dmap[lev], dcomp, ngrow ); - if (lev == 0) AddToVarNames(varnames, "B", "_fp"); dcomp += 3; + AverageAndPackVectorFieldComponents(mf_avg[lev], Bfield_fp[lev], dmap[lev], dcomp, ngrow); + if (lev == 0) { + AddToVarNames(varnames, "B", "_fp"); + if (n_rz_azimuthal_modes > 1) { + AddToVarNamesRZ(varnames, "B", ComponentName("_fp", 0, "real")); + for (int mode=1 ; mode < n_rz_azimuthal_modes ; mode++) { + AddToVarNamesRZ(varnames, "B", ComponentName("_fp", mode, "real")); + AddToVarNamesRZ(varnames, "B", ComponentName("_fp", mode, "imag")); + } + } + } } if (plot_crsepatch) diff --git a/Source/Diagnostics/WarpXIO.cpp b/Source/Diagnostics/WarpXIO.cpp index 31a5f700d..d7a46f7b7 100644 --- a/Source/Diagnostics/WarpXIO.cpp +++ b/Source/Diagnostics/WarpXIO.cpp @@ -527,13 +527,17 @@ WarpX::WritePlotFile () const } #ifdef WARPX_USE_OPENPMD - m_OpenPMDPlotWriter->SetStep(istep[0]); - if (dump_openpmd) - m_OpenPMDPlotWriter->WriteOpenPMDFields(varnames, - *output_mf[0], output_geom[0], istep[0], t_new[0] ); + if (dump_openpmd) { + 
m_OpenPMDPlotWriter->SetStep(istep[0]); + // fields: only dumped for coarse level + m_OpenPMDPlotWriter->WriteOpenPMDFields( + varnames, *output_mf[0], output_geom[0], istep[0], t_new[0]); + // particles: all (reside only on locally finest level) + m_OpenPMDPlotWriter->WriteOpenPMDParticles(mypc); + } #endif - if (dump_plotfiles || dump_openpmd) { + if (dump_plotfiles) { // Write the fields contained in `mf_avg`, and corresponding to the // names `varnames`, into a plotfile. @@ -616,11 +620,6 @@ WarpX::WritePlotFile () const } } -#ifdef WARPX_USE_OPENPMD - // Write openPMD format: only for level 0 - if (dump_openpmd) - m_OpenPMDPlotWriter->WriteOpenPMDParticles(mypc); -#endif // leaving the option of binary output through AMReX around // regardless of openPMD. This can be adjusted later { diff --git a/Source/Diagnostics/WarpXOpenPMD.H b/Source/Diagnostics/WarpXOpenPMD.H index c79a12066..e4b48d605 100644 --- a/Source/Diagnostics/WarpXOpenPMD.H +++ b/Source/Diagnostics/WarpXOpenPMD.H @@ -1,10 +1,20 @@ #ifndef WARPX_OPEN_PMD_H_ #define WARPX_OPEN_PMD_H_ -#include <MultiParticleContainer.H> // has AMReX Vector etc used below +#include "WarpXParticleContainer.H" +#include "MultiParticleContainer.H" // PIdx +
+#include <AMReX_ParallelDescriptor.H> +#include <AMReX_REAL.H> +#include <AMReX_Utility.H> #include <openPMD/openPMD.hpp> +#include <memory> +#include <string> +#include <vector> + + // // helper class // @@ -61,10 +71,14 @@ private: class WarpXOpenPMDPlot { public: - // not using const string, to allow std::move to be effective - WarpXOpenPMDPlot(bool, std::string& filetype); + /** Initialize openPMD I/O routines + * + * @param oneFilePerTS write one file per timestep + * @param filetype file backend, e.g. "bp" or "h5" + * @param fieldPMLdirections directions in which PML boundaries are applied, @see WarpX::getPMLdirections() + */ + WarpXOpenPMDPlot(bool oneFilePerTS, std::string filetype, std::vector<bool> fieldPMLdirections); - //WarpXOpenPMDPlot(const std::string& dir, const std::string& fileType); ~WarpXOpenPMDPlot(); void SetStep(int ts); @@ -82,12 +96,14 @@ private: void Init(//const std::string& filename, openPMD::AccessType accessType); - /** This function sets up the entries for storing the particle positions in an openPMD species + /** This function sets up the entries for storing the particle positions, global IDs, and constant records (charge, mass) * - * @param[in] currSpecies The openPMD species - * @param[in] np Number of particles + * @param[in] pc WarpX particle container + * @param[in] currSpecies Corresponding openPMD species + * @param[in] np Number of particles */ - void SetupPos(openPMD::ParticleSpecies& currSpecies, + void SetupPos(const std::unique_ptr<WarpXParticleContainer>& pc, + openPMD::ParticleSpecies& currSpecies, const unsigned long long& np) const ; /** This function sets up the entries for particle properties @@ -149,6 +165,9 @@ private: bool m_OneFilePerTS = true; //! write in openPMD fileBased manner for individual time steps std::string m_OpenPMDFileType = "bp"; //! MPI-parallel openPMD backend: bp or h5 int m_CurrentStep = -1; + + // meta data + std::vector< bool > m_fieldPMLdirections; //! 
@see WarpX::getPMLdirections() }; diff --git a/Source/Diagnostics/WarpXOpenPMD.cpp b/Source/Diagnostics/WarpXOpenPMD.cpp index 05c2066de..ed2bf8020 100644 --- a/Source/Diagnostics/WarpXOpenPMD.cpp +++ b/Source/Diagnostics/WarpXOpenPMD.cpp @@ -1,12 +1,95 @@ #include "WarpXOpenPMD.H" +#include "WarpXAlgorithmSelection.H" #include "FieldIO.H" // for getReversedVec +#include <algorithm> +#include <cstdint> +#include <map> +#include <set> +#include <string> +#include <sstream> +#include <tuple> +#include <utility> + + +namespace detail +{ + + /** Convert AMReX AoS .id and .cpu to a globally unique particle ID + */ + union GlobalID { + struct { int id; int cpu; }; //! MPI-rank local ID (and rank/cpu) + uint64_t global_id; //! global ID that is unique in the whole simulation + }; + static_assert(sizeof(int) * 2u <= sizeof(uint64_t), "int size might cause collisions in global IDs"); + + /** Unclutter a real_names to openPMD record + * + * @param fullName name as in real_names variable + * @return pair of openPMD record and component name + */ + inline std::pair< std::string, std::string > + name2openPMD( std::string const& fullName ) + { + std::string record_name = fullName; + std::string component_name = openPMD::RecordComponent::SCALAR; + std::size_t startComp = fullName.find_last_of("_"); + + if( startComp != std::string::npos ) { // non-scalar + record_name = fullName.substr(0, startComp); + component_name = fullName.substr(startComp + 1u); + } + return make_pair(record_name, component_name); + } + + /** Get the openPMD physical dimensionality of a record + * + * @param record_name name of the openPMD record + * @return map with base quantities and power scaling + */ + std::map< openPMD::UnitDimension, double > + getUnitDimension( std::string const & record_name ) + { + + if( record_name == "position" ) return { + {openPMD::UnitDimension::L, 1.} + }; + else if( record_name == "positionOffset" ) return { + {openPMD::UnitDimension::L, 1.} + }; + else if( record_name == "momentum" ) return { + {openPMD::UnitDimension::L, 1.}, + {openPMD::UnitDimension::M, 1.}, + {openPMD::UnitDimension::T, -1.} + }; + else if( record_name == "charge" ) return { + {openPMD::UnitDimension::T, 1.}, + {openPMD::UnitDimension::I, 1.} + }; + else if( record_name == "mass" ) return { + {openPMD::UnitDimension::M, 1.} + }; + else if( record_name == "E" ) return { + {openPMD::UnitDimension::L, 1.}, + {openPMD::UnitDimension::M, 1.}, + {openPMD::UnitDimension::T, -3.}, + {openPMD::UnitDimension::I, -1.}, + }; + else if( record_name == "B" ) return { + {openPMD::UnitDimension::M, 1.}, + {openPMD::UnitDimension::I, -1.}, + {openPMD::UnitDimension::T, -2.} + }; + else return {}; + } +} + WarpXOpenPMDPlot::WarpXOpenPMDPlot(bool oneFilePerTS, - std::string& openPMDFileType) + std::string openPMDFileType, std::vector<bool> fieldPMLdirections) :m_Series(nullptr), m_OneFilePerTS(oneFilePerTS), - m_OpenPMDFileType(std::move(openPMDFileType)) - //m_OpenPMDFileType(openPMDFileType) + m_OpenPMDFileType(std::move(openPMDFileType)), + m_fieldPMLdirections(std::move(fieldPMLdirections)) {} WarpXOpenPMDPlot::~WarpXOpenPMDPlot() @@ -74,8 +157,17 @@ WarpXOpenPMDPlot::Init(openPMD::AccessType accessType) else m_Series = std::make_unique<openPMD::Series>(filename, accessType); - // actually default is "particles" by openPMD. 
- m_Series->setParticlesPath("particles"); + // input file / simulation setup author + if( WarpX::authors.size() > 0u ) + m_Series->setAuthor( WarpX::authors ); + // more natural naming for PIC + m_Series->setMeshesPath( "fields" ); + // conform to ED-PIC extension of openPMD + uint32_t const openPMD_ED_PIC = 1u; + m_Series->setOpenPMDextension( openPMD_ED_PIC ); + // meta info + m_Series->setSoftware( "WarpX" ); + m_Series->setSoftwareVersion( WarpX::Version() ) ; } @@ -89,23 +181,27 @@ WarpXOpenPMDPlot::WriteOpenPMDParticles(const std::unique_ptr<MultiParticleConta auto& pc = mpc->GetUniqueContainer(i); if (pc->plot_species) { + // names of amrex::Real and int particle attributes in SoA data amrex::Vector<std::string> real_names; amrex::Vector<std::string> int_names; amrex::Vector<int> int_flags; - real_names.push_back("weight"); + // see openPMD ED-PIC extension for namings + // note: an underscore separates the record name from its component + // for non-scalar records + real_names.push_back("weighting"); real_names.push_back("momentum_x"); real_names.push_back("momentum_y"); real_names.push_back("momentum_z"); - real_names.push_back("Ex"); - real_names.push_back("Ey"); - real_names.push_back("Ez"); + real_names.push_back("E_x"); + real_names.push_back("E_y"); + real_names.push_back("E_z"); - real_names.push_back("Bx"); - real_names.push_back("By"); - real_names.push_back("Bz"); + real_names.push_back("B_x"); + real_names.push_back("B_y"); + real_names.push_back("B_z"); #ifdef WARPX_DIM_RZ real_names.push_back("theta"); @@ -159,21 +255,55 @@ WarpXOpenPMDPlot::SavePlotFile (const std::unique_ptr<WarpXParticleContainer>& p openPMD::Iteration currIteration = m_Series->iterations[iteration]; openPMD::ParticleSpecies currSpecies = currIteration.particles[name]; + // meta data for ED-PIC extension + currSpecies.setAttribute( "particleShape", double( WarpX::noz ) ); + // TODO allow this per direction in the openPMD standard, ED-PIC extension? + currSpecies.setAttribute( "particleShapes", [](){ + return std::vector< double >{ + double(WarpX::nox), +#if AMREX_SPACEDIM==3 + double(WarpX::noy), +#endif + double(WarpX::noz) + }; + }() ); + currSpecies.setAttribute( "particlePush", [](){ + switch( WarpX::particle_pusher_algo ) { + case ParticlePusherAlgo::Boris : return "Boris"; + case ParticlePusherAlgo::Vay : return "Vay"; + case ParticlePusherAlgo::HigueraCary : return "HigueraCary"; + default: return "other"; + } + }() ); + currSpecies.setAttribute( "particleInterpolation", [](){ + switch( WarpX::field_gathering_algo ) { + case GatheringAlgo::EnergyConserving : return "energyConserving"; + case GatheringAlgo::MomentumConserving : return "momentumConserving"; + default: return "other"; + } + }() ); + currSpecies.setAttribute( "particleSmoothing", "none" ); + currSpecies.setAttribute( "currentDeposition", [](){ + switch( WarpX::current_deposition_algo ) { + case CurrentDepositionAlgo::Esirkepov : return "Esirkepov"; + default: return "directMorseNielson"; + } + }() ); + // // define positions & offsets // - SetupPos(currSpecies, counter.GetTotalNumParticles()); + SetupPos(pc, currSpecies, counter.GetTotalNumParticles()); SetupRealProperties(currSpecies, write_real_comp, real_comp_names, counter.GetTotalNumParticles()); // forces the files created by all processors! this is the key to resolve RZ storage issue!! 
m_Series->flush(); for (auto currentLevel = 0; currentLevel <= pc->finestLevel(); currentLevel++) { - //long numParticles = counter.m_ParticleSizeAtRank[currentLevel] - unsigned long long const numParticles = counter.m_ParticleSizeAtRank[currentLevel]; - unsigned long long offset = counter.m_ParticleOffsetAtRank[currentLevel]; + uint64_t const numParticles = static_cast<uint64_t>( counter.m_ParticleSizeAtRank[currentLevel] ); + uint64_t offset = static_cast<uint64_t>( counter.m_ParticleOffsetAtRank[currentLevel] ); - if (0 == numParticles) + if (0u == numParticles) return; // pc->NumIntComp() & NumRealComp() are protected, @@ -181,22 +311,33 @@ WarpXOpenPMDPlot::SavePlotFile (const std::unique_ptr<WarpXParticleContainer>& p // soa num real attributes = PIdx::nattribs, and num int in soa is 0 for (WarpXParIter pti(*pc, currentLevel); pti.isValid(); ++pti) { - auto numParticleOnTile = pti.numParticles(); + auto const numParticleOnTile = pti.numParticles(); + uint64_t const numParticleOnTile64 = static_cast<uint64_t>( numParticleOnTile ); - // get position from aos + // get position and particle ID from aos + // note: this implementation iterates the AoS 4x but saves memory... iterating once could be beneficial, too const auto& aos = pti.GetArrayOfStructs(); // size = numParticlesOnTile - { // :: Save Postions, 1D-3D:: - // this code is tested with laser 3d, not tested with 2D examples... + { + // Save positions std::vector<std::string> axisNames={"x", "y", "z"}; - for (auto currDim = 0; currDim < AMREX_SPACEDIM; currDim++) { - std::vector<amrex::ParticleReal> curr(numParticleOnTile, 0); + std::vector<amrex::ParticleReal> curr(numParticleOnTile, 0.); for (auto i=0; i<numParticleOnTile; i++) { curr[i] = aos[i].m_rdata.pos[currDim]; } - currSpecies["position"][axisNames[currDim]].storeChunk(curr, {offset}, {static_cast<unsigned long long>(numParticleOnTile)}); + currSpecies["position"][axisNames[currDim]].storeChunk(curr, {offset}, {numParticleOnTile64}); m_Series->flush(); } + + // save particle ID after converting it to a globally unique ID + std::vector<uint64_t> ids(numParticleOnTile, 0.); + for (auto i=0; i<numParticleOnTile; i++) { + detail::GlobalID const nextID = { aos[i].m_idata.id, aos[i].m_idata.cpu }; + ids[i] = nextID.global_id; + } + auto const scalar = openPMD::RecordComponent::SCALAR; + currSpecies["id"][scalar].storeChunk(ids, {offset}, {numParticleOnTile64}); + m_Series->flush(); } // save properties SaveRealProperty(pti, @@ -204,7 +345,7 @@ WarpXOpenPMDPlot::SavePlotFile (const std::unique_ptr<WarpXParticleContainer>& p offset, write_real_comp, real_comp_names); - offset += numParticleOnTile; + offset += numParticleOnTile64; } } } @@ -224,8 +365,11 @@ WarpXOpenPMDPlot::SetupRealProperties(openPMD::ParticleSpecies& currSpecies, auto counter = std::min(write_real_comp.size(), real_comp_names.size()); for (int i = 0; i < counter; ++i) if (write_real_comp[i]) { - auto& particleVar = currSpecies[real_comp_names[i]]; - auto& particleVarComp = particleVar[openPMD::RecordComponent::SCALAR]; + // handle scalar and non-scalar records by name + std::string record_name, component_name; + std::tie(record_name, component_name) = detail::name2openPMD(real_comp_names[i]); + + auto& particleVarComp = currSpecies[record_name][component_name]; particleVarComp.resetDataset(particlesLineup); } } @@ -233,35 +377,40 @@ WarpXOpenPMDPlot::SetupRealProperties(openPMD::ParticleSpecies& currSpecies, void WarpXOpenPMDPlot::SaveRealProperty(WarpXParIter& pti, openPMD::ParticleSpecies& currSpecies, - 
unsigned long long offset, - const amrex::Vector<int>& write_real_comp, - const amrex::Vector<std::string>& real_comp_names) const + unsigned long long const offset, + amrex::Vector<int> const& write_real_comp, + amrex::Vector<std::string> const& real_comp_names) const { int numOutputReal = 0; int const totalRealAttrs = m_NumAoSRealAttributes + m_NumSoARealAttributes; - for (int i = 0; i < totalRealAttrs; ++i) - if (write_real_comp[i]) + for( int i = 0; i < totalRealAttrs; ++i ) + if( write_real_comp[i] ) ++numOutputReal; - auto numParticleOnTile = pti.numParticles(); - const auto& aos = pti.GetArrayOfStructs(); // size = numParticlesOnTile - const auto& soa = pti.GetStructOfArrays(); + auto const numParticleOnTile = pti.numParticles(); + auto const& aos = pti.GetArrayOfStructs(); // size = numParticlesOnTile + auto const& soa = pti.GetStructOfArrays(); // properties are saved separately { - for (auto idx=0; idx<m_NumAoSRealAttributes; idx++) { - if (write_real_comp[idx]) { - auto& currVar = currSpecies[real_comp_names[idx]][openPMD::RecordComponent::SCALAR]; - typename amrex::ParticleReal *d = - static_cast<typename amrex::ParticleReal*> (malloc(sizeof(typename amrex::ParticleReal) * numParticleOnTile)); - - for (auto kk=0; kk<numParticleOnTile; kk++) + for( auto idx=0; idx<m_NumAoSRealAttributes; idx++ ) { + if( write_real_comp[idx] ) { + // handle scalar and non-scalar records by name + std::string record_name, component_name; + std::tie(record_name, component_name) = detail::name2openPMD(real_comp_names[idx]); + auto& currRecord = currSpecies[record_name]; + auto& currRecordComp = currRecord[component_name]; + + amrex::ParticleReal *d = + static_cast<amrex::ParticleReal*>( malloc(sizeof(amrex::ParticleReal) * numParticleOnTile) ); + + for( auto kk=0; kk<numParticleOnTile; kk++ ) d[kk] = aos[kk].m_rdata.arr[AMREX_SPACEDIM+idx]; - std::shared_ptr <typename amrex::ParticleReal> data(d, free); - currVar.storeChunk(data, + std::shared_ptr<amrex::ParticleReal> data(d, free); + currRecordComp.storeChunk(data, {offset}, {static_cast<unsigned long long>(numParticleOnTile)}); m_Series->flush(); } @@ -269,12 +418,30 @@ WarpXOpenPMDPlot::SaveRealProperty(WarpXParIter& pti, } { + std::set< std::string > addedRecords; // add meta-data per record only once for (auto idx=0; idx<m_NumSoARealAttributes; idx++) { auto ii = m_NumAoSRealAttributes + idx; if (write_real_comp[ii]) { - auto& currVar = currSpecies[real_comp_names[ii]][openPMD::RecordComponent::SCALAR]; - currVar.storeChunk(openPMD::shareRaw(soa.GetRealData(idx)), - {offset}, {static_cast<unsigned long long>(numParticleOnTile)}); + // handle scalar and non-scalar records by name + std::string record_name, component_name; + std::tie(record_name, component_name) = detail::name2openPMD(real_comp_names[ii]); + auto& currRecord = currSpecies[record_name]; + auto& currRecordComp = currRecord[component_name]; + + // meta data for ED-PIC extension + bool newRecord = false; + std::tie(std::ignore, newRecord) = addedRecords.insert(record_name); + if( newRecord ) { + currRecord.setAttribute( "macroWeighted", 0u ); + if( record_name == "momentum" ) + currRecord.setAttribute( "weightingPower", 1.0 ); + else + currRecord.setAttribute( "weightingPower", 0.0 ); + currRecord.setUnitDimension( detail::getUnitDimension(record_name) ); + } + + currRecordComp.storeChunk(openPMD::shareRaw(soa.GetRealData(idx)), + {offset}, {static_cast<unsigned long long>(numParticleOnTile)}); } } m_Series->flush(); @@ -284,25 +451,44 @@ 
WarpXOpenPMDPlot::SaveRealProperty(WarpXParIter& pti, void -WarpXOpenPMDPlot::SetupPos(openPMD::ParticleSpecies& currSpecies, - const unsigned long long& np) const +WarpXOpenPMDPlot::SetupPos(const std::unique_ptr<WarpXParticleContainer>& pc, + openPMD::ParticleSpecies& currSpecies, + const unsigned long long& np) const { - auto particleLineup = openPMD::Dataset(openPMD::determineDatatype<amrex::ParticleReal>(), {np}); - - currSpecies["positionOffset"]["x"].resetDataset(particleLineup); - currSpecies["positionOffset"]["x"].makeConstant(0); - currSpecies["positionOffset"]["y"].resetDataset(particleLineup); - currSpecies["positionOffset"]["y"].makeConstant(0); - currSpecies["positionOffset"]["z"].resetDataset(particleLineup); - currSpecies["positionOffset"]["z"].makeConstant(0); - - //auto positions = openPMD::Dataset(openPMD::determineDatatype<amrex::ParticleReal>(), {np}); - currSpecies["position"]["x"].resetDataset(particleLineup); - currSpecies["position"]["y"].resetDataset(particleLineup); - currSpecies["position"]["z"].resetDataset(particleLineup); -} + auto const realType = openPMD::Dataset(openPMD::determineDatatype<amrex::ParticleReal>(), {np}); + auto const idType = openPMD::Dataset(openPMD::determineDatatype< uint64_t >(), {np}); + for( auto const& comp : {"x", "y", "z"} ) { + currSpecies["positionOffset"][comp].resetDataset( realType ); + currSpecies["positionOffset"][comp].makeConstant( 0. ); + currSpecies["position"][comp].resetDataset( realType ); + } + auto const scalar = openPMD::RecordComponent::SCALAR; + currSpecies["id"][scalar].resetDataset( idType ); + currSpecies["charge"][scalar].resetDataset( realType ); + currSpecies["charge"][scalar].makeConstant( pc->getCharge() ); + currSpecies["mass"][scalar].resetDataset( realType ); + currSpecies["mass"][scalar].makeConstant( pc->getMass() ); + + // meta data + currSpecies["position"].setUnitDimension( detail::getUnitDimension("position") ); + currSpecies["positionOffset"].setUnitDimension( detail::getUnitDimension("positionOffset") ); + currSpecies["charge"].setUnitDimension( detail::getUnitDimension("charge") ); + currSpecies["mass"].setUnitDimension( detail::getUnitDimension("mass") ); + + // meta data for ED-PIC extension + currSpecies["position"].setAttribute( "macroWeighted", 0u ); + currSpecies["position"].setAttribute( "weightingPower", 0.0 ); + currSpecies["positionOffset"].setAttribute( "macroWeighted", 0u ); + currSpecies["positionOffset"].setAttribute( "weightingPower", 0.0 ); + currSpecies["id"].setAttribute( "macroWeighted", 0u ); + currSpecies["id"].setAttribute( "weightingPower", 0.0 ); + currSpecies["charge"].setAttribute( "macroWeighted", 0u ); + currSpecies["charge"].setAttribute( "weightingPower", 1.0 ); + currSpecies["mass"].setAttribute( "macroWeighted", 0u ); + currSpecies["mass"].setAttribute( "weightingPower", 1.0 ); +} // @@ -316,48 +502,109 @@ WarpXOpenPMDPlot::WriteOpenPMDFields( //const std::string& filename, const int iteration, const double time ) const { - //This is AmrEx's tiny profiler. Possibly will apply it later + //This is AMReX's tiny profiler. 
Possibly will apply it later BL_PROFILE("WarpXOpenPMDPlot::WriteOpenPMDFields()"); if ( nullptr == m_Series) return; - const int ncomp = mf.nComp(); + int const ncomp = mf.nComp(); // Create a few vectors that store info on the global domain // Swap the indices for each of them, since AMReX data is Fortran order // and since the openPMD API assumes contiguous C order // - Size of the box, in integer number of cells - const amrex::Box& global_box = geom.Domain(); - auto global_size = getReversedVec(global_box.size()); + amrex::Box const & global_box = geom.Domain(); + auto const global_size = getReversedVec(global_box.size()); // - Grid spacing - std::vector<double> grid_spacing = getReversedVec(geom.CellSize()); + std::vector<double> const grid_spacing = getReversedVec(geom.CellSize()); // - Global offset - std::vector<double> global_offset = getReversedVec(geom.ProbLo()); + std::vector<double> const global_offset = getReversedVec(geom.ProbLo()); // - AxisLabels #if AMREX_SPACEDIM==3 - std::vector<std::string> axis_labels{"x", "y", "z"}; + std::vector<std::string> const axis_labels{"x", "y", "z"}; #else - std::vector<std::string> axis_labels{"x", "z"}; + std::vector<std::string> const axis_labels{"x", "z"}; #endif // Prepare the type of dataset that will be written - openPMD::Datatype datatype = openPMD::determineDatatype<amrex::Real>(); - auto dataset = openPMD::Dataset(datatype, global_size); + openPMD::Datatype const datatype = openPMD::determineDatatype<amrex::Real>(); + auto const dataset = openPMD::Dataset(datatype, global_size); + // meta data auto series_iteration = m_Series->iterations[iteration]; series_iteration.setTime( time ); + // meta data for ED-PIC extension + auto const period = geom.periodicity(); // TODO double-check: is this the proper global bound or of some level? 
+ std::vector< std::string > fieldBoundary( 6, "reflecting" ); + std::vector< std::string > particleBoundary( 6, "absorbing" ); +#if AMREX_SPACEDIM!=3 + fieldBoundary.resize(4); + particleBoundary.resize(4); +#endif + + for( auto i = 0u; i < fieldBoundary.size() / 2u; ++i ) + if( m_fieldPMLdirections.at( i ) ) + fieldBoundary.at( i ) = "open"; + + for( auto i = 0u; i < fieldBoundary.size() / 2u; ++i ) + if( period.isPeriodic( i ) ) { + fieldBoundary.at(2u*i ) = "periodic"; + fieldBoundary.at(2u*i + 1u) = "periodic"; + particleBoundary.at(2u*i ) = "periodic"; + particleBoundary.at(2u*i + 1u) = "periodic"; + } + + auto meshes = series_iteration.meshes; + meshes.setAttribute( "fieldSolver", [](){ +#ifdef WARPX_USE_PSATD + return "PSATD"; // TODO double-check if WARPX_USE_PSATD_HYBRID is covered +#else + switch( WarpX::maxwell_fdtd_solver_id ) { + case MaxwellSolverAlgo::Yee : return "Yee"; + case MaxwellSolverAlgo::CKC : return "CK"; + default: return "other"; + } +#endif + }() ); + meshes.setAttribute( "fieldBoundary", fieldBoundary ); + meshes.setAttribute( "particleBoundary", particleBoundary ); + meshes.setAttribute( "currentSmoothing", [](){ + if( WarpX::use_filter ) return "Binomial"; + else return "none"; + }() ); + if( WarpX::use_filter ) + meshes.setAttribute( "currentSmoothingParameters", [](){ + std::stringstream ss; + ss << "period=1;compensator=false"; + ss << ";numPasses_x=" << WarpX::filter_npass_each_dir[0]; +#if (AMREX_SPACEDIM == 3) + ss << ";numPasses_y=" << WarpX::filter_npass_each_dir[1]; + ss << ";numPasses_z=" << WarpX::filter_npass_each_dir[2]; +#else + ss << ";numPasses_z=" << WarpX::filter_npass_each_dir[1]; +#endif + std::string currentSmoothingParameters = ss.str(); + return std::move(currentSmoothingParameters); + }() ); + meshes.setAttribute("chargeCorrection", [](){ + if( WarpX::do_dive_cleaning ) return "hyperbolic"; // TODO or "spectral" or something? double-check + else return "none"; + }() ); + if( WarpX::do_dive_cleaning ) + meshes.setAttribute("chargeCorrectionParameters", "period=1"); + // Loop through the different components, i.e. 
different fields stored in mf for (int icomp=0; icomp<ncomp; icomp++){ // Check if this field is a vector or a scalar, and extract the field name - const std::string& varname = varnames[icomp]; + std::string const & varname = varnames[icomp]; std::string field_name = varname; std::string comp_name = openPMD::MeshRecordComponent::SCALAR; - for (const char* vector_field: {"E", "B", "j"}){ - for (const char* comp: {"x", "y", "z"}){ - if (varname[0] == *vector_field && varname[1] == *comp ){ + for( char const* vector_field: {"E", "B", "j"} ) { + for( char const* comp: {"x", "y", "z"} ) { + if( varname[0] == *vector_field && varname[1] == *comp ) { field_name = varname[0] + varname.substr(2); // Strip component comp_name = varname[1]; } @@ -365,35 +612,36 @@ WarpXOpenPMDPlot::WriteOpenPMDFields( //const std::string& filename, } // Setup the mesh record accordingly - auto mesh = series_iteration.meshes[field_name]; - mesh.setDataOrder(openPMD::Mesh::DataOrder::F); // MultiFab: Fortran order + auto mesh = meshes[field_name]; + mesh.setDataOrder( openPMD::Mesh::DataOrder::F ); // MultiFab: Fortran order of indices and axes mesh.setAxisLabels( axis_labels ); mesh.setGridSpacing( grid_spacing ); mesh.setGridGlobalOffset( global_offset ); + mesh.setAttribute( "fieldSmoothing", "none" ); setOpenPMDUnit( mesh, field_name ); // Create a new mesh record component, and store the associated metadata auto mesh_comp = mesh[comp_name]; mesh_comp.resetDataset( dataset ); // Cell-centered data: position is at 0.5 of a cell size. - mesh_comp.setPosition(std::vector<double>{AMREX_D_DECL(0.5, 0.5, 0.5)}); + mesh_comp.setPosition( std::vector<double>{AMREX_D_DECL(0.5, 0.5, 0.5)} ); // Loop through the multifab, and store each box as a chunk, // in the openPMD file. - for (amrex::MFIter mfi(mf); mfi.isValid(); ++mfi) { + for( amrex::MFIter mfi(mf); mfi.isValid(); ++mfi ) { - const amrex::FArrayBox& fab = mf[mfi]; - const amrex::Box& local_box = fab.box(); + amrex::FArrayBox const& fab = mf[mfi]; + amrex::Box const& local_box = fab.box(); // Determine the offset and size of this chunk - amrex::IntVect box_offset = local_box.smallEnd() - global_box.smallEnd(); - auto chunk_offset = getReversedVec(box_offset); - auto chunk_size = getReversedVec(local_box.size()); + amrex::IntVect const box_offset = local_box.smallEnd() - global_box.smallEnd(); + auto const chunk_offset = getReversedVec( box_offset ); + auto const chunk_size = getReversedVec( local_box.size() ); // Write local data - const double* local_data = fab.dataPtr(icomp); - mesh_comp.storeChunk(openPMD::shareRaw(local_data), - chunk_offset, chunk_size); + double const * local_data = fab.dataPtr( icomp ); + mesh_comp.storeChunk( openPMD::shareRaw(local_data), + chunk_offset, chunk_size ); } } // Flush data to disk after looping over all components diff --git a/Source/Diagnostics/requirements.txt b/Source/Diagnostics/requirements.txt new file mode 100644 index 000000000..2c29c7422 --- /dev/null +++ b/Source/Diagnostics/requirements.txt @@ -0,0 +1,2 @@ +# keep this entry for GitHub's dependency graph +openPMD-api>=0.10.3 diff --git a/Source/Evolve/WarpXEvolveEM.cpp b/Source/Evolve/WarpXEvolveEM.cpp index b35b3a6ed..1cb17287b 100644 --- a/Source/Evolve/WarpXEvolveEM.cpp +++ b/Source/Evolve/WarpXEvolveEM.cpp @@ -5,6 +5,7 @@ #include <WarpXConst.H> #include <WarpX_f.H> #include <WarpXUtil.H> +#include <WarpXAlgorithmSelection.H> #ifdef WARPX_USE_PY #include <WarpX_py.H> #endif @@ -75,8 +76,9 @@ WarpX::EvolveEM (int numsteps) // Particles have p^{n} and x^{n}. 
// is_synchronized is true. if (is_synchronized) { - FillBoundaryE(); - FillBoundaryB(); + // Not called at each iteration, so exchange all guard cells + FillBoundaryE(guard_cells.ng_alloc_EB, guard_cells.ng_Extra); + FillBoundaryB(guard_cells.ng_alloc_EB, guard_cells.ng_Extra); UpdateAuxilaryData(); // on first step, push p by -0.5*dt for (int lev = 0; lev <= finest_level; ++lev) { @@ -89,14 +91,23 @@ WarpX::EvolveEM (int numsteps) } else { // Beyond one step, we have E^{n} and B^{n}. // Particles have p^{n-1/2} and x^{n}. - FillBoundaryE(); - FillBoundaryB(); - UpdateAuxilaryData(); + // E and B are up-to-date inside the domain only + FillBoundaryE(guard_cells.ng_FieldGather, guard_cells.ng_Extra); + FillBoundaryB(guard_cells.ng_FieldGather, guard_cells.ng_Extra); + // E and B: enough guard cells to update Aux or call Field Gather in fp and cp + // Need to update Aux on lower levels, to interpolate to higher levels. +#ifndef WARPX_USE_PSATD + FillBoundaryAux(guard_cells.ng_UpdateAux); +#endif + UpdateAuxilaryData(); } if (do_subcycling == 0 || finest_level == 0) { OneStep_nosub(cur_time); + // E : guard cells are up-to-date + // B : guard cells are NOT up-to-date + // F : guard cells are NOT up-to-date } else if (do_subcycling == 1 && finest_level == 1) { OneStep_sub1(cur_time); } else { @@ -106,6 +117,8 @@ WarpX::EvolveEM (int numsteps) if (num_mirrors>0){ applyMirrors(cur_time); + // E : guard cells are NOT up-to-date + // B : guard cells are NOT up-to-date } #ifdef WARPX_USE_PY @@ -185,8 +198,14 @@ WarpX::EvolveEM (int numsteps) // slice gen // if (to_make_plot || do_insitu || to_make_slice_plot) { - FillBoundaryE(); - FillBoundaryB(); + // This is probably overkill, but it's not called often + FillBoundaryE(guard_cells.ng_alloc_EB, guard_cells.ng_Extra); + // This is probably overkill, but it's not called often + FillBoundaryB(guard_cells.ng_alloc_EB, guard_cells.ng_Extra); + // This is probably overkill, but it's not called often +#ifndef WARPX_USE_PSATD + FillBoundaryAux(guard_cells.ng_UpdateAux); +#endif UpdateAuxilaryData(); for (int lev = 0; lev <= finest_level; ++lev) { @@ -237,8 +256,14 @@ WarpX::EvolveEM (int numsteps) if (write_plot_file || do_insitu) { - FillBoundaryE(); - FillBoundaryB(); + // This is probably overkill, but it's not called often + FillBoundaryE(guard_cells.ng_alloc_EB, guard_cells.ng_Extra); + // This is probably overkill, but it's not called often + FillBoundaryB(guard_cells.ng_alloc_EB, guard_cells.ng_Extra); + // This is probably overkill +#ifndef WARPX_USE_PSATD + FillBoundaryAux(guard_cells.ng_UpdateAux); +#endif UpdateAuxilaryData(); for (int lev = 0; lev <= finest_level; ++lev) { @@ -300,6 +325,10 @@ WarpX::OneStep_nosub (Real cur_time) SyncRho(); + // At this point, J is up-to-date inside the domain, and E and B are + // up-to-date including enough guard cells for first step of the field + // solve. 
+ // For extended PML: copy J from regular grid to PML, and damp J in PML if (do_pml && pml_has_particles) CopyJPML(); if (do_pml && do_pml_j_damping) DampJPML(); @@ -309,24 +338,28 @@ WarpX::OneStep_nosub (Real cur_time) #ifdef WARPX_USE_PSATD PushPSATD(dt[0]); if (do_pml) DampPML(); - FillBoundaryE(); - FillBoundaryB(); + FillBoundaryE(guard_cells.ng_alloc_EB, guard_cells.ng_Extra); + FillBoundaryB(guard_cells.ng_alloc_EB, guard_cells.ng_Extra); #else EvolveF(0.5*dt[0], DtType::FirstHalf); - FillBoundaryF(); + FillBoundaryF(guard_cells.ng_FieldSolverF); EvolveB(0.5*dt[0]); // We now have B^{n+1/2} - FillBoundaryB(); + FillBoundaryB(guard_cells.ng_FieldSolver, IntVect::TheZeroVector()); EvolveE(dt[0]); // We now have E^{n+1} - FillBoundaryE(); + + FillBoundaryE(guard_cells.ng_FieldSolver, IntVect::TheZeroVector()); EvolveF(0.5*dt[0], DtType::SecondHalf); EvolveB(0.5*dt[0]); // We now have B^{n+1} if (do_pml) { + FillBoundaryF(guard_cells.ng_alloc_F); DampPML(); - FillBoundaryE(); + FillBoundaryE(guard_cells.ng_MovingWindow, IntVect::TheZeroVector()); + FillBoundaryF(guard_cells.ng_MovingWindow); + FillBoundaryB(guard_cells.ng_MovingWindow, IntVect::TheZeroVector()); } - FillBoundaryB(); - + // E and B are up-to-date in the domain, but all guard cells are + // outdated. #endif } @@ -345,11 +378,12 @@ WarpX::OneStep_nosub (Real cur_time) * steps of the fine grid. * */ + + void WarpX::OneStep_sub1 (Real curtime) { // TODO: we could save some charge depositions - // Loop over species. For each ionizable species, create particles in // product species. mypc->doFieldIonization(); @@ -369,21 +403,22 @@ WarpX::OneStep_sub1 (Real curtime) EvolveB(fine_lev, PatchType::fine, 0.5*dt[fine_lev]); EvolveF(fine_lev, PatchType::fine, 0.5*dt[fine_lev], DtType::FirstHalf); - FillBoundaryB(fine_lev, PatchType::fine); - FillBoundaryF(fine_lev, PatchType::fine); + FillBoundaryB(fine_lev, PatchType::fine, guard_cells.ng_FieldSolver); + FillBoundaryF(fine_lev, PatchType::fine, guard_cells.ng_alloc_F); EvolveE(fine_lev, PatchType::fine, dt[fine_lev]); - FillBoundaryE(fine_lev, PatchType::fine); + FillBoundaryE(fine_lev, PatchType::fine, guard_cells.ng_FieldGather); EvolveB(fine_lev, PatchType::fine, 0.5*dt[fine_lev]); EvolveF(fine_lev, PatchType::fine, 0.5*dt[fine_lev], DtType::SecondHalf); if (do_pml) { + FillBoundaryF(fine_lev, PatchType::fine, guard_cells.ng_alloc_F); DampPML(fine_lev, PatchType::fine); - FillBoundaryE(fine_lev, PatchType::fine); + FillBoundaryE(fine_lev, PatchType::fine, guard_cells.ng_FieldGather); } - FillBoundaryB(fine_lev, PatchType::fine); + FillBoundaryB(fine_lev, PatchType::fine, guard_cells.ng_FieldGather); // ii) Push particles on the coarse patch and mother grid. 
// Push the fields on the coarse patch and mother grid @@ -395,20 +430,21 @@ WarpX::OneStep_sub1 (Real curtime) EvolveB(fine_lev, PatchType::coarse, dt[fine_lev]); EvolveF(fine_lev, PatchType::coarse, dt[fine_lev], DtType::FirstHalf); - FillBoundaryB(fine_lev, PatchType::coarse); - FillBoundaryF(fine_lev, PatchType::coarse); + FillBoundaryB(fine_lev, PatchType::coarse, guard_cells.ng_FieldGather); + FillBoundaryF(fine_lev, PatchType::coarse, guard_cells.ng_FieldSolverF); EvolveE(fine_lev, PatchType::coarse, dt[fine_lev]); - FillBoundaryE(fine_lev, PatchType::coarse); + FillBoundaryE(fine_lev, PatchType::coarse, guard_cells.ng_FieldGather); EvolveB(coarse_lev, PatchType::fine, 0.5*dt[coarse_lev]); EvolveF(coarse_lev, PatchType::fine, 0.5*dt[coarse_lev], DtType::FirstHalf); - FillBoundaryB(coarse_lev, PatchType::fine); - FillBoundaryF(coarse_lev, PatchType::fine); + FillBoundaryB(coarse_lev, PatchType::fine, guard_cells.ng_FieldGather + guard_cells.ng_Extra); + FillBoundaryF(coarse_lev, PatchType::fine, guard_cells.ng_FieldSolverF); EvolveE(coarse_lev, PatchType::fine, 0.5*dt[coarse_lev]); - FillBoundaryE(coarse_lev, PatchType::fine); + FillBoundaryE(coarse_lev, PatchType::fine, guard_cells.ng_FieldGather + guard_cells.ng_Extra); + FillBoundaryAux(guard_cells.ng_UpdateAux); // iii) Get auxiliary fields on the fine grid, at dt[fine_lev] UpdateAuxilaryData(); @@ -423,22 +459,21 @@ WarpX::OneStep_sub1 (Real curtime) EvolveB(fine_lev, PatchType::fine, 0.5*dt[fine_lev]); EvolveF(fine_lev, PatchType::fine, 0.5*dt[fine_lev], DtType::FirstHalf); - FillBoundaryB(fine_lev, PatchType::fine); - FillBoundaryF(fine_lev, PatchType::fine); + FillBoundaryB(fine_lev, PatchType::fine, guard_cells.ng_FieldSolver); + FillBoundaryF(fine_lev, PatchType::fine, guard_cells.ng_FieldSolverF); EvolveE(fine_lev, PatchType::fine, dt[fine_lev]); - FillBoundaryE(fine_lev, PatchType::fine); + FillBoundaryE(fine_lev, PatchType::fine, guard_cells.ng_FieldSolver); EvolveB(fine_lev, PatchType::fine, 0.5*dt[fine_lev]); EvolveF(fine_lev, PatchType::fine, 0.5*dt[fine_lev], DtType::SecondHalf); if (do_pml) { DampPML(fine_lev, PatchType::fine); - FillBoundaryE(fine_lev, PatchType::fine); + FillBoundaryE(fine_lev, PatchType::fine, guard_cells.ng_FieldSolver); } - FillBoundaryB(fine_lev, PatchType::fine); - FillBoundaryF(fine_lev, PatchType::fine); + FillBoundaryB(fine_lev, PatchType::fine, guard_cells.ng_FieldSolver); // v) Push the fields on the coarse patch and mother grid // by only half a coarse step (second half) @@ -447,32 +482,38 @@ WarpX::OneStep_sub1 (Real curtime) AddRhoFromFineLevelandSumBoundary(coarse_lev, ncomps, ncomps); EvolveE(fine_lev, PatchType::coarse, dt[fine_lev]); - FillBoundaryE(fine_lev, PatchType::coarse); + FillBoundaryE(fine_lev, PatchType::coarse, guard_cells.ng_FieldSolver); EvolveB(fine_lev, PatchType::coarse, dt[fine_lev]); EvolveF(fine_lev, PatchType::coarse, dt[fine_lev], DtType::SecondHalf); if (do_pml) { + FillBoundaryF(fine_lev, PatchType::fine, guard_cells.ng_FieldSolverF); DampPML(fine_lev, PatchType::coarse); // do it twice DampPML(fine_lev, PatchType::coarse); - FillBoundaryE(fine_lev, PatchType::coarse); + FillBoundaryE(fine_lev, PatchType::coarse, guard_cells.ng_alloc_EB); } - FillBoundaryB(fine_lev, PatchType::coarse); - FillBoundaryF(fine_lev, PatchType::coarse); + FillBoundaryB(fine_lev, PatchType::coarse, guard_cells.ng_FieldSolver); + + FillBoundaryF(fine_lev, PatchType::coarse, guard_cells.ng_FieldSolverF); EvolveE(coarse_lev, PatchType::fine, 0.5*dt[coarse_lev]); - 
FillBoundaryE(coarse_lev, PatchType::fine); + FillBoundaryE(coarse_lev, PatchType::fine, guard_cells.ng_FieldSolver); EvolveB(coarse_lev, PatchType::fine, 0.5*dt[coarse_lev]); EvolveF(coarse_lev, PatchType::fine, 0.5*dt[coarse_lev], DtType::SecondHalf); if (do_pml) { + if (do_moving_window){ + // Exchange guard cells of PMLs only (0 cells are exchanged for the + // regular B field MultiFab). This is required as B and F have just been + // evolved. + FillBoundaryB(coarse_lev, PatchType::fine, IntVect::TheZeroVector()); + FillBoundaryF(coarse_lev, PatchType::fine, IntVect::TheZeroVector()); + } DampPML(coarse_lev, PatchType::fine); - FillBoundaryE(coarse_lev, PatchType::fine); } - - FillBoundaryB(coarse_lev, PatchType::fine); } void diff --git a/Source/FieldSolver/SpectralSolver/SpectralKSpace.cpp b/Source/FieldSolver/SpectralSolver/SpectralKSpace.cpp index aee131324..c21388aba 100644 --- a/Source/FieldSolver/SpectralSolver/SpectralKSpace.cpp +++ b/Source/FieldSolver/SpectralSolver/SpectralKSpace.cpp @@ -144,12 +144,7 @@ SpectralKSpace::getSpectralShiftFactor( const DistributionMapping& dm, } const Complex I{0,1}; for (int i=0; i<k.size(); i++ ){ -#ifdef AMREX_USE_GPU - shift[i] = thrust::exp( I*sign*k[i]*0.5*dx[i_dim] ); -#else - shift[i] = std::exp( I*sign*k[i]*0.5*dx[i_dim] ); -#endif - + shift[i] = exp( I*sign*k[i]*0.5*dx[i_dim]); } } return shift_factor; diff --git a/Source/Initialization/InjectorDensity.cpp b/Source/Initialization/InjectorDensity.cpp index 9f711a7af..fa54b342c 100644 --- a/Source/Initialization/InjectorDensity.cpp +++ b/Source/Initialization/InjectorDensity.cpp @@ -36,8 +36,8 @@ InjectorDensity::sharedMemoryNeeded () const noexcept case Type::parser: { // For parser injector, the 3D position of each particle - // is stored in shared memory. - return amrex::Gpu::numThreadsPerBlockParallelFor() * sizeof(double) * 3; + // and time, t, is stored in shared memory. + return amrex::Gpu::numThreadsPerBlockParallelFor() * sizeof(double) * 4; } default: return 0; diff --git a/Source/Initialization/InjectorMomentum.cpp b/Source/Initialization/InjectorMomentum.cpp index 8fadf0c4b..255883a34 100644 --- a/Source/Initialization/InjectorMomentum.cpp +++ b/Source/Initialization/InjectorMomentum.cpp @@ -30,9 +30,9 @@ InjectorMomentum::sharedMemoryNeeded () const noexcept { case Type::parser: { - // For parser injector, the 3D position of each particle + // For parser injector, the 3D position of each particle and time, t, // is stored in shared memory. - return amrex::Gpu::numThreadsPerBlockParallelFor() * sizeof(double) * 3; + return amrex::Gpu::numThreadsPerBlockParallelFor() * sizeof(double) * 4; } default: return 0; diff --git a/Source/Initialization/WarpXInitData.cpp b/Source/Initialization/WarpXInitData.cpp index be29a1cbc..48c30ae93 100644 --- a/Source/Initialization/WarpXInitData.cpp +++ b/Source/Initialization/WarpXInitData.cpp @@ -535,8 +535,8 @@ WarpX::InitializeExternalFieldsOnGridUsingParser ( mfzfab(i,j,k) = zfield_parser->getField(x,y,z); }, /* To allocate shared memory for the GPU threads. */ - /* But, for now only 3 doubles (x,y,z) are allocated. */ - amrex::Gpu::numThreadsPerBlockParallelFor() * sizeof(double) * 3 + /* But, for now only 4 doubles (x,y,z,t) are allocated. 
*/ + amrex::Gpu::numThreadsPerBlockParallelFor() * sizeof(double) * 4 ); } diff --git a/Source/Laser/LaserParticleContainer.cpp b/Source/Laser/LaserParticleContainer.cpp index ed9f5eda0..7a75b9f24 100644 --- a/Source/Laser/LaserParticleContainer.cpp +++ b/Source/Laser/LaserParticleContainer.cpp @@ -466,17 +466,19 @@ LaserParticleContainer::Evolve (int lev, // Current Deposition // // Deposit inside domains - int* ion_lev = nullptr; - DepositCurrent(pti, wp, uxp, uyp, uzp, ion_lev, &jx, &jy, &jz, - 0, np_current, thread_num, - lev, lev, dt); - - bool has_buffer = cjx; - if (has_buffer){ - // Deposit in buffers - DepositCurrent(pti, wp, uxp, uyp, uzp, ion_lev, cjx, cjy, cjz, - np_current, np-np_current, thread_num, - lev, lev-1, dt); + { + int* ion_lev = nullptr; + DepositCurrent(pti, wp, uxp, uyp, uzp, ion_lev, &jx, &jy, &jz, + 0, np_current, thread_num, + lev, lev, dt); + + bool has_buffer = cjx; + if (has_buffer){ + // Deposit in buffers + DepositCurrent(pti, wp, uxp, uyp, uzp, ion_lev, cjx, cjy, cjz, + np_current, np-np_current, thread_num, + lev, lev-1, dt); + } } // diff --git a/Source/Parallelization/GuardCellManager.H b/Source/Parallelization/GuardCellManager.H new file mode 100644 index 000000000..c57745b84 --- /dev/null +++ b/Source/Parallelization/GuardCellManager.H @@ -0,0 +1,76 @@ +#ifndef GUARDCELLMANAGER_H_ +#define GUARDCELLMANAGER_H_ + +#include <AMReX_IntVect.H> + +/** + * \brief This class computes and stores the number of guard cells needed for + * the allocation of the MultiFabs and required for each part of the PIC loop. + */ +class guardCellManager{ + +public: + + /** + * \brief Initialize number of guard cells depending on the options used. + * + * \param do_subcycling bool, whether to use subcycling + * \param do_fdtd_nci_corr bool, whether to use Godfrey NCI corrector + * \param do_nodal bool, whether the field solver is nodal + * \param do_moving_window bool, whether to use moving window + * \param do_fft_mpi_dec bool, whether to do parallel FFTs for PSATD + * \param aux_is_nodal bool, true if the aux grid is nodal + * \param moving_window_dir direction of moving window + * \param nox order of current deposition + * \param nox_fft order of PSATD in x direction + * \param noy_fft order of PSATD in y direction + * \param noz_fft order of PSATD in z direction + * \param nci_corr_stencil stencil of NCI corrector + * \param maxwell_fdtd_solver_id id of the Maxwell solver + * \param max_level max level of the simulation + */ + void Init( + const bool do_subcycling, + const bool do_fdtd_nci_corr, + const bool do_nodal, + const bool do_moving_window, + const bool do_fft_mpi_dec, + const bool aux_is_nodal, + const int moving_window_dir, + const int nox, + const int nox_fft, const int noy_fft, const int noz_fft, + const int nci_corr_stencil, + const int maxwell_fdtd_solver_id, + const int max_level, + const bool exchange_all_guard_cells); + + // Guard cells allocated for MultiFabs E and B + amrex::IntVect ng_alloc_EB = amrex::IntVect::TheZeroVector(); + // Guard cells allocated for MultiFab J + amrex::IntVect ng_alloc_J = amrex::IntVect::TheZeroVector(); + // Guard cells allocated for MultiFab Rho + amrex::IntVect ng_alloc_Rho = amrex::IntVect::TheZeroVector(); + // Guard cells allocated for MultiFab F + amrex::IntVect ng_alloc_F = amrex::IntVect::TheZeroVector(); + + // Guard cells exchanged for specific parts of the PIC loop + + // Number of guard cells of E and B that must be exchanged before Field Solver + amrex::IntVect ng_FieldSolver = amrex::IntVect::TheZeroVector(); + // 
Number of guard cells of F that must be exchanged before Field Solver + amrex::IntVect ng_FieldSolverF = amrex::IntVect::TheZeroVector(); + // Number of guard cells of E and B that must be exchanged before Field Gather + amrex::IntVect ng_FieldGather = amrex::IntVect::TheZeroVector(); + // Number of guard cells of E and B that must be exchanged before updating the Aux grid + amrex::IntVect ng_UpdateAux = amrex::IntVect::TheZeroVector(); + // Number of guard cells of all MultiFabs that must be exchanged before moving window + amrex::IntVect ng_MovingWindow = amrex::IntVect::TheZeroVector(); + + // When the auxiliary grid is nodal but the field solver is staggered + // (typically with momentum-conserving gather with FDTD Yee solver), + // an extra guard cell is needed on the fine grid to do the interpolation + // for E and B. + amrex::IntVect ng_Extra = amrex::IntVect::TheZeroVector(); +}; + +#endif // GUARDCELLMANAGER_H_ diff --git a/Source/Parallelization/GuardCellManager.cpp b/Source/Parallelization/GuardCellManager.cpp new file mode 100644 index 000000000..a275f4c00 --- /dev/null +++ b/Source/Parallelization/GuardCellManager.cpp @@ -0,0 +1,171 @@ +#include "GuardCellManager.H" +#include "NCIGodfreyFilter.H" +#include <AMReX_Print.H> + +using namespace amrex; + +void +guardCellManager::Init( + const bool do_subcycling, + const bool do_fdtd_nci_corr, + const bool do_nodal, + const bool do_moving_window, + const bool do_fft_mpi_dec, + const bool aux_is_nodal, + const int moving_window_dir, + const int nox, + const int nox_fft, const int noy_fft, const int noz_fft, + const int nci_corr_stencil, + const int maxwell_fdtd_solver_id, + const int max_level, + const bool exchange_all_guard_cells) +{ + // When using subcycling, the particles on the fine level perform two pushes + // before being redistributed; therefore, we need one extra guard cell + // (the particles may move by 2*c*dt) + const int ngx_tmp = (max_level > 0 && do_subcycling == 1) ? nox+1 : nox; + const int ngy_tmp = (max_level > 0 && do_subcycling == 1) ? nox+1 : nox; + const int ngz_tmp = (max_level > 0 && do_subcycling == 1) ? nox+1 : nox; + + // Ex, Ey, Ez, Bx, By, and Bz have the same number of ghost cells. + // jx, jy, jz and rho have the same number of ghost cells. + // E and B have the same number of ghost cells as j and rho if NCI filter is not used, + // but different number of ghost cells in z-direction if NCI filter is used. + // The number of cells should be even, in order to easily perform the + // interpolation from coarse grid to fine grid. + int ngx = (ngx_tmp % 2) ? ngx_tmp+1 : ngx_tmp; // Always even number + int ngy = (ngy_tmp % 2) ? ngy_tmp+1 : ngy_tmp; // Always even number + int ngz_nonci = (ngz_tmp % 2) ? ngz_tmp+1 : ngz_tmp; // Always even number + int ngz; + if (do_fdtd_nci_corr) { + int ng = ngz_tmp + nci_corr_stencil; + ngz = (ng % 2) ? ng+1 : ng; + } else { + ngz = ngz_nonci; + } + + // J is only interpolated from fine to coarse (not coarse to fine) + // and therefore does not need to be even. + int ngJx = ngx_tmp; + int ngJy = ngy_tmp; + int ngJz = ngz_tmp; + + // When calling the moving window (with one level of refinement), we shift + // the fine grid by 2 cells; therefore, we need at least 2 guard cells + // on level 1. This may not be necessary for level 0. 
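To make the sizing rules in guardCellManager::Init concrete, here is a small standalone sketch (not part of the patch) that reproduces the arithmetic for one assumed configuration: order-3 particle shapes, one refinement level with subcycling, moving window on, and an NCI corrector stencil of width 4. All input values are made up for illustration.

    #include <algorithm>
    #include <iostream>

    int main ()
    {
        const int nox = 3, nci_corr_stencil = 4, max_level = 1;
        const bool do_subcycling = true, do_moving_window = true;

        // Two pushes before redistribution under subcycling -> one extra cell.
        const int ng_tmp = (max_level > 0 && do_subcycling) ? nox+1 : nox;   // 4
        // Round up to an even count for coarse/fine interpolation.
        int ngx = (ng_tmp % 2) ? ng_tmp+1 : ng_tmp;                          // 4
        // The NCI corrector widens the guard region in z by its stencil width.
        int ngz = ng_tmp + nci_corr_stencil;
        ngz = (ngz % 2) ? ngz+1 : ngz;                                       // 8
        // The moving window shifts the fine grid by 2 cells per shift.
        if (do_moving_window) { ngx = std::max(ngx,2); ngz = std::max(ngz,2); }
        std::cout << "ngx=" << ngx << " ngz=" << ngz << "\n";                // ngx=4 ngz=8
        return 0;
    }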
+ if (do_moving_window) { + ngx = std::max(ngx,2); + ngy = std::max(ngy,2); + ngz = std::max(ngz,2); + ngJx = std::max(ngJx,2); + ngJy = std::max(ngJy,2); + ngJz = std::max(ngJz,2); + } + +#if (AMREX_SPACEDIM == 3) + ng_alloc_EB = IntVect(ngx,ngy,ngz); + ng_alloc_J = IntVect(ngJx,ngJy,ngJz); +#elif (AMREX_SPACEDIM == 2) + ng_alloc_EB = IntVect(ngx,ngz); + ng_alloc_J = IntVect(ngJx,ngJz); +#endif + + ng_alloc_Rho = ng_alloc_J+1; // One extra ghost cell, so that it's safe to deposit charge density + // after pushing particles. + int ng_alloc_F_int = (do_moving_window) ? 2 : 0; + // CKC solver requires one additional guard cell + if (maxwell_fdtd_solver_id == 1) ng_alloc_F_int = std::max( ng_alloc_F_int, 1 ); + ng_alloc_F = IntVect(AMREX_D_DECL(ng_alloc_F_int, ng_alloc_F_int, ng_alloc_F_int)); + +#ifdef WARPX_USE_PSATD + if (do_fft_mpi_dec == false){ + // All boxes should have the same number of guard cells + // (to avoid temporary parallel copies) + // Thus take the max of the required number of guards for each field + // Also: the number of guard cells should be enough to contain + // the stencil of the FFT solver. Here, this number (`ngFFT`) + // is determined *empirically* to be the order of the solver + // for nodal, and half the order of the solver for staggered. + IntVect ngFFT; + if (do_nodal) { + ngFFT = IntVect(AMREX_D_DECL(nox_fft, noy_fft, noz_fft)); + } else { + ngFFT = IntVect(AMREX_D_DECL(nox_fft/2, noy_fft/2, noz_fft/2)); + } + for (int i_dim=0; i_dim<AMREX_SPACEDIM; i_dim++ ){ + int ng_required = ngFFT[i_dim]; + // Get the max + ng_required = std::max( ng_required, ng_alloc_EB[i_dim] ); + ng_required = std::max( ng_required, ng_alloc_J[i_dim] ); + ng_required = std::max( ng_required, ng_alloc_Rho[i_dim] ); + ng_required = std::max( ng_required, ng_alloc_F[i_dim] ); + // Set the guard cells to this max + ng_alloc_EB[i_dim] = ng_required; + ng_alloc_J[i_dim] = ng_required; + ng_alloc_F[i_dim] = ng_required; + ng_alloc_Rho[i_dim] = ng_required; + ng_alloc_F_int = ng_required; + } + } + ng_alloc_F = IntVect(AMREX_D_DECL(ng_alloc_F_int, ng_alloc_F_int, ng_alloc_F_int)); +#endif + + ng_Extra = IntVect(static_cast<int>(aux_is_nodal and !do_nodal)); + + // Compute number of cells required for Field Solver +#ifdef WARPX_USE_PSATD + ng_FieldSolver = ng_alloc_EB; + ng_FieldSolverF = ng_alloc_EB; +#else + ng_FieldSolver = IntVect(AMREX_D_DECL(1,1,1)); + ng_FieldSolverF = IntVect(AMREX_D_DECL(1,1,1)); +#endif + + if (exchange_all_guard_cells){ + // Run in safe mode: exchange all allocated guard cells at each + // call of FillBoundary + ng_FieldSolver = ng_alloc_EB; + ng_FieldSolverF = ng_alloc_F; + ng_FieldGather = ng_alloc_EB; + ng_UpdateAux = ng_alloc_EB; + if (do_moving_window){ + ng_MovingWindow = ng_alloc_EB; + } + } else { + + ng_FieldSolver = ng_FieldSolver.min(ng_alloc_EB); + + // Compute number of cells required for Field Gather + int FGcell[4] = {0,1,1,2}; // Index is nox + IntVect ng_FieldGather_noNCI = IntVect(AMREX_D_DECL(FGcell[nox],FGcell[nox],FGcell[nox])); + // Add one cell if momentum_conserving gather in a staggered-field simulation + ng_FieldGather_noNCI += ng_Extra; + // Not sure why, but need one extra guard cell when using MR + if (max_level >= 1) ng_FieldGather_noNCI += ng_Extra; + ng_FieldGather_noNCI = ng_FieldGather_noNCI.min(ng_alloc_EB); + // If NCI filter, add guard cells in the z direction + IntVect ng_NCIFilter = IntVect::TheZeroVector(); + if (do_fdtd_nci_corr) + ng_NCIFilter[AMREX_SPACEDIM-1] = NCIGodfreyFilter::m_stencil_width; + // Note: communications of 
guard cells for bilinear filter are handled + // separately. + ng_FieldGather = ng_FieldGather_noNCI + ng_NCIFilter; + + // Guard cells for auxiliary grid. + // Not sure why there is a 2* here... + ng_UpdateAux = 2*ng_FieldGather_noNCI + ng_NCIFilter; + + // Make sure we do not exchange more guard cells than allocated. + ng_FieldGather = ng_FieldGather.min(ng_alloc_EB); + ng_UpdateAux = ng_UpdateAux.min(ng_alloc_EB); + ng_FieldSolverF = ng_FieldSolverF.min(ng_alloc_F); + // Only FillBoundary(ng_FieldGather) is called between consecutive + // field solves. So ng_FieldGather must have enough cells + // for the field solve too. + ng_FieldGather = ng_FieldGather.max(ng_FieldSolver); + + if (do_moving_window){ + ng_MovingWindow[moving_window_dir] = 1; + } + } +} diff --git a/Source/Parallelization/InterpolateCurrentFineToCoarse.H b/Source/Parallelization/InterpolateCurrentFineToCoarse.H index cbbcdfab5..58451c6b7 100644 --- a/Source/Parallelization/InterpolateCurrentFineToCoarse.H +++ b/Source/Parallelization/InterpolateCurrentFineToCoarse.H @@ -63,9 +63,9 @@ public: // return zero for out-of-bounds accesses during interpolation // this is efficiently used as a method to add neutral elements beyond guards in the average below - auto const fine = [fine_unsafe] AMREX_GPU_DEVICE (int const j, int const k, int const l) noexcept + auto const fine = [fine_unsafe] AMREX_GPU_DEVICE (int const jj, int const kk, int const ll) noexcept { - return fine_unsafe.contains(j, k, l) ? fine_unsafe(j, k, l) : amrex::Real{0.}; + return fine_unsafe.contains(jj, kk, ll) ? fine_unsafe(jj, kk, ll) : amrex::Real{0.}; }; int const ii = i * m_refinement_ratio; diff --git a/Source/Parallelization/Make.package b/Source/Parallelization/Make.package index 7c3c38627..065556b33 100644 --- a/Source/Parallelization/Make.package +++ b/Source/Parallelization/Make.package @@ -1,7 +1,9 @@ CEXE_sources += WarpXComm.cpp CEXE_sources += WarpXRegrid.cpp +CEXE_sources += GuardCellManager.cpp CEXE_headers += WarpXSumGuardCells.H CEXE_headers += WarpXComm_K.H +CEXE_headers += GuardCellManager.H CEXE_headers += WarpXComm.H INCLUDE_LOCATIONS += $(WARPX_HOME)/Source/Parallelization diff --git a/Source/Parallelization/WarpXComm.cpp b/Source/Parallelization/WarpXComm.cpp index b61ae4fc7..2e0cbdfad 100644 --- a/Source/Parallelization/WarpXComm.cpp +++ b/Source/Parallelization/WarpXComm.cpp @@ -321,41 +321,41 @@ WarpX::UpdateAuxilaryDataSameType () } void -WarpX::FillBoundaryB () +WarpX::FillBoundaryB (IntVect ng, IntVect ng_extra_fine) { for (int lev = 0; lev <= finest_level; ++lev) { - FillBoundaryB(lev); + FillBoundaryB(lev, ng, ng_extra_fine); } } void -WarpX::FillBoundaryE () +WarpX::FillBoundaryE (IntVect ng, IntVect ng_extra_fine) { for (int lev = 0; lev <= finest_level; ++lev) { - FillBoundaryE(lev); + FillBoundaryE(lev, ng, ng_extra_fine); } } void -WarpX::FillBoundaryF () +WarpX::FillBoundaryF (IntVect ng) { for (int lev = 0; lev <= finest_level; ++lev) { - FillBoundaryF(lev); + FillBoundaryF(lev, ng); } } void -WarpX::FillBoundaryE(int lev) +WarpX::FillBoundaryE(int lev, IntVect ng, IntVect ng_extra_fine) { - FillBoundaryE(lev, PatchType::fine); - if (lev > 0) FillBoundaryE(lev, PatchType::coarse); + FillBoundaryE(lev, PatchType::fine, ng+ng_extra_fine); + if (lev > 0) FillBoundaryE(lev, PatchType::coarse, ng); } void -WarpX::FillBoundaryE (int lev, PatchType patch_type) +WarpX::FillBoundaryE (int lev, PatchType patch_type, IntVect ng) { if (patch_type == PatchType::fine) { @@ -370,8 +370,12 @@ WarpX::FillBoundaryE (int lev, PatchType 
patch_type) } const auto& period = Geom(lev).periodicity(); - Vector<MultiFab*> mf{Efield_fp[lev][0].get(),Efield_fp[lev][1].get(),Efield_fp[lev][2].get()}; - amrex::FillBoundary(mf, period); + AMREX_ALWAYS_ASSERT_WITH_MESSAGE( + ng <= Efield_fp[lev][0]->nGrowVect(), + "Error: in FillBoundaryE, requested more guard cells than allocated"); + Efield_fp[lev][0]->FillBoundary(ng, period); + Efield_fp[lev][1]->FillBoundary(ng, period); + Efield_fp[lev][2]->FillBoundary(ng, period); } else if (patch_type == PatchType::coarse) { @@ -386,20 +390,24 @@ WarpX::FillBoundaryE (int lev, PatchType patch_type) } const auto& cperiod = Geom(lev-1).periodicity(); - Vector<MultiFab*> mf{Efield_cp[lev][0].get(),Efield_cp[lev][1].get(),Efield_cp[lev][2].get()}; - amrex::FillBoundary(mf, cperiod); + AMREX_ALWAYS_ASSERT_WITH_MESSAGE( + ng <= Efield_cp[lev][0]->nGrowVect(), + "Error: in FillBoundaryE, requested more guard cells than allocated"); + Efield_cp[lev][0]->FillBoundary(ng, cperiod); + Efield_cp[lev][1]->FillBoundary(ng, cperiod); + Efield_cp[lev][2]->FillBoundary(ng, cperiod); } } void -WarpX::FillBoundaryB (int lev) +WarpX::FillBoundaryB (int lev, IntVect ng, IntVect ng_extra_fine) { - FillBoundaryB(lev, PatchType::fine); - if (lev > 0) FillBoundaryB(lev, PatchType::coarse); + FillBoundaryB(lev, PatchType::fine, ng + ng_extra_fine); + if (lev > 0) FillBoundaryB(lev, PatchType::coarse, ng); } void -WarpX::FillBoundaryB (int lev, PatchType patch_type) +WarpX::FillBoundaryB (int lev, PatchType patch_type, IntVect ng) { if (patch_type == PatchType::fine) { @@ -413,8 +421,12 @@ WarpX::FillBoundaryB (int lev, PatchType patch_type) pml[lev]->FillBoundaryB(patch_type); } const auto& period = Geom(lev).periodicity(); - Vector<MultiFab*> mf{Bfield_fp[lev][0].get(),Bfield_fp[lev][1].get(),Bfield_fp[lev][2].get()}; - amrex::FillBoundary(mf, period); + AMREX_ALWAYS_ASSERT_WITH_MESSAGE( + ng <= Bfield_fp[lev][0]->nGrowVect(), + "Error: in FillBoundaryB, requested more guard cells than allocated"); + Bfield_fp[lev][0]->FillBoundary(ng, period); + Bfield_fp[lev][1]->FillBoundary(ng, period); + Bfield_fp[lev][2]->FillBoundary(ng, period); } else if (patch_type == PatchType::coarse) { @@ -428,20 +440,24 @@ WarpX::FillBoundaryB (int lev, PatchType patch_type) pml[lev]->FillBoundaryB(patch_type); } const auto& cperiod = Geom(lev-1).periodicity(); - Vector<MultiFab*> mf{Bfield_cp[lev][0].get(),Bfield_cp[lev][1].get(),Bfield_cp[lev][2].get()}; - amrex::FillBoundary(mf, cperiod); + AMREX_ALWAYS_ASSERT_WITH_MESSAGE( + ng <= Bfield_cp[lev][0]->nGrowVect(), + "Error: in FillBoundaryB, requested more guard cells than allocated"); + Bfield_cp[lev][0]->FillBoundary(ng, cperiod); + Bfield_cp[lev][1]->FillBoundary(ng, cperiod); + Bfield_cp[lev][2]->FillBoundary(ng, cperiod); } } void -WarpX::FillBoundaryF (int lev) +WarpX::FillBoundaryF (int lev, IntVect ng) { - FillBoundaryF(lev, PatchType::fine); - if (lev > 0) FillBoundaryF(lev, PatchType::coarse); + FillBoundaryF(lev, PatchType::fine, ng); + if (lev > 0) FillBoundaryF(lev, PatchType::coarse, ng); } void -WarpX::FillBoundaryF (int lev, PatchType patch_type) +WarpX::FillBoundaryF (int lev, PatchType patch_type, IntVect ng) { if (patch_type == PatchType::fine && F_fp[lev]) { @@ -453,7 +469,10 @@ WarpX::FillBoundaryF (int lev, PatchType patch_type) } const auto& period = Geom(lev).periodicity(); - F_fp[lev]->FillBoundary(period); + AMREX_ALWAYS_ASSERT_WITH_MESSAGE( + ng <= F_fp[lev]->nGrowVect(), + "Error: in FillBoundaryF, requested more guard cells than allocated"); + 
F_fp[lev]->FillBoundary(ng, period); } else if (patch_type == PatchType::coarse && F_cp[lev]) { @@ -465,11 +484,35 @@ WarpX::FillBoundaryF (int lev, PatchType patch_type) } const auto& cperiod = Geom(lev-1).periodicity(); - F_cp[lev]->FillBoundary(cperiod); + AMREX_ALWAYS_ASSERT_WITH_MESSAGE( + ng <= F_cp[lev]->nGrowVect(), + "Error: in FillBoundaryF, requested more guard cells than allocated"); + F_cp[lev]->FillBoundary(ng, cperiod); } } void +WarpX::FillBoundaryAux (IntVect ng) +{ + for (int lev = 0; lev <= finest_level-1; ++lev) + { + FillBoundaryAux(lev, ng); + } +} + +void +WarpX::FillBoundaryAux (int lev, IntVect ng) +{ + const auto& period = Geom(lev).periodicity(); + Efield_aux[lev][0]->FillBoundary(ng, period); + Efield_aux[lev][1]->FillBoundary(ng, period); + Efield_aux[lev][2]->FillBoundary(ng, period); + Bfield_aux[lev][0]->FillBoundary(ng, period); + Bfield_aux[lev][1]->FillBoundary(ng, period); + Bfield_aux[lev][2]->FillBoundary(ng, period); +} + +void WarpX::SyncCurrent () { BL_PROFILE("SyncCurrent()"); diff --git a/Source/Parser/GpuParser.H b/Source/Parser/GpuParser.H index c158ee314..ff855d275 100644 --- a/Source/Parser/GpuParser.H +++ b/Source/Parser/GpuParser.H @@ -17,7 +17,7 @@ public: AMREX_GPU_HOST_DEVICE amrex::Real - operator() (amrex::Real x, amrex::Real y, amrex::Real z) const noexcept + operator() (amrex::Real x, amrex::Real y, amrex::Real z, amrex::Real t=0.0) const noexcept { #ifdef AMREX_USE_GPU @@ -27,15 +27,17 @@ public: amrex::Gpu::SharedMemory<amrex::Real> gsm; amrex::Real* p = gsm.dataPtr(); int tid = threadIdx.x + threadIdx.y*blockDim.x + threadIdx.z*(blockDim.x*blockDim.y); - p[tid*3] = x; - p[tid*3+1] = y; - p[tid*3+2] = z; + p[tid*4] = x; + p[tid*4+1] = y; + p[tid*4+2] = z; + p[tid*4+3] = t; return wp_ast_eval(m_gpu_parser.ast); #else // WarpX compiled for GPU, function compiled for __host__ m_var.x = x; m_var.y = y; m_var.z = z; + m_t = t; return wp_ast_eval(m_cpu_parser.ast); #endif @@ -49,10 +51,12 @@ public: m_var[tid].x = x; m_var[tid].y = y; m_var[tid].z = z; + m_t[tid] = t; return wp_ast_eval(m_parser[tid]->ast); #endif } + private: #ifdef AMREX_USE_GPU @@ -61,10 +65,12 @@ private: // Copy of the parser running on __host__ struct wp_parser m_cpu_parser; mutable amrex::XDim3 m_var; + mutable amrex::Real m_t; #else // Only one parser struct wp_parser** m_parser; mutable amrex::XDim3* m_var; + mutable amrex::Real* m_t; int nthreads; #endif }; diff --git a/Source/Parser/GpuParser.cpp b/Source/Parser/GpuParser.cpp index 5078b498b..ba904666b 100644 --- a/Source/Parser/GpuParser.cpp +++ b/Source/Parser/GpuParser.cpp @@ -16,6 +16,7 @@ GpuParser::GpuParser (WarpXParser const& wp) wp_parser_regvar_gpu(&m_gpu_parser, "x", 0); wp_parser_regvar_gpu(&m_gpu_parser, "y", 1); wp_parser_regvar_gpu(&m_gpu_parser, "z", 2); + wp_parser_regvar_gpu(&m_gpu_parser, "t", 3); // Initialize CPU parser: allocate memory in CUDA managed memory, // copy all data needed on CPU to m_cpu_parser @@ -28,6 +29,7 @@ GpuParser::GpuParser (WarpXParser const& wp) wp_parser_regvar(&m_cpu_parser, "x", &(m_var.x)); wp_parser_regvar(&m_cpu_parser, "y", &(m_var.y)); wp_parser_regvar(&m_cpu_parser, "z", &(m_var.z)); + wp_parser_regvar(&m_cpu_parser, "t", &(m_t)); #else // not defined AMREX_USE_GPU @@ -39,6 +41,7 @@ GpuParser::GpuParser (WarpXParser const& wp) m_parser = ::new struct wp_parser*[nthreads]; m_var = ::new amrex::XDim3[nthreads]; + m_t = ::new amrex::Real[nthreads]; for (int tid = 0; tid < nthreads; ++tid) { @@ -50,6 +53,7 @@ GpuParser::GpuParser (WarpXParser const& wp) 
wp_parser_regvar(m_parser[tid], "x", &(m_var[tid].x)); wp_parser_regvar(m_parser[tid], "y", &(m_var[tid].y)); wp_parser_regvar(m_parser[tid], "z", &(m_var[tid].z)); + wp_parser_regvar(m_parser[tid], "t", &(m_t[tid])); } #endif // AMREX_USE_GPU diff --git a/Source/Parser/WarpXParserWrapper.H b/Source/Parser/WarpXParserWrapper.H index 2dd7f72c7..2a4ff6fd2 100644 --- a/Source/Parser/WarpXParserWrapper.H +++ b/Source/Parser/WarpXParserWrapper.H @@ -24,7 +24,7 @@ struct ParserWrapper AMREX_GPU_HOST_DEVICE amrex::Real - getField (amrex::Real x, amrex::Real y, amrex::Real z) const noexcept + getField (amrex::Real x, amrex::Real y, amrex::Real z, amrex::Real t=0.0) const noexcept { - return m_parser(x,y,z); + return m_parser(x,y,z,t); } diff --git a/Source/Parser/wp_parser_c.h b/Source/Parser/wp_parser_c.h index 970d6b355..2cf0e2c00 100644 --- a/Source/Parser/wp_parser_c.h +++ b/Source/Parser/wp_parser_c.h @@ -30,7 +30,7 @@ wp_ast_eval (struct wp_node* node) #ifdef AMREX_DEVICE_COMPILE extern __shared__ amrex_real extern_xyz[]; int tid = threadIdx.x + threadIdx.y*blockDim.x + threadIdx.z*(blockDim.x*blockDim.y); - amrex_real* x = extern_xyz + tid*3; + amrex_real* x = extern_xyz + tid*4; // parser assumes 4 independent variables (x,y,z,t) #endif switch (node->type) diff --git a/Source/Particles/MultiParticleContainer.H b/Source/Particles/MultiParticleContainer.H index 9db129b05..ed1c2f371 100644 --- a/Source/Particles/MultiParticleContainer.H +++ b/Source/Particles/MultiParticleContainer.H @@ -8,6 +8,7 @@ #include <RigidInjectedParticleContainer.H> #include <PhotonParticleContainer.H> #include <LaserParticleContainer.H> +#include <WarpXParserWrapper.H> #include <AMReX_Particles.H> #ifdef WARPX_QED @@ -215,6 +216,21 @@ public: IonizationProcess ionization_process; + std::string m_B_ext_particle_s = "default"; + std::string m_E_ext_particle_s = "default"; + // External fields added to particle fields. 
+ amrex::Vector<amrex::Real> m_B_external_particle; + amrex::Vector<amrex::Real> m_E_external_particle; + // ParserWrapper for B_external on the particle + std::unique_ptr<ParserWrapper> m_Bx_particle_parser; + std::unique_ptr<ParserWrapper> m_By_particle_parser; + std::unique_ptr<ParserWrapper> m_Bz_particle_parser; + // ParserWrapper for E_external on the particle + std::unique_ptr<ParserWrapper> m_Ex_particle_parser; + std::unique_ptr<ParserWrapper> m_Ey_particle_parser; + std::unique_ptr<ParserWrapper> m_Ez_particle_parser; + + protected: // Particle container types diff --git a/Source/Particles/MultiParticleContainer.cpp b/Source/Particles/MultiParticleContainer.cpp index d84bc1afa..ab836ce9d 100644 --- a/Source/Particles/MultiParticleContainer.cpp +++ b/Source/Particles/MultiParticleContainer.cpp @@ -72,6 +72,93 @@ MultiParticleContainer::ReadParameters () { ParmParse pp("particles"); + // allocating and initializing default values of external fields for particles + m_E_external_particle.resize(3); + m_B_external_particle.resize(3); + // initialize E and B fields to 0.0 + for (int idim = 0; idim < 3; ++idim) { + m_E_external_particle[idim] = 0.0; + m_B_external_particle[idim] = 0.0; + } + // default values of E_external_particle and B_external_particle + // are used to set the E and B field when "constant" or "parser" + // is not explicitly used in the input + pp.query("B_ext_particle_init_style", m_B_ext_particle_s); + std::transform(m_B_ext_particle_s.begin(), + m_B_ext_particle_s.end(), + m_B_ext_particle_s.begin(), + ::tolower); + pp.query("E_ext_particle_init_style", m_E_ext_particle_s); + std::transform(m_E_ext_particle_s.begin(), + m_E_ext_particle_s.end(), + m_E_ext_particle_s.begin(), + ::tolower); + // if the input string for B_external on particles is "constant" + // then the values for the external B on particles must + // be provided in the input file. + if (m_B_ext_particle_s == "constant") + pp.getarr("B_external_particle", m_B_external_particle); + + // if the input string for E_external on particles is "constant" + // then the values for the external E on particles must + // be provided in the input file. + if (m_E_ext_particle_s == "constant") + pp.getarr("E_external_particle", m_E_external_particle); + + // if the input string for B_ext_particle_s is + // "parse_b_ext_particle_function" then the mathematical expression + // for the Bx_, By_, Bz_external_particle_function(x,y,z,t) + // must be provided in the input file. 
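The blocks just below turn those input strings into ParserWrapper objects via makeParser. As a rough standalone sketch of what a caller then gets (hypothetical expression and evaluation point; assumes makeParser is declared in WarpXUtil.H):

    #include <WarpXParserWrapper.H>   // ParserWrapper (introduced in this diff)
    #include <WarpXUtil.H>            // makeParser, which registers {x,y,z,t}
    #include <memory>

    amrex::Real sample_Bx (amrex::Real x, amrex::Real y, amrex::Real z, amrex::Real t)
    {
        // Same pattern as m_Bx_particle_parser below, with a made-up expression.
        std::unique_ptr<ParserWrapper> Bx_parser;
        Bx_parser.reset(new ParserWrapper(makeParser("1.0e-4*cos(0.5*t)")));
        // getField now takes the time as a fourth argument (it defaults to 0).
        return Bx_parser->getField(x, y, z, t);
    }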
+ if (m_B_ext_particle_s == "parse_b_ext_particle_function") { + // store the mathematical expression as string + std::string str_Bx_ext_particle_function; + std::string str_By_ext_particle_function; + std::string str_Bz_ext_particle_function; + Store_parserString(pp, "Bx_external_particle_function(x,y,z,t)", + str_Bx_ext_particle_function); + Store_parserString(pp, "By_external_particle_function(x,y,z,t)", + str_By_ext_particle_function); + Store_parserString(pp, "Bz_external_particle_function(x,y,z,t)", + str_Bz_ext_particle_function); + + // Parser for B_external on the particle + m_Bx_particle_parser.reset(new ParserWrapper( + makeParser(str_Bx_ext_particle_function))); + m_By_particle_parser.reset(new ParserWrapper( + makeParser(str_By_ext_particle_function))); + m_Bz_particle_parser.reset(new ParserWrapper( + makeParser(str_Bz_ext_particle_function))); + + } + + // if the input string for E_ext_particle_s is + // "parse_e_ext_particle_function" then the mathematical expression + // for the Ex_, Ey_, Ez_external_particle_function(x,y,z,t) + // must be provided in the input file. + if (m_E_ext_particle_s == "parse_e_ext_particle_function") { + // store the mathematical expression as string + std::string str_Ex_ext_particle_function; + std::string str_Ey_ext_particle_function; + std::string str_Ez_ext_particle_function; + Store_parserString(pp, "Ex_external_particle_function(x,y,z,t)", + str_Ex_ext_particle_function); + Store_parserString(pp, "Ey_external_particle_function(x,y,z,t)", + str_Ey_ext_particle_function); + Store_parserString(pp, "Ez_external_particle_function(x,y,z,t)", + str_Ez_ext_particle_function); + // Parser for E_external on the particle + m_Ex_particle_parser.reset(new ParserWrapper( + makeParser(str_Ex_ext_particle_function))); + m_Ey_particle_parser.reset(new ParserWrapper( + makeParser(str_Ey_ext_particle_function))); + m_Ez_particle_parser.reset(new ParserWrapper( + makeParser(str_Ez_ext_particle_function))); + + } + + + + pp.query("nspecies", nspecies); BL_ASSERT(nspecies >= 0); diff --git a/Source/Particles/PhysicalParticleContainer.H b/Source/Particles/PhysicalParticleContainer.H index 0192ffb37..31d3cbbf3 100644 --- a/Source/Particles/PhysicalParticleContainer.H +++ b/Source/Particles/PhysicalParticleContainer.H @@ -47,6 +47,26 @@ public: amrex::Real t, amrex::Real dt) override; #endif // WARPX_DO_ELECTROSTATIC + /** + * \brief Apply external E and B fields on the particles. The E and B + * fields can be defined as constants or through a parser that reads + * in a mathematical expression. The default value for the E- and B-fields + * is (0.0,0.0,0.0). + * + * \param[in,out] Exp,Eyp,Ezp,Bxp,Byp,Bzp field arrays on the particles, + * overwritten with the external E and B values + * \param[in] xp,yp,zp arrays of particle positions required to evaluate + * the mathematical expression for the external fields + * using the parser. 
+ */ + void AssignExternalFieldOnParticles ( WarpXParIter& pti, + RealVector& Exp, RealVector& Eyp, + RealVector& Ezp, RealVector& Bxp, + RealVector& Byp, RealVector& Bzp, + const amrex::Gpu::ManagedDeviceVector<amrex::ParticleReal>& xp, + const amrex::Gpu::ManagedDeviceVector<amrex::ParticleReal>& yp, + const amrex::Gpu::ManagedDeviceVector<amrex::ParticleReal>& zp, int lev); + virtual void FieldGather (int lev, const amrex::MultiFab& Ex, const amrex::MultiFab& Ey, @@ -318,6 +338,7 @@ protected: //radiation reaction bool do_classical_radiation_reaction = false; + #ifdef WARPX_QED // A flag to enable quantum_synchrotron process for leptons bool m_do_qed_quantum_sync = false; diff --git a/Source/Particles/PhysicalParticleContainer.cpp b/Source/Particles/PhysicalParticleContainer.cpp index f22f94e9b..d437a5b61 100644 --- a/Source/Particles/PhysicalParticleContainer.cpp +++ b/Source/Particles/PhysicalParticleContainer.cpp @@ -40,6 +40,7 @@ PhysicalParticleContainer::PhysicalParticleContainer (AmrCore* amr_core, int isp pp.query("do_splitting", do_splitting); pp.query("split_type", split_type); pp.query("do_not_deposit", do_not_deposit); + pp.query("do_not_push", do_not_push); pp.query("do_continuous_injection", do_continuous_injection); pp.query("initialize_self_fields", initialize_self_fields); @@ -493,7 +494,9 @@ PhysicalParticleContainer::AddPlasma (int lev, RealBox part_realbox) // Update NextID to include particles created in this function int pid; +#ifdef _OPENMP #pragma omp critical (add_plasma_nextid) +#endif { pid = ParticleType::NextID(); ParticleType::NextID(pid+max_new_particles); @@ -964,6 +967,79 @@ PhysicalParticleContainer::EvolveES (const Vector<std::array<std::unique_ptr<Mul #endif // WARPX_DO_ELECTROSTATIC void +PhysicalParticleContainer::AssignExternalFieldOnParticles(WarpXParIter& pti, + RealVector& Exp, RealVector& Eyp, RealVector& Ezp, + RealVector& Bxp, RealVector& Byp, RealVector& Bzp, + const Gpu::ManagedDeviceVector<ParticleReal>& xp, + const Gpu::ManagedDeviceVector<ParticleReal>& yp, + const Gpu::ManagedDeviceVector<ParticleReal>& zp, int lev) +{ + const long np = pti.numParticles(); + /// get WarpX class object + auto & warpx = WarpX::GetInstance(); + /// get MultiParticleContainer class object + auto & mypc = warpx.GetPartContainer(); + if (mypc.m_E_ext_particle_s=="constant" || + mypc.m_E_ext_particle_s=="default") { + Exp.assign(np,mypc.m_E_external_particle[0]); + Eyp.assign(np,mypc.m_E_external_particle[1]); + Ezp.assign(np,mypc.m_E_external_particle[2]); + } + if (mypc.m_B_ext_particle_s=="constant" || + mypc.m_B_ext_particle_s=="default") { + Bxp.assign(np,mypc.m_B_external_particle[0]); + Byp.assign(np,mypc.m_B_external_particle[1]); + Bzp.assign(np,mypc.m_B_external_particle[2]); + } + if (mypc.m_E_ext_particle_s=="parse_e_ext_particle_function") { + const Real* const AMREX_RESTRICT xp_data = xp.dataPtr(); + const Real* const AMREX_RESTRICT yp_data = yp.dataPtr(); + const Real* const AMREX_RESTRICT zp_data = zp.dataPtr(); + Real* const AMREX_RESTRICT Exp_data = Exp.dataPtr(); + Real* const AMREX_RESTRICT Eyp_data = Eyp.dataPtr(); + Real* const AMREX_RESTRICT Ezp_data = Ezp.dataPtr(); + ParserWrapper *xfield_partparser = mypc.m_Ex_particle_parser.get(); + ParserWrapper *yfield_partparser = mypc.m_Ey_particle_parser.get(); + ParserWrapper *zfield_partparser = mypc.m_Ez_particle_parser.get(); + Real time = warpx.gett_new(lev); + amrex::ParallelFor(pti.numParticles(), + [=] AMREX_GPU_DEVICE (long i) { + Exp_data[i] = 
xfield_partparser->getField(xp_data[i],yp_data[i],zp_data[i],time); + Eyp_data[i] = yfield_partparser->getField(xp_data[i],yp_data[i],zp_data[i],time); + Ezp_data[i] = zfield_partparser->getField(xp_data[i],yp_data[i],zp_data[i],time); + }, + /* To allocate shared memory for the GPU threads. */ + /* But, for now only 4 doubles (x,y,z,t) are allocated. */ + amrex::Gpu::numThreadsPerBlockParallelFor() * sizeof(double) * 4 + ); + } + if (mypc.m_B_ext_particle_s=="parse_b_ext_particle_function") { + const Real* const AMREX_RESTRICT xp_data = xp.dataPtr(); + const Real* const AMREX_RESTRICT yp_data = yp.dataPtr(); + const Real* const AMREX_RESTRICT zp_data = zp.dataPtr(); + Real* const AMREX_RESTRICT Bxp_data = Bxp.dataPtr(); + Real* const AMREX_RESTRICT Byp_data = Byp.dataPtr(); + Real* const AMREX_RESTRICT Bzp_data = Bzp.dataPtr(); + ParserWrapper *xfield_partparser = mypc.m_Bx_particle_parser.get(); + ParserWrapper *yfield_partparser = mypc.m_By_particle_parser.get(); + ParserWrapper *zfield_partparser = mypc.m_Bz_particle_parser.get(); + Real time = warpx.gett_new(lev); + amrex::ParallelFor(pti.numParticles(), + [=] AMREX_GPU_DEVICE (long i) { + Bxp_data[i] = xfield_partparser->getField(xp_data[i],yp_data[i],zp_data[i],time); + Byp_data[i] = yfield_partparser->getField(xp_data[i],yp_data[i],zp_data[i],time); + Bzp_data[i] = zfield_partparser->getField(xp_data[i],yp_data[i],zp_data[i],time); + }, + /* To allocate shared memory for the GPU threads. */ + /* But, for now only 4 doubles (x,y,z,t) are allocated. */ + amrex::Gpu::numThreadsPerBlockParallelFor() * sizeof(double) * 4 + ); + } +} + + + +void PhysicalParticleContainer::FieldGather (int lev, const amrex::MultiFab& Ex, const amrex::MultiFab& Ey, @@ -1012,13 +1088,6 @@ PhysicalParticleContainer::FieldGather (int lev, const FArrayBox& byfab = By[pti]; const FArrayBox& bzfab = Bz[pti]; - Exp.assign(np,WarpX::E_external_particle[0]); - Eyp.assign(np,WarpX::E_external_particle[1]); - Ezp.assign(np,WarpX::E_external_particle[2]); - Bxp.assign(np,WarpX::B_external_particle[0]); - Byp.assign(np,WarpX::B_external_particle[1]); - Bzp.assign(np,WarpX::B_external_particle[2]); - // // copy data from particle container to temp arrays // @@ -1043,6 +1112,9 @@ PhysicalParticleContainer::FieldGather (int lev, costarr(i,j,k) += wt; }); } + // synchronize prevents CUDA streams from overwriting the temporary arrays + // used to store positions + Gpu::synchronize(); } } } @@ -1149,14 +1221,6 @@ PhysicalParticleContainer::Evolve (int lev, exfab, eyfab, ezfab, bxfab, byfab, bzfab); } - Exp.assign(np,WarpX::E_external_particle[0]); - Eyp.assign(np,WarpX::E_external_particle[1]); - Ezp.assign(np,WarpX::E_external_particle[2]); - - Bxp.assign(np,WarpX::B_external_particle[0]); - Byp.assign(np,WarpX::B_external_particle[1]); - Bzp.assign(np,WarpX::B_external_particle[2]); - // Determine which particles deposit/gather in the buffer, and // which particles deposit/gather in the fine patch long nfine_current = np; @@ -1796,14 +1860,6 @@ PhysicalParticleContainer::PushP (int lev, Real dt, const FArrayBox& byfab = By[pti]; const FArrayBox& bzfab = Bz[pti]; - Exp.assign(np,WarpX::E_external_particle[0]); - Eyp.assign(np,WarpX::E_external_particle[1]); - Ezp.assign(np,WarpX::E_external_particle[2]); - - Bxp.assign(np,WarpX::B_external_particle[0]); - Byp.assign(np,WarpX::B_external_particle[1]); - Bzp.assign(np,WarpX::B_external_particle[2]); - // // copy data from particle container to temp arrays // @@ -1977,7 +2033,6 @@ void 
PhysicalParticleContainer::GetParticleSlice(const int direction, const Real #endif for (WarpXParIter pti(*this, lev); pti.isValid(); ++pti) { - int counter_for_ParticleCopy = 0; const Box& box = pti.validbox(); auto index = std::make_pair(pti.index(), pti.LocalTileIndex()); const RealBox tile_real_box(box, dx, plo); @@ -2173,9 +2228,16 @@ PhysicalParticleContainer::FieldGather (WarpXParIter& pti, AMREX_ALWAYS_ASSERT_WITH_MESSAGE((gather_lev==(lev-1)) || (gather_lev==(lev )), "Gather buffers only work for lev-1"); - // If no particles, do not do anything if (np_to_gather == 0) return; + + // initializing the field value to the externally applied field before + // gathering fields from the grid to the particles. + AssignExternalFieldOnParticles(pti, Exp, Eyp, Ezp, Bxp, Byp, Bzp, + m_xp[thread_num], m_yp[thread_num], + m_zp[thread_num], lev); + + // Get cell size on gather_lev const std::array<Real,3>& dx = WarpX::CellSize(std::max(gather_lev,0)); @@ -2421,4 +2483,5 @@ set_quantum_sync_engine_ptr(std::shared_ptr<QuantumSynchrotronEngine> ptr) { m_shr_p_qs_engine = ptr; } + #endif diff --git a/Source/Particles/RigidInjectedParticleContainer.cpp b/Source/Particles/RigidInjectedParticleContainer.cpp index bee71fba1..f3b502a0a 100644 --- a/Source/Particles/RigidInjectedParticleContainer.cpp +++ b/Source/Particles/RigidInjectedParticleContainer.cpp @@ -392,13 +392,6 @@ RigidInjectedParticleContainer::PushP (int lev, Real dt, const FArrayBox& byfab = By[pti]; const FArrayBox& bzfab = Bz[pti]; - Exp.assign(np,WarpX::E_external_particle[0]); - Eyp.assign(np,WarpX::E_external_particle[1]); - Ezp.assign(np,WarpX::E_external_particle[2]); - Bxp.assign(np,WarpX::B_external_particle[0]); - Byp.assign(np,WarpX::B_external_particle[1]); - Bzp.assign(np,WarpX::B_external_particle[2]); - // // copy data from particle container to temp arrays // diff --git a/Source/Particles/Sorting/SortingUtils.H b/Source/Particles/Sorting/SortingUtils.H index 35bc059aa..f425c6c7b 100644 --- a/Source/Particles/Sorting/SortingUtils.H +++ b/Source/Particles/Sorting/SortingUtils.H @@ -3,10 +3,7 @@ #include <WarpXParticleContainer.H> #include <AMReX_Gpu.H> -#ifdef AMREX_USE_GPU - #include <thrust/partition.h> - #include <thrust/distance.h> -#endif +#include <AMReX_Partition.H> /** \brief Fill the elements of the input vector with consecutive integer, * starting from 0 @@ -16,8 +13,10 @@ void fillWithConsecutiveIntegers( amrex::Gpu::DeviceVector<long>& v ) { #ifdef AMREX_USE_GPU - // On GPU: Use thrust - thrust::sequence( v.begin(), v.end() ); + // On GPU: Use amrex + auto data = v.data(); + auto N = v.size(); + AMREX_FOR_1D( N, i, data[i] = i;); #else // On CPU: Use std library std::iota( v.begin(), v.end(), 0L ); @@ -35,17 +34,18 @@ void fillWithConsecutiveIntegers( amrex::Gpu::DeviceVector<long>& v ) */ template< typename ForwardIterator > ForwardIterator stablePartition(ForwardIterator const index_begin, - ForwardIterator const index_end, - amrex::Gpu::DeviceVector<int> const& predicate) + ForwardIterator const index_end, + amrex::Gpu::DeviceVector<int> const& predicate) { #ifdef AMREX_USE_GPU - // On GPU: Use thrust + // On GPU: Use amrex int const* AMREX_RESTRICT predicate_ptr = predicate.dataPtr(); - ForwardIterator const sep = thrust::stable_partition( - thrust::cuda::par(amrex::Gpu::The_ThrustCachedAllocator()), - index_begin, index_end, - [predicate_ptr] AMREX_GPU_DEVICE (long i) { return predicate_ptr[i]; } - ); + int N = static_cast<int>(std::distance(index_begin, index_end)); + auto num_true = 
amrex::StablePartition(&(*index_begin), N, + [predicate_ptr] AMREX_GPU_DEVICE (long i) { return predicate_ptr[i]; }); + + ForwardIterator sep = index_begin; + std::advance(sep, num_true); #else // On CPU: Use std library ForwardIterator const sep = std::stable_partition( @@ -66,12 +66,7 @@ template< typename ForwardIterator > int iteratorDistance(ForwardIterator const first, ForwardIterator const last) { -#ifdef AMREX_USE_GPU - // On GPU: Use thrust - return thrust::distance( first, last ); -#else return std::distance( first, last ); -#endif } /** \brief Functor that fills the elements of the particle array `inexflag` diff --git a/Source/Particles/WarpXParticleContainer.H b/Source/Particles/WarpXParticleContainer.H index fa5c586da..4fb4f007b 100644 --- a/Source/Particles/WarpXParticleContainer.H +++ b/Source/Particles/WarpXParticleContainer.H @@ -338,8 +338,8 @@ protected: //! instead of gathering fields from the finest patch level, gather from the coarsest bool m_gather_from_main_grid = false; - static int do_not_push; - static int do_not_deposit; + int do_not_push = 0; + int do_not_deposit = 0; // Whether to allow particles outside of the simulation domain to be // initialized when they enter the domain. diff --git a/Source/Particles/WarpXParticleContainer.cpp b/Source/Particles/WarpXParticleContainer.cpp index 15a6cff9b..e6f729935 100644 --- a/Source/Particles/WarpXParticleContainer.cpp +++ b/Source/Particles/WarpXParticleContainer.cpp @@ -16,9 +16,6 @@ using namespace amrex; -int WarpXParticleContainer::do_not_push = 0; -int WarpXParticleContainer::do_not_deposit = 0; - WarpXParIter::WarpXParIter (ContainerType& pc, int level) : ParIter(pc, level, MFItInfo().SetDynamic(WarpX::do_dynamic_scheduling)) { @@ -113,6 +110,7 @@ WarpXParticleContainer::WarpXParticleContainer (AmrCore* amr_core, int ispecies) m_xp.resize(num_threads); m_yp.resize(num_threads); m_zp.resize(num_threads); + } void @@ -129,7 +127,6 @@ WarpXParticleContainer::ReadParameters () do_tiling = true; #endif pp.query("do_tiling", do_tiling); - pp.query("do_not_push", do_not_push); initialized = true; } diff --git a/Source/Python/WarpXWrappers.cpp b/Source/Python/WarpXWrappers.cpp index 54dfe1955..e72d467d7 100644 --- a/Source/Python/WarpXWrappers.cpp +++ b/Source/Python/WarpXWrappers.cpp @@ -346,11 +346,11 @@ extern "C" } void warpx_FillBoundaryE () { WarpX& warpx = WarpX::GetInstance(); - warpx.FillBoundaryE (); + warpx.FillBoundaryE (warpx.getngE()); } void warpx_FillBoundaryB () { WarpX& warpx = WarpX::GetInstance(); - warpx.FillBoundaryB (); + warpx.FillBoundaryB (warpx.getngE()); } void warpx_SyncCurrent () { WarpX& warpx = WarpX::GetInstance(); diff --git a/Source/Utils/WarpXMovingWindow.cpp b/Source/Utils/WarpXMovingWindow.cpp index e05a64bfe..3f607615b 100644 --- a/Source/Utils/WarpXMovingWindow.cpp +++ b/Source/Utils/WarpXMovingWindow.cpp @@ -1,3 +1,4 @@ +#include "GuardCellManager.H" #include <WarpX.H> #include <WarpXUtil.H> #include <WarpXConst.H> @@ -29,6 +30,9 @@ WarpX::MoveWindow (bool move_j) { if (do_moving_window == 0) return 0; + IntVect ng_extra = guard_cells.ng_Extra; + IntVect ng_zero = IntVect::TheZeroVector(); + // Update the continuous position of the moving window, // and of the plasma injection moving_window_x += moving_window_v * dt[0]; @@ -115,32 +119,32 @@ WarpX::MoveWindow (bool move_j) if (dim == 1) Efield_parser = Eyfield_parser.get(); if (dim == 2) Efield_parser = Ezfield_parser.get(); } - shiftMF(*Bfield_fp[lev][dim], geom[lev], num_shift, dir, B_external_grid[dim], use_Bparser, 
Bfield_parser); - shiftMF(*Efield_fp[lev][dim], geom[lev], num_shift, dir, E_external_grid[dim], use_Eparser, Efield_parser); + shiftMF(*Bfield_fp[lev][dim], geom[lev], num_shift, dir, ng_extra, B_external_grid[dim], use_Bparser, Bfield_parser); + shiftMF(*Efield_fp[lev][dim], geom[lev], num_shift, dir, ng_extra, E_external_grid[dim], use_Eparser, Efield_parser); if (move_j) { - shiftMF(*current_fp[lev][dim], geom[lev], num_shift, dir); + shiftMF(*current_fp[lev][dim], geom[lev], num_shift, dir, ng_zero); } if (do_pml && pml[lev]->ok()) { const std::array<MultiFab*, 3>& pml_B = pml[lev]->GetB_fp(); const std::array<MultiFab*, 3>& pml_E = pml[lev]->GetE_fp(); - shiftMF(*pml_B[dim], geom[lev], num_shift, dir); - shiftMF(*pml_E[dim], geom[lev], num_shift, dir); + shiftMF(*pml_B[dim], geom[lev], num_shift, dir, ng_extra); + shiftMF(*pml_E[dim], geom[lev], num_shift, dir, ng_extra); } if (lev > 0) { // coarse grid - shiftMF(*Bfield_cp[lev][dim], geom[lev-1], num_shift_crse, dir, B_external_grid[dim], use_Bparser, Bfield_parser); - shiftMF(*Efield_cp[lev][dim], geom[lev-1], num_shift_crse, dir, E_external_grid[dim], use_Eparser, Efield_parser); - shiftMF(*Bfield_aux[lev][dim], geom[lev], num_shift, dir); - shiftMF(*Efield_aux[lev][dim], geom[lev], num_shift, dir); + shiftMF(*Bfield_cp[lev][dim], geom[lev-1], num_shift_crse, dir, ng_zero, B_external_grid[dim], use_Bparser, Bfield_parser); + shiftMF(*Efield_cp[lev][dim], geom[lev-1], num_shift_crse, dir, ng_zero, E_external_grid[dim], use_Eparser, Efield_parser); + shiftMF(*Bfield_aux[lev][dim], geom[lev], num_shift, dir, ng_zero); + shiftMF(*Efield_aux[lev][dim], geom[lev], num_shift, dir, ng_zero); if (move_j) { - shiftMF(*current_cp[lev][dim], geom[lev-1], num_shift_crse, dir); + shiftMF(*current_cp[lev][dim], geom[lev-1], num_shift_crse, dir, ng_zero); } if (do_pml && pml[lev]->ok()) { const std::array<MultiFab*, 3>& pml_B = pml[lev]->GetB_cp(); const std::array<MultiFab*, 3>& pml_E = pml[lev]->GetE_cp(); - shiftMF(*pml_B[dim], geom[lev-1], num_shift_crse, dir); - shiftMF(*pml_E[dim], geom[lev-1], num_shift_crse, dir); + shiftMF(*pml_B[dim], geom[lev-1], num_shift_crse, dir, ng_extra); + shiftMF(*pml_E[dim], geom[lev-1], num_shift_crse, dir, ng_extra); } } } @@ -148,19 +152,19 @@ WarpX::MoveWindow (bool move_j) // Shift scalar component F for dive cleaning if (do_dive_cleaning) { // Fine grid - shiftMF(*F_fp[lev], geom[lev], num_shift, dir); + shiftMF(*F_fp[lev], geom[lev], num_shift, dir, ng_zero); if (do_pml && pml[lev]->ok()) { MultiFab* pml_F = pml[lev]->GetF_fp(); - shiftMF(*pml_F, geom[lev], num_shift, dir); + shiftMF(*pml_F, geom[lev], num_shift, dir, ng_extra); } if (lev > 0) { // Coarse grid - shiftMF(*F_cp[lev], geom[lev-1], num_shift_crse, dir); + shiftMF(*F_cp[lev], geom[lev-1], num_shift_crse, dir, ng_zero); if (do_pml && pml[lev]->ok()) { MultiFab* pml_F = pml[lev]->GetF_cp(); - shiftMF(*pml_F, geom[lev-1], num_shift_crse, dir); + shiftMF(*pml_F, geom[lev-1], num_shift_crse, dir, ng_zero); } - shiftMF(*rho_cp[lev], geom[lev-1], num_shift_crse, dir); + shiftMF(*rho_cp[lev], geom[lev-1], num_shift_crse, dir, ng_zero); } } @@ -168,10 +172,10 @@ WarpX::MoveWindow (bool move_j) if (move_j) { if (rho_fp[lev]){ // Fine grid - shiftMF(*rho_fp[lev], geom[lev], num_shift, dir); + shiftMF(*rho_fp[lev], geom[lev], num_shift, dir, ng_zero); if (lev > 0){ // Coarse grid - shiftMF(*rho_cp[lev], geom[lev-1], num_shift_crse, dir); + shiftMF(*rho_cp[lev], geom[lev-1], num_shift_crse, dir, ng_zero); } } } @@ -220,7 +224,8 @@ WarpX::MoveWindow (bool 
move_j) void WarpX::shiftMF (MultiFab& mf, const Geometry& geom, int num_shift, int dir, - amrex::Real external_field, bool useparser, ParserWrapper *field_parser) + IntVect ng_extra, amrex::Real external_field, bool useparser, + ParserWrapper *field_parser) { BL_PROFILE("WarpX::shiftMF()"); const BoxArray& ba = mf.boxArray(); @@ -232,7 +237,16 @@ WarpX::shiftMF (MultiFab& mf, const Geometry& geom, int num_shift, int dir, MultiFab tmpmf(ba, dm, nc, ng); MultiFab::Copy(tmpmf, mf, 0, 0, nc, ng); - tmpmf.FillBoundary(geom.periodicity()); + + IntVect ng_mw = IntVect::TheUnitVector(); + // Enough guard cells in the MW direction + ng_mw[dir] = num_shift; + // Add the extra cell (if momentum-conserving gather with staggered field solve) + ng_mw += ng_extra; + // Make sure we don't exceed number of guard cells allocated + ng_mw = ng_mw.min(ng); + // Fill guard cells. + tmpmf.FillBoundary(ng_mw, geom.periodicity()); // Make a box that covers the region that the window moved into const IndexType& typ = ba.ixType(); @@ -309,7 +323,7 @@ WarpX::shiftMF (MultiFab& mf, const Geometry& geom, int num_shift, int dir, #endif srcfab(i,j,k,n) = field_parser->getField(x,y,z); } - , amrex::Gpu::numThreadsPerBlockParallelFor() * sizeof(double)*3 + , amrex::Gpu::numThreadsPerBlockParallelFor() * sizeof(double)*4 ); } diff --git a/Source/Utils/WarpXUtil.cpp b/Source/Utils/WarpXUtil.cpp index e9fb958fd..a154e93df 100644 --- a/Source/Utils/WarpXUtil.cpp +++ b/Source/Utils/WarpXUtil.cpp @@ -185,12 +185,13 @@ void Store_parserString(amrex::ParmParse& pp, std::string query_string, WarpXParser makeParser (std::string const& parse_function) { WarpXParser parser(parse_function); - parser.registerVariables({"x","y","z"}); + parser.registerVariables({"x","y","z","t"}); ParmParse pp("my_constants"); std::set<std::string> symbols = parser.symbols(); symbols.erase("x"); symbols.erase("y"); symbols.erase("z"); + symbols.erase("t"); for (auto it = symbols.begin(); it != symbols.end(); ) { Real v; if (pp.query(it->c_str(), v)) { diff --git a/Source/Utils/WarpX_Complex.H b/Source/Utils/WarpX_Complex.H index 25ed1a53a..1f265d3c5 100644 --- a/Source/Utils/WarpX_Complex.H +++ b/Source/Utils/WarpX_Complex.H @@ -3,13 +3,14 @@ #include <AMReX_REAL.H> #include <AMReX_Gpu.H> +#include <AMReX_GpuComplex.H> + +#include <complex> // Define complex type on GPU/CPU #ifdef AMREX_USE_GPU -#include <thrust/complex.h> - -using Complex = thrust::complex<amrex::Real>; +using Complex = amrex::GpuComplex<amrex::Real>; #ifdef WARPX_USE_PSATD #include <cufft.h> @@ -19,7 +20,6 @@ static_assert( sizeof(Complex) == sizeof(cuDoubleComplex), #else -#include <complex> using Complex = std::complex<amrex::Real>; #ifdef WARPX_USE_PSATD @@ -39,7 +39,7 @@ namespace MathFunc template<typename T> AMREX_GPU_HOST_DEVICE T exp (const T& val){ #ifdef AMREX_USE_GPU - return thrust::exp(val); + return amrex::exp(val); #else return std::exp(val); #endif @@ -49,7 +49,7 @@ namespace MathFunc template<typename T> AMREX_GPU_HOST_DEVICE T sqrt (const T& val){ #ifdef AMREX_USE_GPU - return thrust::sqrt(val); + return amrex::sqrt(val); #else return std::sqrt(val); #endif @@ -59,7 +59,7 @@ namespace MathFunc template<typename T1, typename T2> AMREX_GPU_HOST_DEVICE T1 pow (const T1& val, const T2& power){ #ifdef AMREX_USE_GPU - return thrust::pow(val, power); + return amrex::pow(val, power); #else return std::pow(val, power); #endif diff --git a/Source/Utils/write_atomic_data_cpp.py b/Source/Utils/write_atomic_data_cpp.py index 35baf2cdf..09b7b8300 100644 --- 
a/Source/Utils/write_atomic_data_cpp.py +++ b/Source/Utils/write_atomic_data_cpp.py @@ -6,7 +6,6 @@ IonizationEnergiesTable.H, which contains tables + metadata. import re, os import numpy as np -from scipy.constants import e filename = os.path.join( '.', 'atomic_data.txt' ) with open(filename) as f: diff --git a/Source/WarpX.H b/Source/WarpX.H index 0b4aac0ce..9f34fe988 100644 --- a/Source/WarpX.H +++ b/Source/WarpX.H @@ -27,6 +27,8 @@ #include <AMReX_FillPatchUtil.H> #include <WarpXParserWrapper.H> +#include "GuardCellManager.H" + #ifdef _OPENMP # include <omp.h> #endif @@ -64,8 +66,8 @@ public: WarpX (); ~WarpX (); - static std::string Version (); - static std::string PicsarVersion (); + static std::string Version (); //! Version of WarpX executable + static std::string PicsarVersion (); //! Version of PICSAR dependency int Verbose () const { return verbose; } @@ -76,21 +78,20 @@ public: MultiParticleContainer& GetPartContainer () { return *mypc; } static void shiftMF (amrex::MultiFab& mf, const amrex::Geometry& geom, - int num_shift, int dir, amrex::Real external_field=0.0, - bool useparser = false, + int num_shift, int dir, amrex::IntVect ng_extra, + amrex::Real external_field=0.0, bool useparser = false, ParserWrapper *field_parser=nullptr); static void GotoNextLine (std::istream& is); - // External fields added to particle fields. - static amrex::Vector<amrex::Real> B_external_particle; - static amrex::Vector<amrex::Real> E_external_particle; + //! Author of an input file / simulation setup + static std::string authors; // Initial field on the grid. static amrex::Vector<amrex::Real> E_external_grid; static amrex::Vector<amrex::Real> B_external_grid; - // Initialization Type for External E and B + // Initialization Type for External E and B on grid static std::string B_ext_grid_s; static std::string E_ext_grid_s; @@ -119,6 +120,9 @@ public: static long particle_pusher_algo; static int maxwell_fdtd_solver_id; + // div E cleaning + static int do_dive_cleaning; + // Interpolation order static long nox; static long noy; @@ -156,6 +160,8 @@ public: static int do_subcycling; + static bool exchange_all_guard_cells; + // buffers static int n_field_gather_buffer; //! in number of cells from the edge (identical for each dimension) static int n_current_deposition_buffer; //! in number of cells from the edge (identical for each dimension) @@ -175,6 +181,9 @@ public: const amrex::MultiFab& getEfield_fp (int lev, int direction) {return *Efield_fp[lev][direction];} const amrex::MultiFab& getBfield_fp (int lev, int direction) {return *Bfield_fp[lev][direction];} + /** get low-high-low-high-... 
vector for each direction indicating if mother grid PMLs are enabled */ + std::vector<bool> getPMLdirections() const; + static amrex::MultiFab* getCosts (int lev) { if (m_instance) { return m_instance->costs[lev].get(); @@ -245,12 +254,14 @@ public: void UpdateAuxilaryDataSameType (); // Fill boundary cells including coarse/fine boundaries - void FillBoundaryB (); - void FillBoundaryE (); - void FillBoundaryF (); - void FillBoundaryE (int lev); - void FillBoundaryB (int lev); - void FillBoundaryF (int lev); + void FillBoundaryB (amrex::IntVect ng, amrex::IntVect ng_extra_fine=amrex::IntVect::TheZeroVector()); + void FillBoundaryE (amrex::IntVect ng, amrex::IntVect ng_extra_fine=amrex::IntVect::TheZeroVector()); + void FillBoundaryF (amrex::IntVect ng); + void FillBoundaryAux (amrex::IntVect ng); + void FillBoundaryE (int lev, amrex::IntVect ng, amrex::IntVect ng_extra_fine=amrex::IntVect::TheZeroVector()); + void FillBoundaryB (int lev, amrex::IntVect ng, amrex::IntVect ng_extra_fine=amrex::IntVect::TheZeroVector()); + void FillBoundaryF (int lev, amrex::IntVect ng); + void FillBoundaryAux (int lev, amrex::IntVect ng); void SyncCurrent (); void SyncRho (); @@ -334,6 +345,8 @@ public: const std::array<const amrex::MultiFab*, 3>& B, const std::array<amrex::Real,3>& dx, int ngrow); + const amrex::IntVect getngE() const { return guard_cells.ng_alloc_EB; }; + const amrex::IntVect getngF() const { return guard_cells.ng_alloc_F; }; void InitSpaceChargeField (WarpXParticleContainer& pc); void computePhi (const amrex::Vector<std::unique_ptr<amrex::MultiFab> >& rho, @@ -347,6 +360,21 @@ public: const amrex::Vector<std::unique_ptr<amrex::MultiFab> >& phi, std::array<amrex::Real, 3> const beta = {{0,0,0}} ) const; + /** + * \brief + * This function initializes the E and B fields on each level + * using the parser and the user-defined function for the external fields. + * The subroutine will parse the x_/y_/z_external_grid_function and + * then the B or E MultiFab is initialized based on the (x,y,z) position + * on the staggered Yee grid or cell-centered grid. + */ + void InitializeExternalFieldsOnGridUsingParser ( + amrex::MultiFab *mfx, amrex::MultiFab *mfy, amrex::MultiFab *mfz, + ParserWrapper *xfield_parser, ParserWrapper *yfield_parser, + ParserWrapper *zfield_parser, amrex::IntVect x_nodal_flag, + amrex::IntVect y_nodal_flag, amrex::IntVect z_nodal_flag, + const int lev); + protected: /** @@ -376,22 +404,6 @@ protected: */ void InitLevelData (int lev, amrex::Real time); - /** - * \brief - * This function initializes the E and B fields on each level - * using the parser and the user-defined function for the external fields. - * The subroutine will parse the x_/y_z_external_grid_function and - * then, the B or E multifab is initialized based on the (x,y,z) position - * on the staggered yee-grid or cell-centered grid. - */ - void InitializeExternalFieldsOnGridUsingParser ( - amrex::MultiFab *mfx, amrex::MultiFab *mfy, amrex::MultiFab *mfz, - ParserWrapper *xfield_parser, ParserWrapper *yfield_parser, - ParserWrapper *zfield_parser, amrex::IntVect x_nodal_flag, - amrex::IntVect y_nodal_flag, amrex::IntVect z_nodal_flag, - const int lev); - - //! 
Tagging cells for refinement virtual void ErrorEst (int lev, amrex::TagBoxArray& tags, amrex::Real time, int /*ngrow*/) final; @@ -427,9 +439,9 @@ private: /// void EvolveEM(int numsteps); - void FillBoundaryB (int lev, PatchType patch_type); - void FillBoundaryE (int lev, PatchType patch_type); - void FillBoundaryF (int lev, PatchType patch_type); + void FillBoundaryB (int lev, PatchType patch_type, amrex::IntVect ng); + void FillBoundaryE (int lev, PatchType patch_type, amrex::IntVect ng); + void FillBoundaryF (int lev, PatchType patch_type, amrex::IntVect ng); void OneStep_nosub (amrex::Real t); void OneStep_sub1 (amrex::Real t); @@ -517,7 +529,8 @@ private: void AllocLevelMFs (int lev, const amrex::BoxArray& ba, const amrex::DistributionMapping& dm, const amrex::IntVect& ngE, const amrex::IntVect& ngJ, - const amrex::IntVect& ngRho, int ngF); + const amrex::IntVect& ngRho, const amrex::IntVect& ngF, + const amrex::IntVect& ngextra, const bool aux_is_nodal); amrex::Vector<int> istep; // which step? amrex::Vector<int> nsubsteps; // how many substeps on each level? @@ -567,9 +580,6 @@ private: amrex::Vector<std::array< std::unique_ptr<amrex::MultiFab>, 3 > > current_buf; amrex::Vector<std::unique_ptr<amrex::MultiFab> > charge_buf; - // div E cleaning - int do_dive_cleaning = 0; - // PML int do_pml = 1; int pml_ncell = 10; @@ -655,6 +665,8 @@ private: bool is_synchronized = true; + guardCellManager guard_cells; + //Slice Parameters int slice_max_grid_size; int slice_plot_int = -1; @@ -680,18 +692,19 @@ private: amrex::Vector< std::unique_ptr<amrex::MultiFab> > rho_cp_fft; #endif + bool fft_hybrid_mpi_decomposition = false; + int nox_fft = 16; + int noy_fft = 16; + int noz_fft = 16; + #ifdef WARPX_USE_PSATD private: void EvolvePSATD (int numsteps); void PushPSATD (amrex::Real dt); void PushPSATD_localFFT (int lev, amrex::Real dt); - bool fft_hybrid_mpi_decomposition = false; int ngroups_fft = 4; int fftw_plan_measure = 1; - int nox_fft = 16; - int noy_fft = 16; - int noz_fft = 16; amrex::Vector<std::unique_ptr<SpectralSolver>> spectral_solver_fp; amrex::Vector<std::unique_ptr<SpectralSolver>> spectral_solver_cp; diff --git a/Source/WarpX.cpp b/Source/WarpX.cpp index c9084cd2a..d49ead1bc 100644 --- a/Source/WarpX.cpp +++ b/Source/WarpX.cpp @@ -24,12 +24,10 @@ using namespace amrex; -Vector<Real> WarpX::B_external_particle(3, 0.0); -Vector<Real> WarpX::E_external_particle(3, 0.0); - Vector<Real> WarpX::E_external_grid(3, 0.0); Vector<Real> WarpX::B_external_grid(3, 0.0); +std::string WarpX::authors = ""; std::string WarpX::B_ext_grid_s = "default"; std::string WarpX::E_ext_grid_s = "default"; @@ -57,6 +55,7 @@ long WarpX::charge_deposition_algo; long WarpX::field_gathering_algo; long WarpX::particle_pusher_algo; int WarpX::maxwell_fdtd_solver_id; +int WarpX::do_dive_cleaning = 0; long WarpX::n_rz_azimuthal_modes = 1; long WarpX::ncomps = 1; @@ -90,6 +89,7 @@ Real WarpX::particle_slice_width_lab = 0.0; bool WarpX::do_dynamic_scheduling = true; int WarpX::do_subcycling = 0; +bool WarpX::exchange_all_guard_cells = 0; #if (AMREX_SPACEDIM == 3) IntVect WarpX::Bx_nodal_flag(1,0,0); @@ -153,7 +153,7 @@ WarpX::WarpX () ReadParameters(); #ifdef WARPX_USE_OPENPMD - m_OpenPMDPlotWriter = new WarpXOpenPMDPlot(openpmd_tspf, openpmd_backend); + m_OpenPMDPlotWriter = new WarpXOpenPMDPlot(openpmd_tspf, openpmd_backend, WarpX::getPMLdirections()); #endif // Geometry on all levels has been defined already. 
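The FillBoundaryE/B/F overloads declared in WarpX.H above all funnel through the same guarded per-call exchange shown earlier in WarpXComm.cpp. A minimal standalone sketch of that pattern for a single MultiFab (the helper name is made up; the assert and FillBoundary call mirror the patch):

    #include <AMReX_MultiFab.H>
    #include <AMReX_Geometry.H>
    #include <AMReX_BLassert.H>

    // Hypothetical helper mirroring the per-call guard-cell exchange pattern.
    void FillGuards (amrex::MultiFab& mf, const amrex::Geometry& gm,
                     const amrex::IntVect& ng)
    {
        // Refuse to exchange more cells than were allocated for this MultiFab.
        AMREX_ALWAYS_ASSERT_WITH_MESSAGE(ng <= mf.nGrowVect(),
            "requested more guard cells than allocated");
        mf.FillBoundary(ng, gm.periodicity());
    }

Passing the exact count needed by each stage of the PIC loop (ng_FieldSolver, ng_FieldGather, ...) instead of exchanging every allocated guard cell is the point of the guardCellManager change; warpx.exchange_all_guard_cells restores the old all-cells behavior.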
@@ -263,6 +263,25 @@ WarpX::WarpX () // at different levels (the stencil depends on c*dt/dz) nci_godfrey_filter_exeybz.resize(nlevs_max); nci_godfrey_filter_bxbyez.resize(nlevs_max); + + // Sanity checks. Must be done after calling the MultiParticleContainer + // constructor, as it reads additional parameters + // (e.g., use_fdtd_nci_corr) + +#ifndef WARPX_USE_PSATD + AMREX_ALWAYS_ASSERT_WITH_MESSAGE( + not ( do_pml && do_nodal ), + "PML + do_nodal for finite-difference not implemented" + ); +#endif + AMREX_ALWAYS_ASSERT_WITH_MESSAGE( + not ( do_dive_cleaning && do_nodal ), + "divE cleaning + do_nodal not implemented" + ); +#ifdef WARPX_USE_PSATD + AMREX_ALWAYS_ASSERT(use_fdtd_nci_corr == 0); + AMREX_ALWAYS_ASSERT(do_subcycling == 0); +#endif } WarpX::~WarpX () @@ -288,6 +307,7 @@ WarpX::ReadParameters () ParmParse pp;// Traditionally, max_step and stop_time do not have prefix. pp.query("max_step", max_step); pp.query("stop_time", stop_time); + pp.query("authors", authors); } { @@ -309,6 +329,7 @@ WarpX::ReadParameters () pp.query("verbose", verbose); pp.query("regrid_int", regrid_int); pp.query("do_subcycling", do_subcycling); + pp.query("exchange_all_guard_cells", exchange_all_guard_cells); pp.query("override_sync_int", override_sync_int); AMREX_ALWAYS_ASSERT_WITH_MESSAGE(do_subcycling != 1 || max_level <= 1, @@ -322,9 +343,6 @@ WarpX::ReadParameters () pp.query("zmax_plasma_to_compute_max_step", zmax_plasma_to_compute_max_step); - pp.queryarr("B_external_particle", B_external_particle); - pp.queryarr("E_external_particle", E_external_particle); - pp.query("do_moving_window", do_moving_window); if (do_moving_window) { @@ -646,7 +664,6 @@ WarpX::ReadParameters () } } - } // This is a virtual function. @@ -730,58 +747,23 @@ WarpX::ClearLevel (int lev) void WarpX::AllocLevelData (int lev, const BoxArray& ba, const DistributionMapping& dm) { - // When using subcycling, the particles on the fine level perform two pushes - // before being redistributed ; therefore, we need one extra guard cell - // (the particles may move by 2*c*dt) - const int ngx_tmp = (maxLevel() > 0 && do_subcycling == 1) ? WarpX::nox+1 : WarpX::nox; - const int ngy_tmp = (maxLevel() > 0 && do_subcycling == 1) ? WarpX::noy+1 : WarpX::noy; - const int ngz_tmp = (maxLevel() > 0 && do_subcycling == 1) ? WarpX::noz+1 : WarpX::noz; - - // Ex, Ey, Ez, Bx, By, and Bz have the same number of ghost cells. - // jx, jy, jz and rho have the same number of ghost cells. - // E and B have the same number of ghost cells as j and rho if NCI filter is not used, - // but different number of ghost cells in z-direction if NCI filter is used. - // The number of cells should be even, in order to easily perform the - // interpolation from coarse grid to fine grid. - int ngx = (ngx_tmp % 2) ? ngx_tmp+1 : ngx_tmp; // Always even number - int ngy = (ngy_tmp % 2) ? ngy_tmp+1 : ngy_tmp; // Always even number - int ngz_nonci = (ngz_tmp % 2) ? ngz_tmp+1 : ngz_tmp; // Always even number - int ngz; - if (WarpX::use_fdtd_nci_corr) { - int ng = ngz_tmp + NCIGodfreyFilter::m_stencil_width; - ngz = (ng % 2) ? ng+1 : ng; - } else { - ngz = ngz_nonci; - } - // J is only interpolated from fine to coarse (not coarse to fine) - // and therefore does not need to be even. - int ngJx = ngx_tmp; - int ngJy = ngy_tmp; - int ngJz = ngz_tmp; - - // When calling the moving window (with one level of refinement), we shift - // the fine grid by 2 cells ; therefore, we need at least 2 guard cells - // on level 1. This may not be necessary for level 0. 
-    if (do_moving_window) {
-        ngx = std::max(ngx,2);
-        ngy = std::max(ngy,2);
-        ngz = std::max(ngz,2);
-        ngJx = std::max(ngJx,2);
-        ngJy = std::max(ngJy,2);
-        ngJz = std::max(ngJz,2);
-    }
-
-#if (AMREX_SPACEDIM == 3)
-    IntVect ngE(ngx,ngy,ngz);
-    IntVect ngJ(ngJx,ngJy,ngJz);
-#elif (AMREX_SPACEDIM == 2)
-    IntVect ngE(ngx,ngz);
-    IntVect ngJ(ngJx,ngJz);
-#endif
+    bool aux_is_nodal = (field_gathering_algo == GatheringAlgo::MomentumConserving);

-    IntVect ngRho = ngJ+1; //One extra ghost cell, so that it's safe to deposit charge density
-    // after pushing particle.
+    guard_cells.Init(
+        do_subcycling,
+        WarpX::use_fdtd_nci_corr,
+        do_nodal,
+        do_moving_window,
+        fft_hybrid_mpi_decomposition,
+        aux_is_nodal,
+        moving_window_dir,
+        WarpX::nox,
+        nox_fft, noy_fft, noz_fft,
+        NCIGodfreyFilter::m_stencil_width,
+        maxwell_fdtd_solver_id,
+        maxLevel(),
+        exchange_all_guard_cells);

     if (mypc->nSpeciesDepositOnMainGrid() && n_current_deposition_buffer == 0) {
         n_current_deposition_buffer = 1;
@@ -792,54 +774,22 @@ WarpX::AllocLevelData (int lev, const BoxArray& ba, const DistributionMapping& d
     }

     if (n_current_deposition_buffer < 0) {
-        n_current_deposition_buffer = ngJ.max();
+        n_current_deposition_buffer = guard_cells.ng_alloc_J.max();
     }
     if (n_field_gather_buffer < 0) {
         // Field gather buffer should be larger than current deposition buffers
         n_field_gather_buffer = n_current_deposition_buffer + 1;
     }

-    int ngF = (do_moving_window) ? 2 : 0;
-    // CKC solver requires one additional guard cell
-    if (maxwell_fdtd_solver_id == 1) ngF = std::max( ngF, 1 );
-
-#ifdef WARPX_USE_PSATD
-    if (fft_hybrid_mpi_decomposition == false){
-        // All boxes should have the same number of guard cells
-        // (to avoid temporary parallel copies)
-        // Thus take the max of the required number of guards for each field
-        // Also: the number of guard cell should be enough to contain
-        // the stencil of the FFT solver. Here, this number (`ngFFT`)
-        // is determined *empirically* to be the order of the solver
-        // for nodal, and half the order of the solver for staggered.
-        IntVect ngFFT;
-        if (do_nodal) {
-            ngFFT = IntVect(AMREX_D_DECL(nox_fft, noy_fft, noz_fft));
-        } else {
-            ngFFT = IntVect(AMREX_D_DECL(nox_fft/2, noy_fft/2, noz_fft/2));
-        }
-        for (int i_dim=0; i_dim<AMREX_SPACEDIM; i_dim++ ){
-            int ng_required = ngFFT[i_dim];
-            // Get the max
-            ng_required = std::max( ng_required, ngE[i_dim] );
-            ng_required = std::max( ng_required, ngJ[i_dim] );
-            ng_required = std::max( ng_required, ngRho[i_dim] );
-            ng_required = std::max( ng_required, ngF );
-            // Set the guard cells to this max
-            ngE[i_dim] = ng_required;
-            ngJ[i_dim] = ng_required;
-            ngRho[i_dim] = ng_required;
-            ngF = ng_required;
-        }
-    }
-#endif
-
-    AllocLevelMFs(lev, ba, dm, ngE, ngJ, ngRho, ngF);
+    AllocLevelMFs(lev, ba, dm, guard_cells.ng_alloc_EB, guard_cells.ng_alloc_J,
+                  guard_cells.ng_alloc_Rho, guard_cells.ng_alloc_F,
+                  guard_cells.ng_Extra, aux_is_nodal);
 }

 void
 WarpX::AllocLevelMFs (int lev, const BoxArray& ba, const DistributionMapping& dm,
-                      const IntVect& ngE, const IntVect& ngJ, const IntVect& ngRho, int ngF)
+                      const IntVect& ngE, const IntVect& ngJ, const IntVect& ngRho,
+                      const IntVect& ngF, const IntVect& ngextra, const bool aux_is_nodal)
 {
 #if defined WARPX_DIM_RZ
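The guard_cells.Init(...) call and the guard_cells.ng_alloc_* accesses above are the only views of the new guardCellManager visible in this diff; its header lives elsewhere in the changeset. A hypothetical reconstruction of the interface, inferred purely from these call sites (member names come from the diff, parameter types are assumptions):

    #include <AMReX_IntVect.H>

    // Centralizes the guard-cell bookkeeping formerly inlined in
    // WarpX::AllocLevelData (sketch only; not the actual header).
    class guardCellManager
    {
    public:
        // Argument list mirrors the call in WarpX::AllocLevelData above.
        void Init (int do_subcycling,
                   bool use_fdtd_nci_corr,
                   bool do_nodal,
                   bool do_moving_window,
                   bool fft_hybrid_mpi_decomposition,
                   bool aux_is_nodal,
                   int moving_window_dir,
                   int nox,
                   int nox_fft, int noy_fft, int noz_fft,
                   int nci_stencil_width,
                   int maxwell_fdtd_solver_id,
                   int max_level,
                   bool exchange_all_guard_cells);

        // Guard-cell counts used when allocating the field MultiFabs
        // (names taken from the call sites in this diff).
        amrex::IntVect ng_alloc_EB;
        amrex::IntVect ng_alloc_J;
        amrex::IntVect ng_alloc_Rho;
        amrex::IntVect ng_alloc_F;
        amrex::IntVect ng_Extra;
    };

Moving the subcycling, NCI-filter, moving-window, and PSATD-stencil adjustments behind one Init call is what lets the long hand-written block removed above disappear from AllocLevelData.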
@@ -851,9 +801,6 @@ WarpX::AllocLevelMFs (int lev, const BoxArray& ba, const DistributionMapping& dm
     ncomps = n_rz_azimuthal_modes*2 - 1;
 #endif

-    bool aux_is_nodal = (field_gathering_algo == GatheringAlgo::MomentumConserving);
-    IntVect ngextra(static_cast<int>(aux_is_nodal and !do_nodal));
-
     //
     // The fine patch
     //
@@ -883,7 +830,7 @@ WarpX::AllocLevelMFs (int lev, const BoxArray& ba, const DistributionMapping& dm
     if (do_dive_cleaning)
     {
-        F_fp[lev].reset (new MultiFab(amrex::convert(ba,IntVect::TheUnitVector()),dm,ncomps, ngF));
+        F_fp[lev].reset (new MultiFab(amrex::convert(ba,IntVect::TheUnitVector()),dm,ncomps, ngF.max()));
     }
 #ifdef WARPX_USE_PSATD
     else
@@ -970,7 +917,7 @@ WarpX::AllocLevelMFs (int lev, const BoxArray& ba, const DistributionMapping& dm
     }
     if (do_dive_cleaning)
     {
-        F_cp[lev].reset (new MultiFab(amrex::convert(cba,IntVect::TheUnitVector()),dm,ncomps, ngF));
+        F_cp[lev].reset (new MultiFab(amrex::convert(cba,IntVect::TheUnitVector()),dm,ncomps, ngF.max()));
     }
 #ifdef WARPX_USE_PSATD
     else
@@ -1280,6 +1227,24 @@ WarpX::GetPML (int lev)
     }
 }

+std::vector< bool >
+WarpX::getPMLdirections() const
+{
+    std::vector< bool > dirsWithPML( 6, false );
+#if AMREX_SPACEDIM!=3
+    dirsWithPML.resize( 4 );
+#endif
+    if( do_pml )
+    {
+        for( auto i = 0u; i < dirsWithPML.size() / 2u; ++i )
+        {
+            dirsWithPML.at( 2u*i      ) = bool(do_pml_Lo[i]);
+            dirsWithPML.at( 2u*i + 1u ) = bool(do_pml_Hi[i]);
+        }
+    }
+    return dirsWithPML;
+}
+
 void
 WarpX::BuildBufferMasks ()
 {
@@ -1362,7 +1327,7 @@ WarpX::Version ()
 std::string
 WarpX::PicsarVersion ()
 {
-#ifdef WARPX_GIT_VERSION
+#ifdef PICSAR_GIT_VERSION
     return std::string(PICSAR_GIT_VERSION);
 #else
     return std::string("Unknown");
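The new WarpX::getPMLdirections() above flattens the PML configuration into a lo/hi-interleaved vector, {x_lo, x_hi, y_lo, y_hi, z_lo, z_hi} in 3D (four entries in 2D, where the dimensions are x and z), which the constructor hunk earlier passes to WarpXOpenPMDPlot. A hypothetical consumer illustrating that layout (PrintPMLDirections is not WarpX code):

    #include <cstdio>
    #include <vector>

    // Print which domain boundaries carry a PML, following the
    // interleaved layout returned by getPMLdirections().
    // Labels assume 3-D ordering; in 2-D only the first four apply.
    void PrintPMLDirections (const std::vector<bool>& pml)
    {
        static const char* names[] = {"x_lo", "x_hi", "y_lo", "y_hi", "z_lo", "z_hi"};
        for (std::size_t i = 0; i < pml.size(); ++i) {
            std::printf("%s: %s\n", names[i], pml[i] ? "PML" : "no PML");
        }
    }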