63 files changed, 1055 insertions, 802 deletions
diff --git a/Docs/Doxyfile b/Docs/Doxyfile
index 61cdabb22..8506c5ee4 100644
--- a/Docs/Doxyfile
+++ b/Docs/Doxyfile
@@ -830,7 +830,7 @@ FILE_PATTERNS          = *.c \
 # be searched for input files as well.
 # The default value is: NO.
 
-RECURSIVE              = NO
+RECURSIVE              = YES
 
 # The EXCLUDE tag can be used to specify files and/or directories that should be
 # excluded from the INPUT source files. This way you can easily exclude a
@@ -2046,7 +2046,24 @@ INCLUDE_FILE_PATTERNS  =
 # recursively expanded use the := operator instead of the = operator.
 # This tag requires that the tag ENABLE_PREPROCESSING is set to YES.
 
-PREDEFINED             =
+PREDEFINED             = AMREX_Linux=1            \
+                         AMREX_PARTICLES=1        \
+                         AMREX_USE_MPI=1          \
+                         AMREX_USE_OMP=1          \
+                         AMREX_SPACEDIM=3         \
+                         AMREX_TINY_PROFILING=1   \
+                         BL_Linux=1               \
+                         BL_USE_MPI=1             \
+                         BL_USE_OMP=1             \
+                         BL_USE_SENSEI_INSITU=1   \
+                         WARPX=1                  \
+                         WARPX_DIM_RZ=1           \
+                         WARPX_DIM_XZ=1           \
+                         WARPX_DO_ELECTROSTATIC=1 \
+                         WARPX_USE_GPU=1          \
+                         WARPX_USE_OPENPMD=1      \
+                         WARPX_USE_PSATD=1        \
+                         WARPX_USE_PSATD_HYBRID=1
 
 # If the MACRO_EXPANSION and EXPAND_ONLY_PREDEF tags are set to YES then this
 # tag can be used to specify a list of macro names that should be expanded. The
diff --git a/Docs/source/running_cpp/parameters.rst b/Docs/source/running_cpp/parameters.rst
index a6f6756ad..30687244e 100644
--- a/Docs/source/running_cpp/parameters.rst
+++ b/Docs/source/running_cpp/parameters.rst
@@ -70,6 +70,16 @@ Setting up the field mesh
     This patch is rectangular, and thus its extent is given here by the coordinates
     of the lower corner (``warpx.fine_tag_lo``) and upper corner (``warpx.fine_tag_hi``).
 
+* ``warpx.n_current_deposition_buffer`` (`integer`)
+    When using mesh refinement: the particles that are located inside
+    a refinement patch, but within ``n_current_deposition_buffer`` cells of
+    the edge of this patch, will deposit their charge and current to the
+    lower refinement level, instead of depositing to the refinement patch
+    itself. See the section :doc:`../../theory/amr` for more details.
+    If this variable is not explicitly set in the input script,
+    ``n_current_deposition_buffer`` is automatically set so as to be large
+    enough to hold the particle shape, on the fine grid
+
 * ``warpx.n_field_gather_buffer`` (`integer`; 0 by default)
     When using mesh refinement: the particles that are located inside
     a refinement patch, but within ``n_field_gather_buffer`` cells of
@@ -77,14 +87,10 @@ Setting up the field mesh
     level, instead of gathering the fields from the refinement patch itself.
     This avoids some of the spurious effects that can occur inside the
     refinement patch, close to its edge. See the section
-    :doc:`../../theory/amr` for more details.
-
-* ``warpx.n_current_deposition_buffer`` (`integer`)
-    When using mesh refinement: the particles that are located inside
-    a refinement patch, but within ``n_current_deposition_buffer`` cells of
-    the edge of this patch, will deposit their charge and current to the
-    lower refinement level, instead of depositing to the refinement patch
-    itself. See the section :doc:`../../theory/amr` for more details.
+    :doc:`../../theory/amr` for more details. If this variable is not
+    explicitly set in the input script, ``n_field_gather_buffer`` is
+    automatically set so that it is one cell larger than
+    ``n_current_deposition_buffer``, on the fine grid.
 
 * ``particles.deposit_on_main_grid`` (`list of strings`)
     When using mesh refinement: the particle species whose name are included
@@ -285,8 +291,9 @@ Particle initialization
 * ``species_name.predefined_profile_name`` (`string`)
     Only read of ``<species_name>.electrons.profile`` is `predefined`.
 
-    * If ``parabolic_channel``, the plasma profile is a parabolic profile with linear ramps
-      at the beginning and the end of the profile. The density is given by
+    * If ``parabolic_channel``, the plasma profile is a parabolic profile with
+      cosine-like ramps at the beginning and the end of the profile.
+      The density is given by
 
       .. math::
 
@@ -299,9 +306,9 @@ Particle initialization
           n(x,y) = 1 + 4\frac{x^2+y^2}{k_p^2 R_c^4}
 
       where :math:`k_p` is the plasma wavenumber associated with density :math:`n_0`.
-      Here, :math:`n(z)` is a linear up-ramp from :math:`0` to :math:`L_{ramp,up}`,
+      Here, :math:`n(z)` is a cosine-like up-ramp from :math:`0` to :math:`L_{ramp,up}`,
       constant to :math:`1` from :math:`L_{ramp,up}` to :math:`L_{ramp,up} + L_{plateau}`
-      and a linear down-ramp from :math:`L_{ramp,up} + L_{plateau}` to
+      and a cosine-like down-ramp from :math:`L_{ramp,up} + L_{plateau}` to
       :math:`L_{ramp,up} + L_{plateau}+L_{ramp,down}`. All parameters are given
       in ``predefined_profile_params``.
 
@@ -528,6 +535,11 @@ Laser initialization
     moving window and laser propagation directions to be the same (`x`, `y`
     or `z`)
 
+* ``<laser_name>.min_particles_per_mode`` (`int`) optional (default `4`)
+    When using the RZ version, this specifies the minimum number of particles
+    per angular mode. The laser particles are loaded into radial spokes, with
+    the number of spokes given by min_particles_per_mode*(warpx.n_rz_azimuthal_modes-1).
+
 * ``warpx.num_mirrors`` (`int`) optional (default `0`)
     Users can input perfect mirror condition inside the simulation domain.
     The number of mirrors is given by ``warpx.num_mirrors``. The mirrors are
diff --git a/Docs/source/visualization/visualization.rst b/Docs/source/visualization/visualization.rst
index 76acd1bb6..772d13ef9 100644
--- a/Docs/source/visualization/visualization.rst
+++ b/Docs/source/visualization/visualization.rst
@@ -33,3 +33,11 @@ files to disk).
 
     sensei
     ascent
+
+If you like the 3D rendering of laser wakefield acceleration
+on the WarpX documentation frontpage (which is
+also the avatar of the ECP-WarpX organization), you can find the serial
+analysis script :download:`video_yt.py<../../../Tools/video_yt.py>` as well
+as a parallel analysis script
+:download:`video_yt.py<../../../Tools/yt3d_mpi.py>` used to make a similar
+rendering for a beam-driven wakefield simulation, running parallel.
diff --git a/Examples/Modules/nci_corrector/ncicorr_analysis.py b/Examples/Modules/nci_corrector/ncicorr_analysis.py
index b9e46628d..5b7da2447 100755
--- a/Examples/Modules/nci_corrector/ncicorr_analysis.py
+++ b/Examples/Modules/nci_corrector/ncicorr_analysis.py
@@ -14,8 +14,8 @@ ex = np.reshape(ad['boxlib', 'Ex'].v,(128,128))
 ez = np.reshape(ad['boxlib', 'Ez'].v,(128,128))
 by = np.reshape(ad['boxlib', 'By'].v,(128,128))
 energy = np.sum(ex**2 + ez**2 + scc.c**2*by**2)*1.e-12
-assert( energy < 7. )
 
-# Energy should be:
-# FILTER OFF: ~15000.
-# FILTER ON :     ~6.
+print("energy: %s" %energy)
+print("Should be ~1.e0 if filter ON, ~1.e5 if filter OFF.")
+
+assert( energy < 7. )
diff --git a/Examples/Physics_applications/laser_acceleration/inputs.rz b/Examples/Physics_applications/laser_acceleration/inputs.rz
new file mode 100755
index 000000000..83071834e
--- /dev/null
+++ b/Examples/Physics_applications/laser_acceleration/inputs.rz
@@ -0,0 +1,87 @@
+#################################
+####### GENERAL PARAMETERS ######
+#################################
+max_step = 1000
+amr.n_cell =  64  512
+amr.max_grid_size = 64   # maximum size of each AMReX box, used to decompose the domain
+amr.blocking_factor = 32 # minimum size of each AMReX box, used to decompose the domain
+amr.plot_int = 200
+geometry.coord_sys   = 1                  # 0: Cartesian
+geometry.is_periodic = 0     0      # Is periodic?
+geometry.prob_lo     =   0.   -56.e-6    # physical domain
+geometry.prob_hi     =  30.e-6    12.e-6
+amr.max_level = 0 # Maximum level in hierarchy (1 might be unstable, >1 is not supported)
+
+warpx.n_rz_azimuthal_modes = 2
+
+#################################
+############ NUMERICS ###########
+#################################
+interpolation.nox = 3 # Particle interpolation order. Must be the same in x, y, and z
+interpolation.noy = 3
+interpolation.noz = 3
+warpx.verbose = 1
+warpx.do_dive_cleaning = 0
+warpx.plot_raw_fields = 1
+warpx.plot_raw_fields_guards = 1
+warpx.use_filter = 1
+warpx.cfl = 1. # if 1., the time step is set to its CFL limit
+warpx.do_pml = 0 # use Perfectly Matched Layer as boundary condition
+warpx.do_moving_window = 1
+warpx.moving_window_dir = z # Only z is supported for the moment
+warpx.moving_window_v = 1.0 # units of speed of light
+
+#################################
+############ PLASMA #############
+#################################
+particles.nspecies = 2 # number of species
+particles.species_names = electrons beam
+
+electrons.charge = -q_e
+electrons.mass = m_e
+electrons.injection_style = "NUniformPerCell"
+electrons.num_particles_per_cell_each_dim = 1 1 1
+electrons.xmin = -20.e-6
+electrons.xmax =  20.e-6
+electrons.zmin =  10.e-6
+electrons.profile = constant
+electrons.density = 2.e23  # number of electrons per m^3
+electrons.momentum_distribution_type = "constant"
+electrons.do_continuous_injection = 1
+
+beam.charge = -q_e
+beam.mass = m_e
+beam.injection_style = "gaussian_beam"
+beam.x_rms = .5e-6
+beam.y_rms = .5e-6
+beam.z_rms = .5e-6
+beam.x_m = 0.
+beam.y_m = 0.
+beam.z_m = -28.e-6
+beam.npart = 100
+beam.q_tot = -1.e-12
+beam.profile = "constant"
+beam.density = 8.e23 # not used in case of a gaussian beam
+beam.momentum_distribution_type = "gaussian"
+beam.ux_m = 0.0
+beam.uy_m = 0.0
+beam.uz_m = 500.
+beam.ux_th = 2.
+beam.uy_th = 2.
+beam.uz_th = 50.
+
+#################################
+############ PLASMA #############
+#################################
+lasers.nlasers      = 1
+lasers.names        = laser1
+laser1.profile      = Gaussian
+laser1.position     = 0. 0. 9.e-6        # This point is on the laser plane
+laser1.direction    = 0. 0. 1.           # The plane normal direction
+laser1.polarization = 0. 1. 0.           # The main polarization vector
+laser1.e_max        = 16.e12             # Maximum amplitude of the laser field (in V/m)
+laser1.profile_waist = 5.e-6             # The waist of the laser (in m)
+laser1.profile_duration = 15.e-15        # The duration of the laser (in s)
+laser1.profile_t_peak = 30.e-15          # Time at which the laser reaches its peak (in s)
+laser1.profile_focal_distance = 100.e-6  # Focal distance from the antenna (in m)
+laser1.wavelength = 0.8e-6               # The wavelength of the laser (in m)
diff --git a/Examples/Tests/Langmuir/langmuir2d_analysis.py b/Examples/Tests/Langmuir/langmuir2d_analysis.py
index 40125d6f5..016eabbce 100755
--- a/Examples/Tests/Langmuir/langmuir2d_analysis.py
+++ b/Examples/Tests/Langmuir/langmuir2d_analysis.py
@@ -25,13 +25,21 @@ data = ds.covering_grid( 0, ds.domain_left_edge, ds.domain_dimensions )
 # Check the Jx field, which oscillates at wp
 j_predicted = -n0*e*c*ux*np.cos( wp*t*39.5/40 ) # 40 timesteps / j at half-timestep
 jx = data['jx'].to_ndarray()
+# Print errors, and assert small error
+print( "relative error: np.max( np.abs( ( jx[:32,:,0] - j_predicted ) / j_predicted ) ) = %s" \
+           %np.max( np.abs( ( jx[:32,:,0] - j_predicted ) / j_predicted ) ) )
 assert np.allclose( jx[:32,:,0], j_predicted, rtol=0.1 )
+print( "absolute error: np.max( np.abs( jx[32:,:,0] ) ) = %s" %np.max( np.abs( jx[:32,:,0] ) ) )
 assert np.allclose( jx[32:,:,0], 0, atol=1.e-2 )
 
 # Check the Ex field, which oscillates at wp
 E_predicted = m_e * wp * ux * c / e * np.sin(wp*t)
 Ex = data['Ex'].to_ndarray()
+# Print errors, and assert small error
+print( "relative error: np.max( np.abs( ( Ex[:32,:,0] - E_predicted ) / E_predicted ) ) = %s" \
+           %np.max( np.abs( ( Ex[:32,:,0] - E_predicted ) / E_predicted ) ) )
 assert np.allclose( Ex[:32,:,0], E_predicted, rtol=0.1 )
+print( "absolute error: np.max( np.abs( Ex[32:,:,0] ) ) = %s" %np.max( np.abs( Ex[:32,:,0] ) ) )
 assert np.allclose( Ex[32:,:,0], 0, atol=1.e-4 )
 
 # Save an image to be displayed on the website
diff --git a/Examples/Tests/Langmuir/langmuir_analysis.py b/Examples/Tests/Langmuir/langmuir_analysis.py
index 578ecc8b7..2ffb7f56b 100755
--- a/Examples/Tests/Langmuir/langmuir_analysis.py
+++ b/Examples/Tests/Langmuir/langmuir_analysis.py
@@ -48,33 +48,27 @@ E_predicted = m_e * wp * u * c / e * np.sin(wp*t)
 # at the edges of the plasma
 if direction == 'x':
     E = data[ 'Ex' ].to_ndarray()
-    # compute and print errors
-    max_rel_error_nonzero = np.max(np.abs((E[2:30,:,:]-E_predicted)/E_predicted))
-    max_rel_error_zero = np.max(E[34:-2,:,:])
-    print('relative error: %s' %max_rel_error_nonzero)
-    print('absolute field error (where field should be 0): %s' %max_rel_error_zero)
-    # assert small errors
+    # Print errors, and assert small error
+    print( "relative error: np.max( np.abs( ( E[2:30,:,:] - E_predicted ) / E_predicted ) ) = %s" \
+               %np.max( np.abs( ( E[2:30,:,:] - E_predicted ) / E_predicted ) ) )
     assert np.allclose( E[2:30,:,:], E_predicted, rtol=0.1 )
-    assert np.allclose( E[34:-2,:,:], 0, atol=2.e-5 )
+    print( "absolute error: np.max( np.abs( E[34:-2,:,:] ) ) = %s" %np.max( np.abs( E[34:-2,:,:] ) ) )
+    assert np.allclose( E[34:-2,:,:], 0, atol=5.e-5 )
 elif direction == 'y':
     E = data[ 'Ey' ].to_ndarray()
-    # compute and print errors
-    max_rel_error_nonzero = np.max(np.abs((E[:,2:30,:]-E_predicted)/E_predicted))
-    max_rel_error_zero = np.max(E[:,34:-2,:])
-    print('relative error: %s' %max_rel_error_nonzero)
-    print('absolute field error (where field should be 0): %s' %max_rel_error_zero)
-    # assert small errors
+    # Print errors, and assert small error
+    print( "relative error: np.max( np.abs( ( E[:,2:30,:] - E_predicted ) / E_predicted ) ) = %s" \
+               %np.max( np.abs( ( E[:,2:30,:] - E_predicted ) / E_predicted ) ) )
     assert np.allclose( E[:,2:30,:], E_predicted, rtol=0.1 )
+    print( "absolute error: np.max( np.abs( E[:,34:-2,:] ) ) = %s" %np.max( np.abs( E[:,34:-2,:] ) ) )
     assert np.allclose( E[:,34:-2,:], 0, atol=2.e-5 )
 elif direction == 'z':
     E = data[ 'Ez' ].to_ndarray()
-    # compute and print errors
-    max_rel_error_nonzero = np.max(np.abs((E[:,:,2:30]-E_predicted)/E_predicted))
-    max_rel_error_zero = np.max(E[:,:,34:-2])
-    print('relative error: %s' %max_rel_error_nonzero)
-    print('absolute field error (where field should be 0): %s' %max_rel_error_zero)
-    # assert small errors
+    # Print errors, and assert small error
+    print( "relative error: np.max( np.abs( ( E[:,:,2:30] - E_predicted ) / E_predicted ) ) = %s" \
+               %np.max( np.abs( ( E[:,:,2:30] - E_predicted ) / E_predicted ) ) )
     assert np.allclose( E[:,:,2:30], E_predicted, rtol=0.1 )
+    print( "absolute error: np.max( np.abs( E[:,:,34:-2] ) ) = %s" %np.max( np.abs( E[:,:,34:-2] ) ) )
     assert np.allclose( E[:,:,34:-2], 0, atol=2.e-5 )
 
 # Save an image to be displayed on the website
diff --git a/Examples/Tests/PML/analysis_pml_ckc.py b/Examples/Tests/PML/analysis_pml_ckc.py
index d6bef942f..08019e60b 100755
--- a/Examples/Tests/PML/analysis_pml_ckc.py
+++ b/Examples/Tests/PML/analysis_pml_ckc.py
@@ -30,8 +30,8 @@ energy_end = energyE + energyB
 Reflectivity = energy_end/energy_start
 Reflectivity_theory = 1.8015e-06
 
-print("Reflectivity", Reflectivity)
-print("Reflectivity_theory", Reflectivity_theory)
+print("Reflectivity: %s" %Reflectivity)
+print("Reflectivity_theory: %s" %Reflectivity_theory)
 
 assert( Reflectivity < 105./100 * Reflectivity_theory )
 
diff --git a/Examples/Tests/PML/analysis_pml_psatd.py b/Examples/Tests/PML/analysis_pml_psatd.py
index ba9120c8d..c8c1aea6c 100755
--- a/Examples/Tests/PML/analysis_pml_psatd.py
+++ b/Examples/Tests/PML/analysis_pml_psatd.py
@@ -30,5 +30,8 @@ energy_end = energyE + energyB
 Reflectivity = energy_end/energy_start
 Reflectivity_theory = 1.3806831258153887e-06
 
+print("Reflectivity: %s" %Reflectivity)
+print("Reflectivity_theory: %s" %Reflectivity_theory)
+
 assert( abs(Reflectivity-Reflectivity_theory) < 5./100 * Reflectivity_theory )
 
diff --git a/Examples/Tests/PML/analysis_pml_yee.py b/Examples/Tests/PML/analysis_pml_yee.py
index 0def05450..c0c91329d 100755
--- a/Examples/Tests/PML/analysis_pml_yee.py
+++ b/Examples/Tests/PML/analysis_pml_yee.py
@@ -30,5 +30,8 @@ energy_end = energyE + energyB
 Reflectivity = energy_end/energy_start
 Reflectivity_theory = 5.683000058954201e-07
 
+print("Reflectivity: %s" %Reflectivity)
+print("Reflectivity_theory: %s" %Reflectivity_theory)
+
 assert( abs(Reflectivity-Reflectivity_theory) < 5./100 * Reflectivity_theory )
 
diff --git a/Examples/Tests/SingleParticle/bilinear_filter_analysis.py b/Examples/Tests/SingleParticle/bilinear_filter_analysis.py
index 269a4d329..494434279 100755
--- a/Examples/Tests/SingleParticle/bilinear_filter_analysis.py
+++ b/Examples/Tests/SingleParticle/bilinear_filter_analysis.py
@@ -42,5 +42,5 @@ F_filtered = all_data_level_0['boxlib', 'jx'].v.squeeze()
 
 # Compare theory and PIC for filtered value
 error = np.sum( np.abs(F_filtered - my_F_filtered) ) / np.sum( np.abs(my_F_filtered) )
-print( "error: %s" %error )
+print( "error: np.sum( np.abs(F_filtered - my_F_filtered) ) / np.sum( np.abs(my_F_filtered) ) = %s" %error )
 assert( error < 1.e-14 )
diff --git a/Examples/Tests/particles_in_PML/analysis.py b/Examples/Tests/particles_in_PML/analysis.py
index ab5792082..96406d717 100755
--- a/Examples/Tests/particles_in_PML/analysis.py
+++ b/Examples/Tests/particles_in_PML/analysis.py
@@ -24,7 +24,7 @@ Ex_array = ad0['Ex'].to_ndarray()
 Ey_array = ad0['Ey'].to_ndarray()
 Ez_array = ad0['Ez'].to_ndarray()
 max_Efield = max(Ex_array.max(), Ey_array.max(), Ez_array.max())
-print( max_Efield )
+print( "max_Efield = %s" %max_Efield )
 
 # The field associated with the particle does not have
 # the same amplitude in 2d and 3d
diff --git a/Examples/Tests/photon_pusher/check.py b/Examples/Tests/photon_pusher/check.py
index b1f6d916c..3e6873842 100755
--- a/Examples/Tests/photon_pusher/check.py
+++ b/Examples/Tests/photon_pusher/check.py
@@ -87,6 +87,11 @@ def check():
     disc_mom = [np.linalg.norm(a-b)/np.linalg.norm(b)
         for a,b in zip(res_mom, answ_mom)]
 
+    print("max(disc_pos) = %s" %max(disc_pos))
+    print("tol_pos = %s" %tol_pos)
+    print("max(disc_mom) = %s" %max(disc_mom))
+    print("tol_mom = %s" %tol_mom)
+
     assert ((max(disc_pos) <= tol_pos) and (max(disc_mom) <= tol_mom))
 
 # This function generates the input file to test the photon pusher.
diff --git a/Regression/WarpX-tests.ini b/Regression/WarpX-tests.ini
index 0e2a01473..e5b9ca028 100644
--- a/Regression/WarpX-tests.ini
+++ b/Regression/WarpX-tests.ini
@@ -211,6 +211,24 @@ runtime_params = electrons.ux=0.01 electrons.xmax=0.e-6 warpx.fields_to_plot=Ex
 analysisRoutine = Examples/Tests/Langmuir/langmuir2d_analysis.py
 analysisOutputImage = langmuir2d_analysis.png
 
+[Langmuir_2d_nompi]
+buildDir = .
+inputFile = Examples/Tests/Langmuir/inputs.rt
+dim = 2
+addToCompileString =
+restartTest = 0
+useMPI = 0
+numprocs = 1
+useOMP = 2
+numthreads = 2
+compileTest = 0
+doVis = 0
+compareParticles = 1
+particleTypes = electrons
+runtime_params = electrons.ux=0.01 electrons.xmax=0.e-6 warpx.fields_to_plot=Ex jx electrons.plot_vars=w ux Ex
+analysisRoutine = Examples/Tests/Langmuir/langmuir2d_analysis.py
+analysisOutputImage = langmuir2d_analysis.png
+
 [Langmuir_x]
 buildDir = .
 inputFile = Examples/Tests/Langmuir/inputs.rt
@@ -352,7 +370,7 @@ numthreads = 1
 compileTest = 0
 doVis = 0
 compareParticles = 1
-runtime_params = warpx.do_nodal=1 algo.current_deposition=direct electrons.plot_vars=w ux uy uz Ex Ey Ez
+runtime_params = warpx.do_nodal=1 algo.current_deposition=direct electrons.plot_vars=w ux uy uz Ex Ey Ez positrons.plot_vars=w ux uy uz Ex Ey Ez
 particleTypes = electrons positrons
 analysisRoutine = Examples/Tests/Langmuir/langmuir_multi_2d_analysis.py
 analysisOutputImage = langmuir_multi_2d_analysis.png
diff --git a/Source/BoundaryConditions/PML.cpp b/Source/BoundaryConditions/PML.cpp
index edf8c8358..572030f73 100644
--- a/Source/BoundaryConditions/PML.cpp
+++ b/Source/BoundaryConditions/PML.cpp
@@ -23,7 +23,6 @@ namespace
         int olo = overlap.smallEnd(idim);
         int ohi = overlap.bigEnd(idim);
         int slo = sigma.m_lo;
-        int shi = sigma.m_hi;
         int sslo = sigma_star.m_lo;
 
         for (int i = olo; i <= ohi+1; ++i)
@@ -51,7 +50,6 @@ namespace
         int olo = overlap.smallEnd(idim);
         int ohi = overlap.bigEnd(idim);
         int slo = sigma.m_lo;
-        int shi = sigma.m_hi;
         int sslo = sigma_star.m_lo;
         for (int i = olo; i <= ohi+1; ++i)
         {
diff --git a/Source/Diagnostics/BoostedFrameDiagnostic.cpp b/Source/Diagnostics/BoostedFrameDiagnostic.cpp
index abd9e99fe..45343a0cb 100644
--- a/Source/Diagnostics/BoostedFrameDiagnostic.cpp
+++ b/Source/Diagnostics/BoostedFrameDiagnostic.cpp
@@ -876,37 +876,37 @@ writeParticleData(const WarpXParticleContainer::DiagnosticParticleData& pdata,
 
     field_name = name + Concatenate("w_", i_lab, 5) + "_" + std::to_string(MyProc);
     ofs.open(field_name.c_str(), std::ios::out|std::ios::binary);
-    writeRealData(pdata.GetRealData(DiagIdx::w).data(), np, ofs);
+    writeData(pdata.GetRealData(DiagIdx::w).data(), np, ofs);
     ofs.close();
 
     field_name = name + Concatenate("x_", i_lab, 5) + "_" + std::to_string(MyProc);
     ofs.open(field_name.c_str(), std::ios::out|std::ios::binary);
-    writeRealData(pdata.GetRealData(DiagIdx::x).data(), np, ofs);
+    writeData(pdata.GetRealData(DiagIdx::x).data(), np, ofs);
     ofs.close();
 
     field_name = name + Concatenate("y_", i_lab, 5) + "_" + std::to_string(MyProc);
     ofs.open(field_name.c_str(), std::ios::out|std::ios::binary);
-    writeRealData(pdata.GetRealData(DiagIdx::y).data(), np, ofs);
+    writeData(pdata.GetRealData(DiagIdx::y).data(), np, ofs);
     ofs.close();
 
     field_name = name + Concatenate("z_", i_lab, 5) + "_" + std::to_string(MyProc);
     ofs.open(field_name.c_str(), std::ios::out|std::ios::binary);
-    writeRealData(pdata.GetRealData(DiagIdx::z).data(), np, ofs);
+    writeData(pdata.GetRealData(DiagIdx::z).data(), np, ofs);
     ofs.close();
 
     field_name = name + Concatenate("ux_", i_lab, 5) + "_" + std::to_string(MyProc);
     ofs.open(field_name.c_str(), std::ios::out|std::ios::binary);
-    writeRealData(pdata.GetRealData(DiagIdx::ux).data(), np, ofs);
+    writeData(pdata.GetRealData(DiagIdx::ux).data(), np, ofs);
     ofs.close();
 
     field_name = name + Concatenate("uy_", i_lab, 5) + "_" + std::to_string(MyProc);
     ofs.open(field_name.c_str(), std::ios::out|std::ios::binary);
-    writeRealData(pdata.GetRealData(DiagIdx::uy).data(), np, ofs);
+    writeData(pdata.GetRealData(DiagIdx::uy).data(), np, ofs);
     ofs.close();
 
     field_name = name + Concatenate("uz_", i_lab, 5) + "_" + std::to_string(MyProc);
     ofs.open(field_name.c_str(), std::ios::out|std::ios::binary);
-    writeRealData(pdata.GetRealData(DiagIdx::uz).data(), np, ofs);
+    writeData(pdata.GetRealData(DiagIdx::uz).data(), np, ofs);
     ofs.close();
 }
 
diff --git a/Source/Diagnostics/ParticleIO.cpp b/Source/Diagnostics/ParticleIO.cpp
index 5cf3f3047..2a9c16aa8 100644
--- a/Source/Diagnostics/ParticleIO.cpp
+++ b/Source/Diagnostics/ParticleIO.cpp
@@ -174,9 +174,9 @@ PhysicalParticleContainer::ConvertUnits(ConvertDirection convert_direction)
         {
             // - momenta are stored as a struct of array, in `attribs`
             auto& attribs = pti.GetAttribs();
-            Real* AMREX_RESTRICT ux = attribs[PIdx::ux].dataPtr();
-            Real* AMREX_RESTRICT uy = attribs[PIdx::uy].dataPtr();
-            Real* AMREX_RESTRICT uz = attribs[PIdx::uz].dataPtr();
+            ParticleReal* AMREX_RESTRICT ux = attribs[PIdx::ux].dataPtr();
+            ParticleReal* AMREX_RESTRICT uy = attribs[PIdx::uy].dataPtr();
+            ParticleReal* AMREX_RESTRICT uz = attribs[PIdx::uz].dataPtr();
             // Loop over the particles and convert momentum
             const long np = pti.numParticles();
             ParallelFor( np,
diff --git a/Source/FieldSolver/WarpX_FDTD.H b/Source/FieldSolver/WarpX_FDTD.H
index 8ead945e6..b93c0f40a 100644
--- a/Source/FieldSolver/WarpX_FDTD.H
+++ b/Source/FieldSolver/WarpX_FDTD.H
@@ -16,7 +16,7 @@ void warpx_push_bx_yee(int i, int j, int k,
                  + dtsdz * (Ey(i,j  ,k+1) - Ey(i,j,k));
 #elif (defined WARPX_DIM_XZ)
     Bx(i,j,0) += + dtsdz * (Ey(i,j+1,0) - Ey(i,j,0));
-#if (defined WARPX_DIM_RZ)
+#elif (defined WARPX_DIM_RZ)
     if (i != 0 || rmin != 0.) {
         Bx(i,j,0,0) += + dtsdz * (Ey(i,j+1,0,0) - Ey(i,j,0,0));
     } else {
@@ -29,9 +29,9 @@ void warpx_push_bx_yee(int i, int j, int k,
                 // (due to the 1/r terms). The following expressions regularize
                 // these divergences by assuming, on axis :
                 // Ez/r = 0/r + dEz/dr
-                Bx(i,j,0,2*imode-1) = Bx(i,j,0,2*imode-1) - imode*dtsdx*Ez(i+1,j,0,2*imode) &
+                Bx(i,j,0,2*imode-1) = Bx(i,j,0,2*imode-1) - imode*dtsdx*Ez(i+1,j,0,2*imode)
                            + dtsdz*(Ey(i,j+1,0,2*imode-1) - Ey(i,j,0,2*imode-1));
-                Bx(i,j,0,2*imode) = Bx(i,j,0,2*imode) + imode*dtsdx*Ez(i+1,j,0,2*imode-1) &
+                Bx(i,j,0,2*imode) = Bx(i,j,0,2*imode) + imode*dtsdx*Ez(i+1,j,0,2*imode-1)
                            + dtsdz*(Ey(i,j+1,0,2*imode) - Ey(i,j,0,2*imode));
             } else {
                 Bx(i,j,0,2*imode-1) = 0.;
@@ -40,14 +40,13 @@ void warpx_push_bx_yee(int i, int j, int k,
         } else {
             // Br(i,j,m) = Br(i,j,m) + I*m*dt*Ez(i,j,m)/r + dtsdz*(Et(i,j+1,m) - Et(i,j,m))
             const amrex::Real r = rmin*dxinv + i;
-            Bx(i,j,0,2*imode-1) = Bx(i,j,0,2*imode-1) - imode*dtsdx*Ez(i,j,0,2*imode)/r &
+            Bx(i,j,0,2*imode-1) = Bx(i,j,0,2*imode-1) - imode*dtsdx*Ez(i,j,0,2*imode)/r
                        + dtsdz*(Ey(i,j+1,0,2*imode-1) - Ey(i,j,0,2*imode-1));
-            Bx(i,j,0,2*imode) = Bx(i,j,0,2*imode) + imode*dtsdx*Ez(i,j,0,2*imode-1)/r &
+            Bx(i,j,0,2*imode) = Bx(i,j,0,2*imode) + imode*dtsdx*Ez(i,j,0,2*imode-1)/r
                        + dtsdz*(Ey(i,j+1,0,2*imode) - Ey(i,j,0,2*imode));
         }
     }
 #endif
-#endif
 }
 
 AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
diff --git a/Source/FortranInterface/WarpX_f.H b/Source/FortranInterface/WarpX_f.H
index 9a0aeb819..2d5d346e2 100644
--- a/Source/FortranInterface/WarpX_f.H
+++ b/Source/FortranInterface/WarpX_f.H
@@ -84,14 +84,14 @@ extern "C"
 #endif
                           const amrex::Real* dx);
 
-    void WRPX_DEPOSIT_CIC(const amrex::Real* particles, int ns, int np,
-                          const amrex::Real* weights,
+    void WRPX_DEPOSIT_CIC(const amrex::ParticleReal* particles, int ns, int np,
+                          const amrex::ParticleReal* weights,
                           const amrex::Real* charge,
                           amrex::Real* rho, const int* lo, const int* hi,
                           const amrex::Real* plo, const amrex::Real* dx,
                           const int* ng);
 
-    void WRPX_INTERPOLATE_CIC_TWO_LEVELS(const amrex::Real* particles, int ns, int np,
+    void WRPX_INTERPOLATE_CIC_TWO_LEVELS(const amrex::ParticleReal* particles, int ns, int np,
                                          amrex::Real* Ex_p, amrex::Real* Ey_p,
 #if (AMREX_SPACEDIM == 3)
                                          amrex::Real* Ez_p,
@@ -109,7 +109,7 @@ extern "C"
                                          const int* clo, const int* chi, const amrex::Real* cdx,
                                          const amrex::Real* plo, const int* ng, const int* lev);
 
-    void WRPX_INTERPOLATE_CIC(const amrex::Real* particles, int ns, int np,
+    void WRPX_INTERPOLATE_CIC(const amrex::ParticleReal* particles, int ns, int np,
                               amrex::Real* Ex_p, amrex::Real* Ey_p,
 #if (AMREX_SPACEDIM == 3)
                               amrex::Real* Ez_p,
@@ -122,10 +122,10 @@ extern "C"
                               const amrex::Real* plo, const amrex::Real* dx,
                               const int* ng);
 
-    void WRPX_PUSH_LEAPFROG(amrex::Real* particles, int ns, int np,
-                            amrex::Real* vx_p, amrex::Real* vy_p,
+    void WRPX_PUSH_LEAPFROG(amrex::ParticleReal* particles, int ns, int np,
+                            amrex::ParticleReal* vx_p, amrex::ParticleReal* vy_p,
 #if (AMREX_SPACEDIM == 3)
-                            amrex::Real* vz_p,
+                            amrex::ParticleReal* vz_p,
 #endif
                             const amrex::Real* Ex_p, const amrex::Real* Ey_p,
 #if (AMREX_SPACEDIM == 3)
@@ -134,10 +134,10 @@ extern "C"
                             const amrex::Real* charge, const amrex::Real* mass, const amrex::Real* dt,
                             const amrex::Real* prob_lo, const amrex::Real* prob_hi);
 
-    void WRPX_PUSH_LEAPFROG_POSITIONS(amrex::Real* particles, int ns, int np,
-                                      amrex::Real* vx_p, amrex::Real* vy_p,
+    void WRPX_PUSH_LEAPFROG_POSITIONS(amrex::ParticleReal* particles, int ns, int np,
+                                      amrex::ParticleReal* vx_p, amrex::ParticleReal* vy_p,
 #if (AMREX_SPACEDIM == 3)
-                                      amrex::Real* vz_p,
+                                      amrex::ParticleReal* vz_p,
 #endif
                                       const amrex::Real* dt, const amrex::Real* prob_lo,
                                       const amrex::Real* prob_hi);
diff --git a/Source/Initialization/InjectorPosition.H b/Source/Initialization/InjectorPosition.H
index f8f16746c..6ecae93e0 100644
--- a/Source/Initialization/InjectorPosition.H
+++ b/Source/Initialization/InjectorPosition.H
@@ -41,7 +41,9 @@ struct InjectorPositionRegular
         int ix_part = i_part/(ny*nz);  // written this way backward compatibility
         int iz_part = (i_part-ix_part*(ny*nz)) / ny;
         int iy_part = (i_part-ix_part*(ny*nz)) - ny*iz_part;
-        return amrex::XDim3{(0.5+ix_part)/nx, (0.5+iy_part)/ny, (0.5+iz_part) / nz};
+        return amrex::XDim3{(amrex::Real(0.5)+ix_part)/nx,
+                            (amrex::Real(0.5)+iy_part)/ny,
+                            (amrex::Real(0.5)+iz_part) / nz};
     }
 private:
     amrex::Dim3 ppc;
diff --git a/Source/Initialization/PlasmaInjector.H b/Source/Initialization/PlasmaInjector.H
index a944165d6..56b32c827 100644
--- a/Source/Initialization/PlasmaInjector.H
+++ b/Source/Initialization/PlasmaInjector.H
@@ -43,9 +43,9 @@ public:
     bool doInjection () const noexcept { return inj_pos != NULL;}
 
     bool add_single_particle = false;
-    amrex::Vector<amrex::Real> single_particle_pos;
-    amrex::Vector<amrex::Real> single_particle_vel;
-    amrex::Real single_particle_weight;
+    amrex::Vector<amrex::ParticleReal> single_particle_pos;
+    amrex::Vector<amrex::ParticleReal> single_particle_vel;
+    amrex::ParticleReal single_particle_weight;
 
     bool gaussian_beam = false;
     amrex::Real x_m;
diff --git a/Source/Laser/LaserParticleContainer.H b/Source/Laser/LaserParticleContainer.H
index 31fe7cee4..e2a0743bc 100644
--- a/Source/Laser/LaserParticleContainer.H
+++ b/Source/Laser/LaserParticleContainer.H
@@ -56,10 +56,10 @@ public:
                                             amrex::Real * AMREX_RESTRICT const pplane_Xp,
                                             amrex::Real * AMREX_RESTRICT const pplane_Yp);
 
-    void update_laser_particle (const int np, amrex::Real * AMREX_RESTRICT const puxp,
-                                amrex::Real * AMREX_RESTRICT const puyp,
-                                amrex::Real * AMREX_RESTRICT const puzp,
-                                amrex::Real const * AMREX_RESTRICT const pwp,
+    void update_laser_particle (const int np, amrex::ParticleReal * AMREX_RESTRICT const puxp,
+                                amrex::ParticleReal * AMREX_RESTRICT const puyp,
+                                amrex::ParticleReal * AMREX_RESTRICT const puzp,
+                                amrex::ParticleReal const * AMREX_RESTRICT const pwp,
                                 amrex::Real const * AMREX_RESTRICT const amplitude,
                                 const amrex::Real dt, const int thread_num);
 
@@ -81,6 +81,8 @@ private:
     amrex::Real               wavelength  = std::numeric_limits<amrex::Real>::quiet_NaN();
     amrex::Real               Z0_lab = 0; // Position of the antenna in the lab frame
 
+    long min_particles_per_mode = 4;
+
     // computed using runtime parameters
     amrex::Vector<amrex::Real> p_Y;
     amrex::Vector<amrex::Real> u_X;
diff --git a/Source/Laser/LaserParticleContainer.cpp b/Source/Laser/LaserParticleContainer.cpp
index dca73de50..8571c74ad 100644
--- a/Source/Laser/LaserParticleContainer.cpp
+++ b/Source/Laser/LaserParticleContainer.cpp
@@ -40,115 +40,116 @@ LaserParticleContainer::LaserParticleContainer (AmrCore* amr_core, int ispecies,
         profile = laser_t::Harris;
     } else if(laser_type_s == "parse_field_function") {
         profile = laser_t::parse_field_function;
-        } else {
-            amrex::Abort("Unknown laser type");
-        }
+    } else {
+        amrex::Abort("Unknown laser type");
+    }
 
-        // Parse the properties of the antenna
-        pp.getarr("position", position);
-        pp.getarr("direction", nvec);
-        pp.getarr("polarization", p_X);
-        pp.query("pusher_algo", pusher_algo);
-        pp.get("wavelength", wavelength);
-        pp.get("e_max", e_max);
-        pp.query("do_continuous_injection", do_continuous_injection);
-
-        if ( profile == laser_t::Gaussian ) {
-            // Parse the properties of the Gaussian profile
-            pp.get("profile_waist", profile_waist);
-            pp.get("profile_duration", profile_duration);
-            pp.get("profile_t_peak", profile_t_peak);
-            pp.get("profile_focal_distance", profile_focal_distance);
-            stc_direction = p_X;
-            pp.queryarr("stc_direction", stc_direction);
-            pp.query("zeta", zeta);
-            pp.query("beta", beta);
-            pp.query("phi2", phi2);
-        }
+    // Parse the properties of the antenna
+    pp.getarr("position", position);
+    pp.getarr("direction", nvec);
+    pp.getarr("polarization", p_X);
+    pp.query("pusher_algo", pusher_algo);
+    pp.get("wavelength", wavelength);
+    pp.get("e_max", e_max);
+    pp.query("do_continuous_injection", do_continuous_injection);
+    pp.query("min_particles_per_mode", min_particles_per_mode);
+
+    if ( profile == laser_t::Gaussian ) {
+        // Parse the properties of the Gaussian profile
+        pp.get("profile_waist", profile_waist);
+        pp.get("profile_duration", profile_duration);
+        pp.get("profile_t_peak", profile_t_peak);
+        pp.get("profile_focal_distance", profile_focal_distance);
+        stc_direction = p_X;
+        pp.queryarr("stc_direction", stc_direction);
+        pp.query("zeta", zeta);
+        pp.query("beta", beta);
+        pp.query("phi2", phi2);
+    }
 
-        if ( profile == laser_t::Harris ) {
-            // Parse the properties of the Harris profile
-            pp.get("profile_waist", profile_waist);
-            pp.get("profile_duration", profile_duration);
-            pp.get("profile_focal_distance", profile_focal_distance);
-        }
+    if ( profile == laser_t::Harris ) {
+        // Parse the properties of the Harris profile
+        pp.get("profile_waist", profile_waist);
+        pp.get("profile_duration", profile_duration);
+        pp.get("profile_focal_distance", profile_focal_distance);
+    }
 
-        if ( profile == laser_t::parse_field_function ) {
-            // Parse the properties of the parse_field_function profile
-            pp.get("field_function(X,Y,t)", field_function);
-            parser.define(field_function);
-            parser.registerVariables({"X","Y","t"});
-
-            ParmParse ppc("my_constants");
-            std::set<std::string> symbols = parser.symbols();
-            symbols.erase("X");
-            symbols.erase("Y");
-            symbols.erase("t"); // after removing variables, we are left with constants
-            for (auto it = symbols.begin(); it != symbols.end(); ) {
-                Real v;
-                if (ppc.query(it->c_str(), v)) {
-                    parser.setConstant(*it, v);
-                    it = symbols.erase(it);
-                } else {
-                    ++it;
-                }
-            }
-            for (auto const& s : symbols) { // make sure there no unknown symbols
-                amrex::Abort("Laser Profile: Unknown symbol "+s);
+    if ( profile == laser_t::parse_field_function ) {
+        // Parse the properties of the parse_field_function profile
+        pp.get("field_function(X,Y,t)", field_function);
+        parser.define(field_function);
+        parser.registerVariables({"X","Y","t"});
+
+        ParmParse ppc("my_constants");
+        std::set<std::string> symbols = parser.symbols();
+        symbols.erase("X");
+        symbols.erase("Y");
+        symbols.erase("t"); // after removing variables, we are left with constants
+        for (auto it = symbols.begin(); it != symbols.end(); ) {
+            Real v;
+            if (ppc.query(it->c_str(), v)) {
+                parser.setConstant(*it, v);
+                it = symbols.erase(it);
+            } else {
+                ++it;
             }
         }
-
-        // Plane normal
-        Real s = 1.0/std::sqrt(nvec[0]*nvec[0] + nvec[1]*nvec[1] + nvec[2]*nvec[2]);
-        nvec = { nvec[0]*s, nvec[1]*s, nvec[2]*s };
-
-        if (WarpX::gamma_boost > 1.) {
-            // Check that the laser direction is equal to the boost direction
-            AMREX_ALWAYS_ASSERT_WITH_MESSAGE(  nvec[0]*WarpX::boost_direction[0]
-                                             + nvec[1]*WarpX::boost_direction[1]
-                                             + nvec[2]*WarpX::boost_direction[2] - 1. < 1.e-12,
-                                               "The Lorentz boost should be in the same direction as the laser propagation");
-            // Get the position of the plane, along the boost direction, in the lab frame
-            // and convert the position of the antenna to the boosted frame
-            Z0_lab = nvec[0]*position[0] + nvec[1]*position[1] + nvec[2]*position[2];
-            Real Z0_boost = Z0_lab/WarpX::gamma_boost;
-            position[0] += (Z0_boost-Z0_lab)*nvec[0];
-            position[1] += (Z0_boost-Z0_lab)*nvec[1];
-            position[2] += (Z0_boost-Z0_lab)*nvec[2];
+        for (auto const& s : symbols) { // make sure there no unknown symbols
+            amrex::Abort("Laser Profile: Unknown symbol "+s);
         }
+    }
 
-        // The first polarization vector
-        s = 1.0/std::sqrt(p_X[0]*p_X[0] + p_X[1]*p_X[1] + p_X[2]*p_X[2]);
-        p_X = { p_X[0]*s, p_X[1]*s, p_X[2]*s };
+    // Plane normal
+    Real s = 1.0/std::sqrt(nvec[0]*nvec[0] + nvec[1]*nvec[1] + nvec[2]*nvec[2]);
+    nvec = { nvec[0]*s, nvec[1]*s, nvec[2]*s };
+
+    if (WarpX::gamma_boost > 1.) {
+        // Check that the laser direction is equal to the boost direction
+        AMREX_ALWAYS_ASSERT_WITH_MESSAGE(  nvec[0]*WarpX::boost_direction[0]
+                                         + nvec[1]*WarpX::boost_direction[1]
+                                         + nvec[2]*WarpX::boost_direction[2] - 1. < 1.e-12,
+                                           "The Lorentz boost should be in the same direction as the laser propagation");
+        // Get the position of the plane, along the boost direction, in the lab frame
+        // and convert the position of the antenna to the boosted frame
+        Z0_lab = nvec[0]*position[0] + nvec[1]*position[1] + nvec[2]*position[2];
+        Real Z0_boost = Z0_lab/WarpX::gamma_boost;
+        position[0] += (Z0_boost-Z0_lab)*nvec[0];
+        position[1] += (Z0_boost-Z0_lab)*nvec[1];
+        position[2] += (Z0_boost-Z0_lab)*nvec[2];
+    }
+
+    // The first polarization vector
+    s = 1.0/std::sqrt(p_X[0]*p_X[0] + p_X[1]*p_X[1] + p_X[2]*p_X[2]);
+    p_X = { p_X[0]*s, p_X[1]*s, p_X[2]*s };
 
-        Real dp = std::inner_product(nvec.begin(), nvec.end(), p_X.begin(), 0.0);
-        AMREX_ALWAYS_ASSERT_WITH_MESSAGE(std::abs(dp) < 1.0e-14,
-            "Laser plane vector is not perpendicular to the main polarization vector");
+    Real dp = std::inner_product(nvec.begin(), nvec.end(), p_X.begin(), 0.0);
+    AMREX_ALWAYS_ASSERT_WITH_MESSAGE(std::abs(dp) < 1.0e-14,
+        "Laser plane vector is not perpendicular to the main polarization vector");
 
-        p_Y = CrossProduct(nvec, p_X);   // The second polarization vector
+    p_Y = CrossProduct(nvec, p_X);   // The second polarization vector
 
-        s = 1.0/std::sqrt(stc_direction[0]*stc_direction[0] + stc_direction[1]*stc_direction[1] + stc_direction[2]*stc_direction[2]);
-        stc_direction = { stc_direction[0]*s, stc_direction[1]*s, stc_direction[2]*s };
-        dp = std::inner_product(nvec.begin(), nvec.end(), stc_direction.begin(), 0.0);
-        AMREX_ALWAYS_ASSERT_WITH_MESSAGE(std::abs(dp) < 1.0e-14,
-            "stc_direction is not perpendicular to the laser plane vector");
+    s = 1.0/std::sqrt(stc_direction[0]*stc_direction[0] + stc_direction[1]*stc_direction[1] + stc_direction[2]*stc_direction[2]);
+    stc_direction = { stc_direction[0]*s, stc_direction[1]*s, stc_direction[2]*s };
+    dp = std::inner_product(nvec.begin(), nvec.end(), stc_direction.begin(), 0.0);
+    AMREX_ALWAYS_ASSERT_WITH_MESSAGE(std::abs(dp) < 1.0e-14,
+        "stc_direction is not perpendicular to the laser plane vector");
 
-        // Get angle between p_X and stc_direction
-        // in 2d, stcs are in the simulation plane
+    // Get angle between p_X and stc_direction
+    // in 2d, stcs are in the simulation plane
 #if AMREX_SPACEDIM == 3
-        theta_stc = acos(stc_direction[0]*p_X[0] +
+    theta_stc = acos(stc_direction[0]*p_X[0] +
                      stc_direction[1]*p_X[1] +
                      stc_direction[2]*p_X[2]);
 #else
-        theta_stc = 0.;
+    theta_stc = 0.;
 #endif
 
-#if AMREX_SPACEDIM == 3
-        u_X = p_X;
-        u_Y = p_Y;
+#if (defined WARPX_DIM_3D) || (defined WARPX_DIM_RZ)
+    u_X = p_X;
+    u_Y = p_Y;
 #else
-        u_X = CrossProduct({0., 1., 0.}, nvec);
-        u_Y = {0., 1., 0.};
+    u_X = CrossProduct({0., 1., 0.}, nvec);
+    u_Y = {0., 1., 0.};
 #endif
 
     laser_injection_box= Geom(0).ProbDomain();
@@ -271,9 +272,15 @@ LaserParticleContainer::InitData (int lev)
                  position[1] + (S_X*(i+0.5))*u_X[1] + (S_Y*(j+0.5))*u_Y[1],
                  position[2] + (S_X*(i+0.5))*u_X[2] + (S_Y*(j+0.5))*u_Y[2] };
 #else
+#   if (defined WARPX_DIM_RZ)
+        return { position[0] + (S_X*(i+0.5)),
+                 0.0,
+                 position[2]};
+#   else
         return { position[0] + (S_X*(i+0.5))*u_X[0],
-                0.0,
-                position[2] + (S_X*(i+0.5))*u_X[2] };
+                 0.0,
+                 position[2] + (S_X*(i+0.5))*u_X[2] };
+#   endif
 #endif
     };
 
@@ -283,7 +290,11 @@ LaserParticleContainer::InitData (int lev)
         return {u_X[0]*(pos[0]-position[0])+u_X[1]*(pos[1]-position[1])+u_X[2]*(pos[2]-position[2]),
                 u_Y[0]*(pos[0]-position[0])+u_Y[1]*(pos[1]-position[1])+u_Y[2]*(pos[2]-position[2])};
 #else
+#   if (defined WARPX_DIM_RZ)
+        return {pos[0]-position[0], 0.0};
+#   else
         return {u_X[0]*(pos[0]-position[0])+u_X[2]*(pos[2]-position[2]), 0.0};
+#   endif
 #endif
     };
 
@@ -364,6 +375,7 @@ LaserParticleContainer::InitData (int lev)
 #endif
                 if (laser_injection_box.contains(x))
                 {
+#ifndef WARPX_DIM_RZ
                     for (int k = 0; k<2; ++k) {
                         particle_x.push_back(pos[0]);
                         particle_y.push_back(pos[1]);
@@ -371,6 +383,21 @@ LaserParticleContainer::InitData (int lev)
                     }
                     particle_w.push_back( weight);
                     particle_w.push_back(-weight);
+#else
+                    // Particles are laid out in radial spokes
+                    const int n_spokes = (WarpX::n_rz_azimuthal_modes - 1)*min_particles_per_mode;
+                    for (int spoke = 0 ; spoke < n_spokes ; spoke++) {
+                        const Real phase = 2.*MathConst::pi*spoke/n_spokes;
+                        for (int k = 0; k<2; ++k) {
+                            particle_x.push_back(pos[0]*std::cos(phase));
+                            particle_y.push_back(pos[0]*std::sin(phase));
+                            particle_z.push_back(pos[2]);
+                        }
+                        const Real r_weight = weight*2.*MathConst::pi*pos[0]/n_spokes;
+                        particle_w.push_back( r_weight);
+                        particle_w.push_back(-r_weight);
+                    }
+#endif
                 }
             }
         }
@@ -564,13 +591,17 @@ LaserParticleContainer::ComputeSpacing (int lev, Real& Sx, Real& Sy) const
 #if (AMREX_SPACEDIM == 3)
     Sx = std::min(std::min(dx[0]/(std::abs(u_X[0])+eps),
                            dx[1]/(std::abs(u_X[1])+eps)),
-                  dx[2]/(std::abs(u_X[2])+eps));
+                           dx[2]/(std::abs(u_X[2])+eps));
     Sy = std::min(std::min(dx[0]/(std::abs(u_Y[0])+eps),
                            dx[1]/(std::abs(u_Y[1])+eps)),
-                  dx[2]/(std::abs(u_Y[2])+eps));
+                           dx[2]/(std::abs(u_Y[2])+eps));
 #else
+#   if (defined WARPX_DIM_RZ)
+    Sx = dx[0];
+#   else
     Sx = std::min(dx[0]/(std::abs(u_X[0])+eps),
                   dx[2]/(std::abs(u_X[2])+eps));
+#   endif
     Sy = 1.0;
 #endif
 }
@@ -579,7 +610,7 @@ void
 LaserParticleContainer::ComputeWeightMobility (Real Sx, Real Sy)
 {
     constexpr Real eps = 0.01;
-    constexpr Real fac = 1.0/(2.0*3.1415926535897932*PhysConst::mu0*PhysConst::c*PhysConst::c*eps);
+    constexpr Real fac = 1.0/(2.0*MathConst::pi*PhysConst::mu0*PhysConst::c*PhysConst::c*eps);
     weight = fac * wavelength * Sx * Sy / std::min(Sx,Sy) * e_max;
 
     // The mobility is the constant of proportionality between the field to
@@ -612,14 +643,14 @@ LaserParticleContainer::calculate_laser_plane_coordinates (
     Real * AMREX_RESTRICT const pplane_Xp,
     Real * AMREX_RESTRICT const pplane_Yp)
 {
-    Real const * const AMREX_RESTRICT xp = m_xp[thread_num].dataPtr();
-    Real const * const AMREX_RESTRICT yp = m_yp[thread_num].dataPtr();
-    Real const * const AMREX_RESTRICT zp = m_zp[thread_num].dataPtr();
+    ParticleReal const * const AMREX_RESTRICT xp = m_xp[thread_num].dataPtr();
+    ParticleReal const * const AMREX_RESTRICT yp = m_yp[thread_num].dataPtr();
+    ParticleReal const * const AMREX_RESTRICT zp = m_zp[thread_num].dataPtr();
     Real tmp_u_X_0 = u_X[0];
     Real tmp_u_X_2 = u_X[2];
     Real tmp_position_0 = position[0];
     Real tmp_position_2 = position[2];
-#if (AMREX_SPACEDIM == 3)
+#if (defined WARPX_DIM_3D) || (defined WARPX_DIM_RZ)
     Real tmp_u_X_1 = u_X[1];
     Real tmp_u_Y_0 = u_Y[0];
     Real tmp_u_Y_1 = u_Y[1];
@@ -630,7 +661,7 @@ LaserParticleContainer::calculate_laser_plane_coordinates (
     amrex::ParallelFor(
         np,
         [=] AMREX_GPU_DEVICE (int i) {
-#if (AMREX_SPACEDIM == 3)
+#if (defined WARPX_DIM_3D) || (defined WARPX_DIM_RZ)
             pplane_Xp[i] =
                 tmp_u_X_0 * (xp[i] - tmp_position_0) +
                 tmp_u_X_1 * (yp[i] - tmp_position_1) +
@@ -660,14 +691,14 @@ LaserParticleContainer::calculate_laser_plane_coordinates (
  */
 void
 LaserParticleContainer::update_laser_particle(
-    const int np, Real * AMREX_RESTRICT const puxp, Real * AMREX_RESTRICT const puyp,
-    Real * AMREX_RESTRICT const puzp, Real const * AMREX_RESTRICT const pwp,
+    const int np, ParticleReal * AMREX_RESTRICT const puxp, ParticleReal * AMREX_RESTRICT const puyp,
+    ParticleReal * AMREX_RESTRICT const puzp, ParticleReal const * AMREX_RESTRICT const pwp,
     Real const * AMREX_RESTRICT const amplitude, const Real dt,
     const int thread_num)
 {
-    Real * const AMREX_RESTRICT xp = m_xp[thread_num].dataPtr();
-    Real * const AMREX_RESTRICT yp = m_yp[thread_num].dataPtr();
-    Real * const AMREX_RESTRICT zp = m_zp[thread_num].dataPtr();
+    ParticleReal * const AMREX_RESTRICT xp = m_xp[thread_num].dataPtr();
+    ParticleReal * const AMREX_RESTRICT yp = m_yp[thread_num].dataPtr();
+    ParticleReal * const AMREX_RESTRICT zp = m_zp[thread_num].dataPtr();
     Real tmp_p_X_0 = p_X[0];
     Real tmp_p_X_1 = p_X[1];
     Real tmp_p_X_2 = p_X[2];
@@ -702,7 +733,7 @@ LaserParticleContainer::update_laser_particle(
             puzp[i] = gamma * vz;
             // Push the the particle positions
             xp[i] += vx * dt;
-#if (AMREX_SPACEDIM == 3)
+#if (defined WARPX_DIM_3D) || (defined WARPX_DIM_RZ)
             yp[i] += vy * dt;
 #endif
             zp[i] += vz * dt;
diff --git a/Source/Laser/LaserProfiles.cpp b/Source/Laser/LaserProfiles.cpp
index 69804b17c..281ab2101 100644
--- a/Source/Laser/LaserProfiles.cpp
+++ b/Source/Laser/LaserProfiles.cpp
@@ -28,16 +28,16 @@ LaserParticleContainer::gaussian_laser_profile (
     const Real oscillation_phase = k0 * PhysConst::c * ( t - profile_t_peak );
     // The coefficients below contain info about Gouy phase,
     // laser diffraction, and phase front curvature
-    const Complex diffract_factor = 1. + I * profile_focal_distance
-        * 2./( k0 * profile_waist * profile_waist );
-    const Complex inv_complex_waist_2 = 1./( profile_waist*profile_waist * diffract_factor );
+    const Complex diffract_factor = Real(1.) + I * profile_focal_distance
+        * Real(2.)/( k0 * profile_waist * profile_waist );
+    const Complex inv_complex_waist_2 = Real(1.)/( profile_waist*profile_waist * diffract_factor );
 
     // Time stretching due to STCs and phi2 complex envelope
     // (1 if zeta=0, beta=0, phi2=0)
-    const Complex stretch_factor = 1. + 4. *
+    const Complex stretch_factor = Real(1.) + Real(4.) *
         (zeta+beta*profile_focal_distance) * (zeta+beta*profile_focal_distance)
         * (inv_tau2*inv_complex_waist_2) +
-        2.*I*(phi2 - beta*beta*k0*profile_focal_distance) * inv_tau2;
+        Real(2.)*I*(phi2 - beta*beta*k0*profile_focal_distance) * inv_tau2;
 
     // Amplitude and monochromatic oscillations
     Complex prefactor = e_max * MathFunc::exp( I * oscillation_phase );
@@ -61,10 +61,10 @@ LaserParticleContainer::gaussian_laser_profile (
     amrex::ParallelFor(
         np,
         [=] AMREX_GPU_DEVICE (int i) {
-            const Complex stc_exponent = 1./stretch_factor * inv_tau2 *
+            const Complex stc_exponent = Real(1.)/stretch_factor * inv_tau2 *
                 MathFunc::pow((t - tmp_profile_t_peak -
                                tmp_beta*k0*(Xp[i]*std::cos(tmp_theta_stc) + Yp[i]*std::sin(tmp_theta_stc)) -
-                               2.*I*(Xp[i]*std::cos(tmp_theta_stc) + Yp[i]*std::sin(tmp_theta_stc))
+                               Real(2.)*I*(Xp[i]*std::cos(tmp_theta_stc) + Yp[i]*std::sin(tmp_theta_stc))
                                *( tmp_zeta - tmp_beta*tmp_profile_focal_distance ) * inv_complex_waist_2),2);
             // stcfactor = everything but complex transverse envelope
             const Complex stcfactor = prefactor * MathFunc::exp( - stc_exponent );
diff --git a/Source/Make.WarpX b/Source/Make.WarpX
index 1dfffbbb6..13d4f4554 100644
--- a/Source/Make.WarpX
+++ b/Source/Make.WarpX
@@ -175,11 +175,7 @@ ifeq ($(USE_HDF5),TRUE)
         LIBRARY_LOCATIONS += $(HDF5_HOME)/lib
     endif
     DEFINES += -DWARPX_USE_HDF5
-<<<<<<< HEAD
-    LIBRARIES += -lhdf5 -lz
-=======
     libraries += -lhdf5 -lz
->>>>>>> dev
 endif
 
 # job_info support
diff --git a/Source/Parser/GpuParser.H b/Source/Parser/GpuParser.H
index e49671e06..c158ee314 100644
--- a/Source/Parser/GpuParser.H
+++ b/Source/Parser/GpuParser.H
@@ -16,16 +16,16 @@ public:
     void clear ();
 
     AMREX_GPU_HOST_DEVICE
-    double
-    operator() (double x, double y, double z) const noexcept
+    amrex::Real
+    operator() (amrex::Real x, amrex::Real y, amrex::Real z) const noexcept
     {
 #ifdef AMREX_USE_GPU
 
 #ifdef AMREX_DEVICE_COMPILE
 // WarpX compiled for GPU, function compiled for __device__
         // the 3D position of each particle is stored in shared memory.
-        amrex::Gpu::SharedMemory<double> gsm;
-        double* p = gsm.dataPtr();
+        amrex::Gpu::SharedMemory<amrex::Real> gsm;
+        amrex::Real* p = gsm.dataPtr();
         int tid = threadIdx.x + threadIdx.y*blockDim.x + threadIdx.z*(blockDim.x*blockDim.y);
         p[tid*3] = x;
         p[tid*3+1] = y;
diff --git a/Source/Parser/WarpXParser.H b/Source/Parser/WarpXParser.H
index ffa61e457..8c1d854d8 100644
--- a/Source/Parser/WarpXParser.H
+++ b/Source/Parser/WarpXParser.H
@@ -6,6 +6,8 @@
 #include <string>
 #include <set>
 
+#include <AMReX_REAL.H>
+
 #include "wp_parser_c.h"
 #include "wp_parser_y.h"
 
@@ -23,15 +25,15 @@ public:
     ~WarpXParser ();
     void define (std::string const& func_body);
 
-    void setConstant (std::string const& name, double c);
+    void setConstant (std::string const& name, amrex::Real c);
 
     //
     // Option 1: Register every variable to an address provided.
     //           Assign values to external variables.
     //           Call eval().
-    void registerVariable (std::string const& name, double& var);
+    void registerVariable (std::string const& name, amrex::Real& var);
     //
-    inline double eval () const noexcept;
+    inline amrex::Real eval () const noexcept;
 
     //
     // Option 2: Register all variables at once. Parser will create
@@ -40,7 +42,7 @@ public:
     void registerVariables (std::vector<std::string> const& names);
     //
     template <typename T, typename... Ts> inline
-    double eval (T x, Ts... yz) const noexcept;
+    amrex::Real eval (T x, Ts... yz) const noexcept;
 
     void print () const;
 
@@ -54,23 +56,23 @@ private:
     void clear ();
 
     template <typename T> inline
-    void unpack (double* p, T x) const noexcept;
+    void unpack (amrex::Real* p, T x) const noexcept;
 
     template <typename T, typename... Ts> inline
-    void unpack (double* p, T x, Ts... yz) const noexcept;
+    void unpack (amrex::Real* p, T x, Ts... yz) const noexcept;
 
     std::string m_expression;
 #ifdef _OPENMP
     std::vector<struct wp_parser*> m_parser;
-    mutable std::vector<std::array<double,16> > m_variables;
+    mutable std::vector<std::array<amrex::Real,16> > m_variables;
 #else
     struct wp_parser* m_parser = nullptr;
-    mutable std::array<double,16> m_variables;
+    mutable std::array<amrex::Real,16> m_variables;
 #endif
 };
 
 inline
-double
+amrex::Real
 WarpXParser::eval () const noexcept
 {
 #ifdef _OPENMP
@@ -82,7 +84,7 @@ WarpXParser::eval () const noexcept
 
 template <typename T, typename... Ts>
 inline
-double
+amrex::Real
 WarpXParser::eval (T x, Ts... yz) const noexcept
 {
 #ifdef _OPENMP
@@ -96,7 +98,7 @@ WarpXParser::eval (T x, Ts... yz) const noexcept
 template <typename T>
 inline
 void
-WarpXParser::unpack (double* p, T x) const noexcept
+WarpXParser::unpack (amrex::Real* p, T x) const noexcept
 {
     *p = x;
 }
@@ -104,7 +106,7 @@ WarpXParser::unpack (double* p, T x) const noexcept
 template <typename T, typename... Ts>
 inline
 void
-WarpXParser::unpack (double* p, T x, Ts... yz) const noexcept
+WarpXParser::unpack (amrex::Real* p, T x, Ts... yz) const noexcept
 {
     *p++ = x;
     unpack(p, yz...);
diff --git a/Source/Parser/WarpXParser.cpp b/Source/Parser/WarpXParser.cpp
index 3237086f2..ced536327 100644
--- a/Source/Parser/WarpXParser.cpp
+++ b/Source/Parser/WarpXParser.cpp
@@ -69,7 +69,7 @@ WarpXParser::clear ()
 }
 
 void
-WarpXParser::registerVariable (std::string const& name, double& var)
+WarpXParser::registerVariable (std::string const& name, amrex::Real& var)
 {
     // We assume this is called inside OMP parallel region
 #ifdef _OPENMP
@@ -105,7 +105,7 @@ WarpXParser::registerVariables (std::vector<std::string> const& names)
 }
 
 void
-WarpXParser::setConstant (std::string const& name, double c)
+WarpXParser::setConstant (std::string const& name, amrex::Real c)
 {
 #ifdef _OPENMP
 
diff --git a/Source/Parser/wp_parser.tab.c b/Source/Parser/wp_parser.tab.c
index 3981894a5..0f7c2403d 100644
--- a/Source/Parser/wp_parser.tab.c
+++ b/Source/Parser/wp_parser.tab.c
@@ -138,7 +138,7 @@ union YYSTYPE
 #line 19 "wp_parser.y" /* yacc.c:352  */
 
     struct wp_node* n;
-    double d;
+    amrex_real d;
     struct wp_symbol* s;
     enum wp_f1_t f1;
     enum wp_f2_t f2;
diff --git a/Source/Parser/wp_parser.tab.h b/Source/Parser/wp_parser.tab.h
index b50516808..0c859fc03 100644
--- a/Source/Parser/wp_parser.tab.h
+++ b/Source/Parser/wp_parser.tab.h
@@ -75,7 +75,7 @@ union YYSTYPE
 #line 19 "wp_parser.y" /* yacc.c:1921  */
 
     struct wp_node* n;
-    double d;
+    amrex_real d;
     struct wp_symbol* s;
     enum wp_f1_t f1;
     enum wp_f2_t f2;
diff --git a/Source/Parser/wp_parser.y b/Source/Parser/wp_parser.y
index 453eda1cd..809dbfa5e 100644
--- a/Source/Parser/wp_parser.y
+++ b/Source/Parser/wp_parser.y
@@ -18,7 +18,7 @@
 */
 %union {
     struct wp_node* n;
-    double d;
+    amrex_real d;
     struct wp_symbol* s;
     enum wp_f1_t f1;
     enum wp_f2_t f2;
diff --git a/Source/Parser/wp_parser_c.h b/Source/Parser/wp_parser_c.h
index 3aafdec65..970d6b355 100644
--- a/Source/Parser/wp_parser_c.h
+++ b/Source/Parser/wp_parser_c.h
@@ -4,6 +4,7 @@
 #include "wp_parser_y.h"
 #include <AMReX_GpuQualifiers.H>
 #include <AMReX_Extension.H>
+#include <AMReX_REAL.H>
 
 #ifdef __cplusplus
 extern "C" {
@@ -21,15 +22,15 @@ extern "C" {
 #include <string>
 
 AMREX_GPU_HOST_DEVICE
-inline double
+inline amrex_real
 wp_ast_eval (struct wp_node* node)
 {
-    double result;
+    amrex_real result;
 
 #ifdef AMREX_DEVICE_COMPILE
-    extern __shared__ double extern_xyz[];
+    extern __shared__ amrex_real extern_xyz[];
     int tid = threadIdx.x + threadIdx.y*blockDim.x + threadIdx.z*(blockDim.x*blockDim.y);
-    double* x = extern_xyz + tid*3;
+    amrex_real* x = extern_xyz + tid*3;
 #endif
 
     switch (node->type)
diff --git a/Source/Parser/wp_parser_y.c b/Source/Parser/wp_parser_y.c
index 259f9368b..b71b42638 100644
--- a/Source/Parser/wp_parser_y.c
+++ b/Source/Parser/wp_parser_y.c
@@ -6,8 +6,6 @@
 #include "wp_parser_y.h"
 #include "wp_parser.tab.h"
 
-#include <AMReX_GpuQualifiers.H>
-
 static struct wp_node* wp_root = NULL;
 
 /* This is called by a bison rule to store the original AST in a
@@ -21,7 +19,7 @@ wp_defexpr (struct wp_node* body)
 }
 
 struct wp_node*
-wp_newnumber (double d)
+wp_newnumber (amrex_real d)
 {
     struct wp_number* r = (struct wp_number*) malloc(sizeof(struct wp_number));
     r->type = WP_NUMBER;
@@ -154,8 +152,8 @@ wp_parser_dup (struct wp_parser* source)
 }
 
 AMREX_GPU_HOST_DEVICE
-double
-wp_call_f1 (enum wp_f1_t type, double a)
+amrex_real
+wp_call_f1 (enum wp_f1_t type, amrex_real a)
 {
     switch (type) {
     case WP_SQRT:        return sqrt(a);
@@ -185,8 +183,8 @@ wp_call_f1 (enum wp_f1_t type, double a)
 }
 
 AMREX_GPU_HOST_DEVICE
-double
-wp_call_f2 (enum wp_f2_t type, double a, double b)
+amrex_real
+wp_call_f2 (enum wp_f2_t type, amrex_real a, amrex_real b)
 {
     switch (type) {
     case WP_POW:
@@ -356,13 +354,13 @@ wp_parser_ast_dup (struct wp_parser* my_parser, struct wp_node* node, int move)
 
 #define WP_MOVEUP_R(node, v) \
     struct wp_node* n = node->r->r; \
-    double* p = node->r->rip.p; \
+    amrex_real* p = node->r->rip.p; \
     node->r = n; \
     node->lvp.v = v; \
     node->rip.p = p;
 #define WP_MOVEUP_L(node, v) \
     struct wp_node* n = node->l->r; \
-    double* p = node->l->rip.p; \
+    amrex_real* p = node->l->rip.p; \
     node->r = n; \
     node->lvp.v = v; \
     node->rip.p = p;
@@ -392,7 +390,7 @@ wp_ast_optimize (struct wp_node* node)
         if (node->l->type == WP_NUMBER &&
             node->r->type == WP_NUMBER)
         {
-            double v = ((struct wp_number*)(node->l))->value
+            amrex_real v = ((struct wp_number*)(node->l))->value
                 +      ((struct wp_number*)(node->r))->value;
             ((struct wp_number*)node)->type = WP_NUMBER;
             ((struct wp_number*)node)->value = v;
@@ -422,28 +420,28 @@ wp_ast_optimize (struct wp_node* node)
         else if (node->l->type == WP_NUMBER &&
                  node->r->type == WP_ADD_VP)
         {
-            double v = ((struct wp_number*)(node->l))->value + WP_EVAL_R(node);
+            amrex_real v = ((struct wp_number*)(node->l))->value + WP_EVAL_R(node);
             WP_MOVEUP_R(node, v);
             node->type = WP_ADD_VP;
         }
         else if (node->l->type == WP_NUMBER &&
                  node->r->type == WP_SUB_VP)
         {
-            double v = ((struct wp_number*)(node->l))->value + WP_EVAL_R(node);
+            amrex_real v = ((struct wp_number*)(node->l))->value + WP_EVAL_R(node);
             WP_MOVEUP_R(node, v);
             node->type = WP_SUB_VP;
         }
         else if (node->l->type == WP_ADD_VP &&
                  node->r->type == WP_NUMBER)
         {
-            double v = WP_EVAL_L(node) + ((struct wp_number*)(node->r))->value;
+            amrex_real v = WP_EVAL_L(node) + ((struct wp_number*)(node->r))->value;
             WP_MOVEUP_L(node, v);
             node->type = WP_ADD_VP;
         }
         else if (node->l->type == WP_SUB_VP &&
                  node->r->type == WP_NUMBER)
         {
-            double v = WP_EVAL_L(node) + ((struct wp_number*)(node->r))->value;
+            amrex_real v = WP_EVAL_L(node) + ((struct wp_number*)(node->r))->value;
             WP_MOVEUP_L(node, v);
             node->type = WP_SUB_VP;
         }
@@ -455,7 +453,7 @@ wp_ast_optimize (struct wp_node* node)
         if (node->l->type == WP_NUMBER &&
             node->r->type == WP_NUMBER)
         {
-            double v = ((struct wp_number*)(node->l))->value
+            amrex_real v = ((struct wp_number*)(node->l))->value
                 -      ((struct wp_number*)(node->r))->value;
             ((struct wp_number*)node)->type = WP_NUMBER;
             ((struct wp_number*)node)->value = v;
@@ -485,28 +483,28 @@ wp_ast_optimize (struct wp_node* node)
         else if (node->l->type == WP_NUMBER &&
                  node->r->type == WP_ADD_VP)
         {
-            double v = ((struct wp_number*)(node->l))->value - WP_EVAL_R(node);
+            amrex_real v = ((struct wp_number*)(node->l))->value - WP_EVAL_R(node);
             WP_MOVEUP_R(node, v);
             node->type = WP_SUB_VP;
         }
         else if (node->l->type == WP_NUMBER &&
                  node->r->type == WP_SUB_VP)
         {
-            double v = ((struct wp_number*)(node->l))->value - WP_EVAL_R(node);
+            amrex_real v = ((struct wp_number*)(node->l))->value - WP_EVAL_R(node);
             WP_MOVEUP_R(node, v);
             node->type = WP_ADD_VP;
         }
         else if (node->l->type == WP_ADD_VP &&
                  node->r->type == WP_NUMBER)
         {
-            double v = WP_EVAL_L(node) - ((struct wp_number*)(node->r))->value;
+            amrex_real v = WP_EVAL_L(node) - ((struct wp_number*)(node->r))->value;
             WP_MOVEUP_L(node, v);
             node->type = WP_ADD_VP;
         }
         else if (node->l->type == WP_SUB_VP &&
                  node->r->type == WP_NUMBER)
         {
-            double v = WP_EVAL_L(node) - ((struct wp_number*)(node->r))->value;
+            amrex_real v = WP_EVAL_L(node) - ((struct wp_number*)(node->r))->value;
             WP_MOVEUP_L(node, v);
             node->type = WP_SUB_VP;
         }
@@ -518,7 +516,7 @@ wp_ast_optimize (struct wp_node* node)
         if (node->l->type == WP_NUMBER &&
             node->r->type == WP_NUMBER)
         {
-            double v = ((struct wp_number*)(node->l))->value
+            amrex_real v = ((struct wp_number*)(node->l))->value
                 *      ((struct wp_number*)(node->r))->value;
             ((struct wp_number*)node)->type = WP_NUMBER;
             ((struct wp_number*)node)->value = v;
@@ -548,28 +546,28 @@ wp_ast_optimize (struct wp_node* node)
         else if (node->l->type == WP_NUMBER &&
                  node->r->type == WP_MUL_VP)
         {
-            double v = ((struct wp_number*)(node->l))->value * WP_EVAL_R(node);
+            amrex_real v = ((struct wp_number*)(node->l))->value * WP_EVAL_R(node);
             WP_MOVEUP_R(node, v);
             node->type = WP_MUL_VP;
         }
         else if (node->l->type == WP_NUMBER &&
                  node->r->type == WP_DIV_VP)
         {
-            double v = ((struct wp_number*)(node->l))->value * WP_EVAL_R(node);
+            amrex_real v = ((struct wp_number*)(node->l))->value * WP_EVAL_R(node);
             WP_MOVEUP_R(node, v);
             node->type = WP_DIV_VP;
         }
         else if (node->l->type == WP_MUL_VP &&
                  node->r->type == WP_NUMBER)
         {
-            double v = WP_EVAL_L(node) * ((struct wp_number*)(node->r))->value;
+            amrex_real v = WP_EVAL_L(node) * ((struct wp_number*)(node->r))->value;
             WP_MOVEUP_L(node, v);
             node->type = WP_MUL_VP;
         }
         else if (node->l->type == WP_DIV_VP &&
                  node->r->type == WP_NUMBER)
         {
-            double v = WP_EVAL_L(node) * ((struct wp_number*)(node->r))->value;
+            amrex_real v = WP_EVAL_L(node) * ((struct wp_number*)(node->r))->value;
             WP_MOVEUP_L(node, v);
             node->type = WP_DIV_VP;
         }
@@ -581,7 +579,7 @@ wp_ast_optimize (struct wp_node* node)
         if (node->l->type == WP_NUMBER &&
             node->r->type == WP_NUMBER)
         {
-            double v = ((struct wp_number*)(node->l))->value
+            amrex_real v = ((struct wp_number*)(node->l))->value
                 /      ((struct wp_number*)(node->r))->value;
             ((struct wp_number*)node)->type = WP_NUMBER;
             ((struct wp_number*)node)->value = v;
@@ -611,28 +609,28 @@ wp_ast_optimize (struct wp_node* node)
         else if (node->l->type == WP_NUMBER &&
                  node->r->type == WP_MUL_VP)
         {
-            double v = ((struct wp_number*)(node->l))->value / WP_EVAL_R(node);
+            amrex_real v = ((struct wp_number*)(node->l))->value / WP_EVAL_R(node);
             WP_MOVEUP_R(node, v);
             node->type = WP_DIV_VP;
         }
         else if (node->l->type == WP_NUMBER &&
                  node->r->type == WP_DIV_VP)
         {
-            double v = ((struct wp_number*)(node->l))->value / WP_EVAL_R(node);
+            amrex_real v = ((struct wp_number*)(node->l))->value / WP_EVAL_R(node);
             WP_MOVEUP_R(node, v);
             node->type = WP_MUL_VP;
         }
         else if (node->l->type == WP_MUL_VP &&
                  node->r->type == WP_NUMBER)
         {
-            double v = WP_EVAL_L(node) / ((struct wp_number*)(node->r))->value;
+            amrex_real v = WP_EVAL_L(node) / ((struct wp_number*)(node->r))->value;
             WP_MOVEUP_L(node, v);
             node->type = WP_MUL_VP;
         }
         else if (node->l->type == WP_DIV_VP &&
                  node->r->type == WP_NUMBER)
         {
-            double v = WP_EVAL_L(node) / ((struct wp_number*)(node->r))->value;
+            amrex_real v = WP_EVAL_L(node) / ((struct wp_number*)(node->r))->value;
             WP_MOVEUP_L(node, v);
             node->type = WP_DIV_VP;
         }
@@ -641,7 +639,7 @@ wp_ast_optimize (struct wp_node* node)
         wp_ast_optimize(node->l);
         if (node->l->type == WP_NUMBER)
         {
-            double v = -((struct wp_number*)(node->l))->value;
+            amrex_real v = -((struct wp_number*)(node->l))->value;
             ((struct wp_number*)node)->type = WP_NUMBER;
             ((struct wp_number*)node)->value = v;
         }
@@ -675,7 +673,7 @@ wp_ast_optimize (struct wp_node* node)
         wp_ast_optimize(node->l);
         if (node->l->type == WP_NUMBER)
         {
-            double v = wp_call_f1
+            amrex_real v = wp_call_f1
                 (((struct wp_f1*)node)->ftype,
                  ((struct wp_number*)(((struct wp_f1*)node)->l))->value);
             ((struct wp_number*)node)->type = WP_NUMBER;
@@ -688,7 +686,7 @@ wp_ast_optimize (struct wp_node* node)
         if (node->l->type == WP_NUMBER &&
             node->r->type == WP_NUMBER)
         {
-            double v = wp_call_f2
+            amrex_real v = wp_call_f2
                 (((struct wp_f2*)node)->ftype,
                  ((struct wp_number*)(((struct wp_f2*)node)->l))->value,
                  ((struct wp_number*)(((struct wp_f2*)node)->r))->value);
@@ -698,7 +696,7 @@ wp_ast_optimize (struct wp_node* node)
         else if (node->r->type == WP_NUMBER && ((struct wp_f2*)node)->ftype == WP_POW)
         {
             struct wp_node* n = node->l;
-            double v = ((struct wp_number*)(node->r))->value;
+            amrex_real v = ((struct wp_number*)(node->r))->value;
             if (-3.0 == v) {
                 ((struct wp_f1*)node)->type = WP_F1;
                 ((struct wp_f1*)node)->l = n;
@@ -733,7 +731,7 @@ wp_ast_optimize (struct wp_node* node)
         wp_ast_optimize(node->r);
         if (node->r->type == WP_NUMBER)
         {
-            double v = node->lvp.v + ((struct wp_number*)(node->r))->value;
+            amrex_real v = node->lvp.v + ((struct wp_number*)(node->r))->value;
             ((struct wp_number*)node)->type = WP_NUMBER;
             ((struct wp_number*)node)->value = v;
         }
@@ -742,7 +740,7 @@ wp_ast_optimize (struct wp_node* node)
         wp_ast_optimize(node->r);
         if (node->r->type == WP_NUMBER)
         {
-            double v = node->lvp.v - ((struct wp_number*)(node->r))->value;
+            amrex_real v = node->lvp.v - ((struct wp_number*)(node->r))->value;
             ((struct wp_number*)node)->type = WP_NUMBER;
             ((struct wp_number*)node)->value = v;
         }
@@ -751,7 +749,7 @@ wp_ast_optimize (struct wp_node* node)
         wp_ast_optimize(node->r);
         if (node->r->type == WP_NUMBER)
         {
-            double v = node->lvp.v * ((struct wp_number*)(node->r))->value;
+            amrex_real v = node->lvp.v * ((struct wp_number*)(node->r))->value;
             ((struct wp_number*)node)->type = WP_NUMBER;
             ((struct wp_number*)node)->value = v;
         }
@@ -760,7 +758,7 @@ wp_ast_optimize (struct wp_node* node)
         wp_ast_optimize(node->r);
         if (node->r->type == WP_NUMBER)
         {
-            double v = node->lvp.v / ((struct wp_number*)(node->r))->value;
+            amrex_real v = node->lvp.v / ((struct wp_number*)(node->r))->value;
             ((struct wp_number*)node)->type = WP_NUMBER;
             ((struct wp_number*)node)->value = v;
         }
@@ -769,7 +767,7 @@ wp_ast_optimize (struct wp_node* node)
         wp_ast_optimize(node->l);
         if (node->l->type == WP_NUMBER)
         {
-            double v = -((struct wp_number*)(node->l))->value;
+            amrex_real v = -((struct wp_number*)(node->l))->value;
             ((struct wp_number*)node)->type = WP_NUMBER;
             ((struct wp_number*)node)->value = v;
         }
@@ -938,7 +936,7 @@ wp_ast_print (struct wp_node* node)
 }
 
 void
-wp_ast_regvar (struct wp_node* node, char const* name, double* p)
+wp_ast_regvar (struct wp_node* node, char const* name, amrex_real* p)
 {
     switch (node->type)
     {
@@ -1047,7 +1045,7 @@ wp_ast_regvar_gpu (struct wp_node* node, char const* name, int i)
     }
 }
 
-void wp_ast_setconst (struct wp_node* node, char const* name, double c)
+void wp_ast_setconst (struct wp_node* node, char const* name, amrex_real c)
 {
     switch (node->type)
     {
@@ -1099,7 +1097,7 @@ void wp_ast_setconst (struct wp_node* node, char const* name, double c)
 }
 
 void
-wp_parser_regvar (struct wp_parser* parser, char const* name, double* p)
+wp_parser_regvar (struct wp_parser* parser, char const* name, amrex_real* p)
 {
     wp_ast_regvar(parser->ast, name, p);
 }
@@ -1111,7 +1109,7 @@ wp_parser_regvar_gpu (struct wp_parser* parser, char const* name, int i)
 }
 
 void
-wp_parser_setconst (struct wp_parser* parser, char const* name, double c)
+wp_parser_setconst (struct wp_parser* parser, char const* name, amrex_real c)
 {
     wp_ast_setconst(parser->ast, name, c);
     wp_ast_optimize(parser->ast);
diff --git a/Source/Parser/wp_parser_y.h b/Source/Parser/wp_parser_y.h
index 8c9f8e4e4..d83815090 100644
--- a/Source/Parser/wp_parser_y.h
+++ b/Source/Parser/wp_parser_y.h
@@ -2,6 +2,7 @@
 #define WP_PARSER_Y_H_
 
 #include <AMReX_GpuQualifiers.H>
+#include <AMReX_REAL.H>
 
 #ifdef __cplusplus
 #include <cstdlib>
@@ -77,11 +78,11 @@ enum wp_node_t {
 
 union wp_ip {
     int i;
-    double* p;
+    amrex_real* p;
 };
 
 union wp_vp {
-    double v;
+    amrex_real v;
     union wp_ip ip;
 };
 
@@ -95,7 +96,7 @@ struct wp_node {
 
 struct wp_number {
     enum wp_node_t type;
-    double value;
+    amrex_real value;
 };
 
 struct wp_symbol {
@@ -122,7 +123,7 @@ struct wp_f2 {  /* Builtin functions with two arguments */
 /* These functions are used in bison rules to generate the original
  * AST. */
 void wp_defexpr (struct wp_node* body);
-struct wp_node* wp_newnumber (double d);
+struct wp_node* wp_newnumber (amrex_real d);
 struct wp_symbol* wp_makesymbol (char* name);
 struct wp_node* wp_newsymbol (struct wp_symbol* sym);
 struct wp_node* wp_newnode (enum wp_node_t type, struct wp_node* l,
@@ -153,20 +154,20 @@ void wp_parser_delete (struct wp_parser* parser);
 struct wp_parser* wp_parser_dup (struct wp_parser* source);
 struct wp_node* wp_parser_ast_dup (struct wp_parser* parser, struct wp_node* src, int move);
 
-void wp_parser_regvar (struct wp_parser* parser, char const* name, double* p);
+void wp_parser_regvar (struct wp_parser* parser, char const* name, amrex_real* p);
 void wp_parser_regvar_gpu (struct wp_parser* parser, char const* name, int i);
-void wp_parser_setconst (struct wp_parser* parser, char const* name, double c);
+void wp_parser_setconst (struct wp_parser* parser, char const* name, amrex_real c);
 
 /* We need to walk the tree in these functions */
 void wp_ast_optimize (struct wp_node* node);
 size_t wp_ast_size (struct wp_node* node);
 void wp_ast_print (struct wp_node* node);
-void wp_ast_regvar (struct wp_node* node, char const* name, double* p);
+void wp_ast_regvar (struct wp_node* node, char const* name, amrex_real* p);
 void wp_ast_regvar_gpu (struct wp_node* node, char const* name, int i);
-void wp_ast_setconst (struct wp_node* node, char const* name, double c);
+void wp_ast_setconst (struct wp_node* node, char const* name, amrex_real c);
 
-AMREX_GPU_HOST_DEVICE double wp_call_f1 (enum wp_f1_t type, double a);
-AMREX_GPU_HOST_DEVICE double wp_call_f2 (enum wp_f2_t type, double a, double b);
+AMREX_GPU_HOST_DEVICE amrex_real wp_call_f1 (enum wp_f1_t type, amrex_real a);
+AMREX_GPU_HOST_DEVICE amrex_real wp_call_f2 (enum wp_f2_t type, amrex_real a, amrex_real b);
 
 #ifdef __cplusplus
 }
diff --git a/Source/Particles/Deposition/ChargeDeposition.H b/Source/Particles/Deposition/ChargeDeposition.H
index b9210e67c..eec407a2b 100755
--- a/Source/Particles/Deposition/ChargeDeposition.H
+++ b/Source/Particles/Deposition/ChargeDeposition.H
@@ -18,10 +18,10 @@
  * /param q            : species charge.
  */
 template <int depos_order>
-void doChargeDepositionShapeN(const amrex::Real * const xp,
-                              const amrex::Real * const yp,
-                              const amrex::Real * const zp,
-                              const amrex::Real * const wp,
+void doChargeDepositionShapeN(const amrex::ParticleReal * const xp,
+                              const amrex::ParticleReal * const yp,
+                              const amrex::ParticleReal * const zp,
+                              const amrex::ParticleReal * const wp,
                               const int * const ion_lev,
                               const amrex::Array4<amrex::Real>& rho_arr,
                               const long np_to_depose,
diff --git a/Source/Particles/Deposition/CurrentDeposition.H b/Source/Particles/Deposition/CurrentDeposition.H
index 7a96dab9a..6da0f1155 100644
--- a/Source/Particles/Deposition/CurrentDeposition.H
+++ b/Source/Particles/Deposition/CurrentDeposition.H
@@ -24,13 +24,13 @@
  * /param q            : species charge.
  */
 template <int depos_order>
-void doDepositionShapeN(const amrex::Real * const xp,
-                        const amrex::Real * const yp,
-                        const amrex::Real * const zp,
-                        const amrex::Real * const wp,
-                        const amrex::Real * const uxp,
-                        const amrex::Real * const uyp,
-                        const amrex::Real * const uzp,
+void doDepositionShapeN(const amrex::ParticleReal * const xp,
+                        const amrex::ParticleReal * const yp,
+                        const amrex::ParticleReal * const zp,
+                        const amrex::ParticleReal * const wp,
+                        const amrex::ParticleReal * const uxp,
+                        const amrex::ParticleReal * const uyp,
+                        const amrex::ParticleReal * const uzp,
                         const int * const ion_lev,
                         const amrex::Array4<amrex::Real>& jx_arr,
                         const amrex::Array4<amrex::Real>& jy_arr,
@@ -189,13 +189,13 @@ void doDepositionShapeN(const amrex::Real * const xp,
  * \param n_rz_azimuthal_modes: Number of azimuthal modes when using RZ geometry
  */
 template <int depos_order>
-void doEsirkepovDepositionShapeN (const amrex::Real * const xp,
-                                  const amrex::Real * const yp,
-                                  const amrex::Real * const zp,
-                                  const amrex::Real * const wp,
-                                  const amrex::Real * const uxp,
-                                  const amrex::Real * const uyp,
-                                  const amrex::Real * const uzp,
+void doEsirkepovDepositionShapeN (const amrex::ParticleReal * const xp,
+                                  const amrex::ParticleReal * const yp,
+                                  const amrex::ParticleReal * const zp,
+                                  const amrex::ParticleReal * const wp,
+                                  const amrex::ParticleReal * const uxp,
+                                  const amrex::ParticleReal * const uyp,
+                                  const amrex::ParticleReal * const uzp,
                                   const int * ion_lev,
                                   const amrex::Array4<amrex::Real>& Jx_arr,
                                   const amrex::Array4<amrex::Real>& Jy_arr,
@@ -397,7 +397,7 @@ void doEsirkepovDepositionShapeN (const amrex::Real * const xp,
                     Complex xy_mid = xy_mid0; // Throughout the following loop, xy_mid takes the value e^{i m theta}
                     for (int imode=1 ; imode < n_rz_azimuthal_modes ; imode++) {
                         // The factor 2 comes from the normalization of the modes
-                        const Complex djr_cmplx = 2.*sdxi*xy_mid;
+                        const Complex djr_cmplx = amrex::Real(2.)*sdxi*xy_mid;
                         amrex::Gpu::Atomic::Add( &Jx_arr(lo.x+i_new-1+i, lo.y+k_new-1+k, 0, 2*imode-1), djr_cmplx.real());
                         amrex::Gpu::Atomic::Add( &Jx_arr(lo.x+i_new-1+i, lo.y+k_new-1+k, 0, 2*imode), djr_cmplx.imag());
                         xy_mid = xy_mid*xy_mid0;
@@ -418,7 +418,7 @@ void doEsirkepovDepositionShapeN (const amrex::Real * const xp,
                     for (int imode=1 ; imode < n_rz_azimuthal_modes ; imode++) {
                         // The factor 2 comes from the normalization of the modes
                         // The minus sign comes from the different convention with respect to Davidson et al.
-                        const Complex djt_cmplx = -2.*I*(i_new-1 + i + xmin*dxi)*wq*invdtdx/(double)imode*
+                        const Complex djt_cmplx = -amrex::Real(2.)*I*(i_new-1 + i + xmin*dxi)*wq*invdtdx/(amrex::Real)imode*
                                                   (sx_new[i]*sz_new[k]*(xy_new - xy_mid) + sx_old[i]*sz_old[k]*(xy_mid - xy_old));
                         amrex::Gpu::Atomic::Add( &Jy_arr(lo.x+i_new-1+i, lo.y+k_new-1+k, 0, 2*imode-1), djt_cmplx.real());
                         amrex::Gpu::Atomic::Add( &Jy_arr(lo.x+i_new-1+i, lo.y+k_new-1+k, 0, 2*imode), djt_cmplx.imag());
@@ -438,7 +438,7 @@ void doEsirkepovDepositionShapeN (const amrex::Real * const xp,
                     Complex xy_mid = xy_mid0; // Throughout the following loop, xy_mid takes the value e^{i m theta}
                     for (int imode=1 ; imode < n_rz_azimuthal_modes ; imode++) {
                         // The factor 2 comes from the normalization of the modes
-                        const Complex djz_cmplx = 2.*sdzk*xy_mid;
+                        const Complex djz_cmplx = amrex::Real(2.)*sdzk*xy_mid;
                         amrex::Gpu::Atomic::Add( &Jz_arr(lo.x+i_new-1+i, lo.y+k_new-1+k, 0, 2*imode-1), djz_cmplx.real());
                         amrex::Gpu::Atomic::Add( &Jz_arr(lo.x+i_new-1+i, lo.y+k_new-1+k, 0, 2*imode), djz_cmplx.imag());
                         xy_mid = xy_mid*xy_mid0;
diff --git a/Source/Particles/Gather/FieldGather.H b/Source/Particles/Gather/FieldGather.H
index 6727b0aa9..c5ec6fb5b 100644
--- a/Source/Particles/Gather/FieldGather.H
+++ b/Source/Particles/Gather/FieldGather.H
@@ -19,12 +19,12 @@
  * \param n_rz_azimuthal_modes: Number of azimuthal modes when using RZ geometry
  */
 template <int depos_order, int lower_in_v>
-void doGatherShapeN(const amrex::Real * const xp,
-                    const amrex::Real * const yp,
-                    const amrex::Real * const zp,
-                    amrex::Real * const Exp, amrex::Real * const Eyp,
-                    amrex::Real * const Ezp, amrex::Real * const Bxp,
-                    amrex::Real * const Byp, amrex::Real * const Bzp,
+void doGatherShapeN(const amrex::ParticleReal * const xp,
+                    const amrex::ParticleReal * const yp,
+                    const amrex::ParticleReal * const zp,
+                    amrex::ParticleReal * const Exp, amrex::ParticleReal * const Eyp,
+                    amrex::ParticleReal * const Ezp, amrex::ParticleReal * const Bxp,
+                    amrex::ParticleReal * const Byp, amrex::ParticleReal * const Bzp,
                     const amrex::Array4<const amrex::Real>& ex_arr,
                     const amrex::Array4<const amrex::Real>& ey_arr,
                     const amrex::Array4<const amrex::Real>& ez_arr,
diff --git a/Source/Particles/Make.package b/Source/Particles/Make.package
index 95f36cf65..c9d520873 100644
--- a/Source/Particles/Make.package
+++ b/Source/Particles/Make.package
@@ -18,6 +18,7 @@ CEXE_headers += ShapeFactors.H
 include $(WARPX_HOME)/Source/Particles/Pusher/Make.package
 include $(WARPX_HOME)/Source/Particles/Deposition/Make.package
 include $(WARPX_HOME)/Source/Particles/Gather/Make.package
+include $(WARPX_HOME)/Source/Particles/Sorting/Make.package
 
 INCLUDE_LOCATIONS += $(WARPX_HOME)/Source/Particles
 VPATH_LOCATIONS   += $(WARPX_HOME)/Source/Particles
diff --git a/Source/Particles/MultiParticleContainer.cpp b/Source/Particles/MultiParticleContainer.cpp
index bb795465e..715c97b99 100644
--- a/Source/Particles/MultiParticleContainer.cpp
+++ b/Source/Particles/MultiParticleContainer.cpp
@@ -545,12 +545,12 @@ namespace
         WarpXParticleContainer::ParticleType* particles_source = ptile_source.GetArrayOfStructs()().data();
         // --- source SoA particle data
         auto& soa_source = ptile_source.GetStructOfArrays();
-        GpuArray<Real*,PIdx::nattribs> attribs_source;
+        GpuArray<ParticleReal*,PIdx::nattribs> attribs_source;
         for (int ia = 0; ia < PIdx::nattribs; ++ia) {
             attribs_source[ia] = soa_source.GetRealData(ia).data();
         }
         // --- source runtime attribs
-        GpuArray<Real*,3> runtime_uold_source;
+        GpuArray<ParticleReal*,3> runtime_uold_source;
         // Prepare arrays for boosted frame diagnostics.
         runtime_uold_source[0] = soa_source.GetRealData(PIdx::ux).data();
         runtime_uold_source[1] = soa_source.GetRealData(PIdx::uy).data();
@@ -590,13 +590,13 @@ namespace
         WarpXParticleContainer::ParticleType* particles_product = ptile_product.GetArrayOfStructs()().data() + np_product_old;
         // --- product SoA particle data
         auto& soa_product = ptile_product.GetStructOfArrays();
-        GpuArray<Real*,PIdx::nattribs> attribs_product;
+        GpuArray<ParticleReal*,PIdx::nattribs> attribs_product;
         for (int ia = 0; ia < PIdx::nattribs; ++ia) {
             // First element is the first newly-created product particle
             attribs_product[ia] = soa_product.GetRealData(ia).data() + np_product_old;
         }
         // --- product runtime attribs
-        GpuArray<Real*,6> runtime_attribs_product;
+        GpuArray<ParticleReal*,6> runtime_attribs_product;
         bool do_boosted_product = WarpX::do_boosted_frame_diagnostic
             && pc_product->DoBoostedFrameDiags();
         if (do_boosted_product) {
diff --git a/Source/Particles/PhotonParticleContainer.H b/Source/Particles/PhotonParticleContainer.H
index 3ac304bdc..a6ffd1d76 100644
--- a/Source/Particles/PhotonParticleContainer.H
+++ b/Source/Particles/PhotonParticleContainer.H
@@ -41,9 +41,9 @@ public:
                          DtType a_dt_type=DtType::Full) override;
 
     virtual void PushPX(WarpXParIter& pti,
-                        amrex::Cuda::ManagedDeviceVector<amrex::Real>& xp,
-                        amrex::Cuda::ManagedDeviceVector<amrex::Real>& yp,
-                        amrex::Cuda::ManagedDeviceVector<amrex::Real>& zp,
+                        amrex::Cuda::ManagedDeviceVector<amrex::ParticleReal>& xp,
+                        amrex::Cuda::ManagedDeviceVector<amrex::ParticleReal>& yp,
+                        amrex::Cuda::ManagedDeviceVector<amrex::ParticleReal>& zp,
                         amrex::Real dt, DtType a_dt_type=DtType::Full) override;
 
 
diff --git a/Source/Particles/PhotonParticleContainer.cpp b/Source/Particles/PhotonParticleContainer.cpp
index 9de441e5c..4a75ec9f3 100644
--- a/Source/Particles/PhotonParticleContainer.cpp
+++ b/Source/Particles/PhotonParticleContainer.cpp
@@ -34,27 +34,27 @@ void PhotonParticleContainer::InitData()
 
 void
 PhotonParticleContainer::PushPX(WarpXParIter& pti,
-                                Cuda::ManagedDeviceVector<Real>& xp,
-                                Cuda::ManagedDeviceVector<Real>& yp,
-                                Cuda::ManagedDeviceVector<Real>& zp,
+                                Cuda::ManagedDeviceVector<ParticleReal>& xp,
+                                Cuda::ManagedDeviceVector<ParticleReal>& yp,
+                                Cuda::ManagedDeviceVector<ParticleReal>& zp,
                                 Real dt, DtType a_dt_type)
 {
 
     // This wraps the momentum and position advance so that inheritors can modify the call.
     auto& attribs = pti.GetAttribs();
     // Extract pointers to the different particle quantities
-    Real* const AMREX_RESTRICT x = xp.dataPtr();
-    Real* const AMREX_RESTRICT y = yp.dataPtr();
-    Real* const AMREX_RESTRICT z = zp.dataPtr();
-    Real* const AMREX_RESTRICT ux = attribs[PIdx::ux].dataPtr();
-    Real* const AMREX_RESTRICT uy = attribs[PIdx::uy].dataPtr();
-    Real* const AMREX_RESTRICT uz = attribs[PIdx::uz].dataPtr();
-    const Real* const AMREX_RESTRICT Ex = attribs[PIdx::Ex].dataPtr();
-    const Real* const AMREX_RESTRICT Ey = attribs[PIdx::Ey].dataPtr();
-    const Real* const AMREX_RESTRICT Ez = attribs[PIdx::Ez].dataPtr();
-    const Real* const AMREX_RESTRICT Bx = attribs[PIdx::Bx].dataPtr();
-    const Real* const AMREX_RESTRICT By = attribs[PIdx::By].dataPtr();
-    const Real* const AMREX_RESTRICT Bz = attribs[PIdx::Bz].dataPtr();
+    ParticleReal* const AMREX_RESTRICT x = xp.dataPtr();
+    ParticleReal* const AMREX_RESTRICT y = yp.dataPtr();
+    ParticleReal* const AMREX_RESTRICT z = zp.dataPtr();
+    ParticleReal* const AMREX_RESTRICT ux = attribs[PIdx::ux].dataPtr();
+    ParticleReal* const AMREX_RESTRICT uy = attribs[PIdx::uy].dataPtr();
+    ParticleReal* const AMREX_RESTRICT uz = attribs[PIdx::uz].dataPtr();
+    const ParticleReal* const AMREX_RESTRICT Ex = attribs[PIdx::Ex].dataPtr();
+    const ParticleReal* const AMREX_RESTRICT Ey = attribs[PIdx::Ey].dataPtr();
+    const ParticleReal* const AMREX_RESTRICT Ez = attribs[PIdx::Ez].dataPtr();
+    const ParticleReal* const AMREX_RESTRICT Bx = attribs[PIdx::Bx].dataPtr();
+    const ParticleReal* const AMREX_RESTRICT By = attribs[PIdx::By].dataPtr();
+    const ParticleReal* const AMREX_RESTRICT Bz = attribs[PIdx::Bz].dataPtr();
 
     if (WarpX::do_boosted_frame_diagnostic && do_boosted_frame_diags)
     {
diff --git a/Source/Particles/PhysicalParticleContainer.H b/Source/Particles/PhysicalParticleContainer.H
index ace1ec7f8..b558323a3 100644
--- a/Source/Particles/PhysicalParticleContainer.H
+++ b/Source/Particles/PhysicalParticleContainer.H
@@ -1,12 +1,13 @@
 #ifndef WARPX_PhysicalParticleContainer_H_
 #define WARPX_PhysicalParticleContainer_H_
 
-#include <map>
+#include <PlasmaInjector.H>
+#include <WarpXParticleContainer.H>
+#include <NCIGodfreyFilter.H>
 
 #include <AMReX_IArrayBox.H>
 
-#include <PlasmaInjector.H>
-#include <WarpXParticleContainer.H>
+#include <map>
 
 class PhysicalParticleContainer
     : public WarpXParticleContainer
@@ -87,9 +88,9 @@ public:
                          DtType a_dt_type=DtType::Full) override;
 
     virtual void PushPX(WarpXParIter& pti,
-                        amrex::Cuda::ManagedDeviceVector<amrex::Real>& xp,
-                        amrex::Cuda::ManagedDeviceVector<amrex::Real>& yp,
-                        amrex::Cuda::ManagedDeviceVector<amrex::Real>& zp,
+                        amrex::Cuda::ManagedDeviceVector<amrex::ParticleReal>& xp,
+                        amrex::Cuda::ManagedDeviceVector<amrex::ParticleReal>& yp,
+                        amrex::Cuda::ManagedDeviceVector<amrex::ParticleReal>& zp,
                         amrex::Real dt, DtType a_dt_type=DtType::Full);
 
     virtual void PushP (int lev, amrex::Real dt,
@@ -100,8 +101,21 @@ public:
                         const amrex::MultiFab& By,
                         const amrex::MultiFab& Bz) override;
 
-    void copy_attribs(WarpXParIter& pti,const amrex::Real* xp,
-                        const amrex::Real* yp, const amrex::Real* zp);
+    void PartitionParticlesInBuffers(
+                        long& nfine_current,
+                        long& nfine_gather,
+                        long const np,
+                        WarpXParIter& pti,
+                        int const lev,
+                        amrex::iMultiFab const* current_masks,
+                        amrex::iMultiFab const* gather_masks,
+                        RealVector& uxp,
+                        RealVector& uyp,
+                        RealVector& uzp,
+                        RealVector& wp );
+
+    void copy_attribs(WarpXParIter& pti,const amrex::ParticleReal* xp,
+                        const amrex::ParticleReal* yp, const amrex::ParticleReal* zp);
 
     virtual void PostRestart () final {}
 
@@ -131,6 +145,33 @@ public:
 
     virtual void ConvertUnits (ConvertDirection convert_dir) override;
 
+/**
+ * \brief Apply NCI Godfrey filter to all components of E and B before gather
+ * \param lev MR level
+ * \param box box onto which the filter is applied
+ * \param exeli safeguard to avoid destructing arrays between ParIter iterations on GPU
+ * \param filtered_Ex Array containing filtered value
+ * \param Ex Field array before filtering (not modified)
+ * \param ex_ptr pointer to the array used for field gather.
+ *
+ * The NCI Godfrey filter is applied on Ex, the result is stored in filtered_Ex
+ * and the pointer is modified (before this function is called, it points to Ex
+ * and after this function is called, it points to Ex_filtered)
+ */
+    void applyNCIFilter (
+        int lev, const amrex::Box& box,
+        amrex::Elixir& exeli, amrex::Elixir& eyeli, amrex::Elixir& ezeli,
+        amrex::Elixir& bxeli, amrex::Elixir& byeli, amrex::Elixir& bzeli,
+        amrex::FArrayBox& filtered_Ex, amrex::FArrayBox& filtered_Ey,
+        amrex::FArrayBox& filtered_Ez, amrex::FArrayBox& filtered_Bx,
+        amrex::FArrayBox& filtered_By, amrex::FArrayBox& filtered_Bz,
+        const amrex::FArrayBox& Ex, const amrex::FArrayBox& Ey,
+        const amrex::FArrayBox& Ez, const amrex::FArrayBox& Bx,
+        const amrex::FArrayBox& By, const amrex::FArrayBox& Bz,
+        amrex::FArrayBox const * & exfab, amrex::FArrayBox const * & eyfab,
+        amrex::FArrayBox const * & ezfab, amrex::FArrayBox const * & bxfab,
+        amrex::FArrayBox const * & byfab, amrex::FArrayBox const * & bzfab);
+
 protected:
 
     std::string species_name;
diff --git a/Source/Particles/PhysicalParticleContainer.cpp b/Source/Particles/PhysicalParticleContainer.cpp
index b1e83d652..02dee1967 100644
--- a/Source/Particles/PhysicalParticleContainer.cpp
+++ b/Source/Particles/PhysicalParticleContainer.cpp
@@ -31,8 +31,11 @@ PhysicalParticleContainer::PhysicalParticleContainer (AmrCore* amr_core, int isp
 
     pp.query("boost_adjust_transverse_positions", boost_adjust_transverse_positions);
     pp.query("do_backward_propagation", do_backward_propagation);
+
+    // Initialize splitting
     pp.query("do_splitting", do_splitting);
     pp.query("split_type", split_type);
+
     pp.query("do_continuous_injection", do_continuous_injection);
     // Whether to plot back-transformed (lab-frame) diagnostics
     // for this species.
@@ -196,7 +199,7 @@ PhysicalParticleContainer::CheckAndAddParticle(Real x, Real y, Real z,
                                                std::array<Real, 3> u,
                                                Real weight)
 {
-    std::array<Real,PIdx::nattribs> attribs;
+    std::array<ParticleReal,PIdx::nattribs> attribs;
     attribs.fill(0.0);
 
     // update attribs with input arguments
@@ -361,13 +364,13 @@ PhysicalParticleContainer::AddPlasma (int lev, RealBox part_realbox)
         for (int dir=0; dir<AMREX_SPACEDIM; dir++) {
             if ( tile_realbox.lo(dir) <= part_realbox.hi(dir) ) {
                 Real ncells_adjust = std::floor( (tile_realbox.lo(dir) - part_realbox.lo(dir))/dx[dir] );
-                overlap_realbox.setLo( dir, part_realbox.lo(dir) + std::max(ncells_adjust, 0.) * dx[dir]);
+                overlap_realbox.setLo( dir, part_realbox.lo(dir) + std::max(ncells_adjust, Real(0.)) * dx[dir]);
             } else {
                 no_overlap = true; break;
             }
             if ( tile_realbox.hi(dir) >= part_realbox.lo(dir) ) {
                 Real ncells_adjust = std::floor( (part_realbox.hi(dir) - tile_realbox.hi(dir))/dx[dir] );
-                overlap_realbox.setHi( dir, part_realbox.hi(dir) - std::max(ncells_adjust, 0.) * dx[dir]);
+                overlap_realbox.setHi( dir, part_realbox.hi(dir) - std::max(ncells_adjust, Real(0.)) * dx[dir]);
             } else {
                 no_overlap = true; break;
             }
@@ -437,7 +440,7 @@ PhysicalParticleContainer::AddPlasma (int lev, RealBox part_realbox)
 
         ParticleType* pp = particle_tile.GetArrayOfStructs()().data() + old_size;
         auto& soa = particle_tile.GetStructOfArrays();
-        GpuArray<Real*,PIdx::nattribs> pa;
+        GpuArray<ParticleReal*,PIdx::nattribs> pa;
         for (int ia = 0; ia < PIdx::nattribs; ++ia) {
             pa[ia] = soa.GetRealData(ia).data() + old_size;
         }
@@ -948,7 +951,6 @@ PhysicalParticleContainer::Evolve (int lev,
     BL_PROFILE_VAR_NS("PPC::Evolve::Copy", blp_copy);
     BL_PROFILE_VAR_NS("PPC::FieldGather", blp_fg);
     BL_PROFILE_VAR_NS("PPC::ParticlePush", blp_ppc_pp);
-    BL_PROFILE_VAR_NS("PPC::Evolve::partition", blp_partition);
 
     const std::array<Real,3>& dx = WarpX::CellSize(lev);
     const std::array<Real,3>& cdx = WarpX::CellSize(std::max(lev-1,0));
@@ -960,7 +962,6 @@ PhysicalParticleContainer::Evolve (int lev,
     BL_ASSERT(OnSameGrids(lev,jx));
 
     MultiFab* cost = WarpX::getCosts(lev);
-
     const iMultiFab* current_masks = WarpX::CurrentBufferMasks(lev);
     const iMultiFab* gather_masks = WarpX::GatherBufferMasks(lev);
 
@@ -991,10 +992,6 @@ PhysicalParticleContainer::Evolve (int lev,
 
         FArrayBox filtered_Ex, filtered_Ey, filtered_Ez;
         FArrayBox filtered_Bx, filtered_By, filtered_Bz;
-        std::vector<bool> inexflag;
-        Vector<long> pid;
-        RealVector tmp;
-        ParticleVector particle_tmp;
 
         for (WarpXParIter pti(*this, lev); pti.isValid(); ++pti)
         {
@@ -1026,56 +1023,18 @@ PhysicalParticleContainer::Evolve (int lev,
             FArrayBox const* bzfab = &(Bz[pti]);
 
             Elixir exeli, eyeli, ezeli, bxeli, byeli, bzeli;
+
             if (WarpX::use_fdtd_nci_corr)
             {
-#if (AMREX_SPACEDIM == 2)
-                const Box& tbox = amrex::grow(pti.tilebox(),{static_cast<int>(WarpX::nox),
-                            static_cast<int>(WarpX::noz)});
-#else
-                const Box& tbox = amrex::grow(pti.tilebox(),{static_cast<int>(WarpX::nox),
-                            static_cast<int>(WarpX::noy),
-                            static_cast<int>(WarpX::noz)});
-#endif
-
-                // Filter Ex (Both 2D and 3D)
-                filtered_Ex.resize(amrex::convert(tbox,WarpX::Ex_nodal_flag));
-                // Safeguard for GPU
-                exeli = filtered_Ex.elixir();
-                // Apply filter on Ex, result stored in filtered_Ex
-                nci_godfrey_filter_exeybz[lev]->ApplyStencil(filtered_Ex, Ex[pti], filtered_Ex.box());
-                // Update exfab reference
-                exfab = &filtered_Ex;
-
-                // Filter Ez
-                filtered_Ez.resize(amrex::convert(tbox,WarpX::Ez_nodal_flag));
-                ezeli = filtered_Ez.elixir();
-                nci_godfrey_filter_bxbyez[lev]->ApplyStencil(filtered_Ez, Ez[pti], filtered_Ez.box());
-                ezfab = &filtered_Ez;
-
-                // Filter By
-                filtered_By.resize(amrex::convert(tbox,WarpX::By_nodal_flag));
-                byeli = filtered_By.elixir();
-                nci_godfrey_filter_bxbyez[lev]->ApplyStencil(filtered_By, By[pti], filtered_By.box());
-                byfab = &filtered_By;
-#if (AMREX_SPACEDIM == 3)
-                // Filter Ey
-                filtered_Ey.resize(amrex::convert(tbox,WarpX::Ey_nodal_flag));
-                eyeli = filtered_Ey.elixir();
-                nci_godfrey_filter_exeybz[lev]->ApplyStencil(filtered_Ey, Ey[pti], filtered_Ey.box());
-                eyfab = &filtered_Ey;
-
-                // Filter Bx
-                filtered_Bx.resize(amrex::convert(tbox,WarpX::Bx_nodal_flag));
-                bxeli = filtered_Bx.elixir();
-                nci_godfrey_filter_bxbyez[lev]->ApplyStencil(filtered_Bx, Bx[pti], filtered_Bx.box());
-                bxfab = &filtered_Bx;
-
-                // Filter Bz
-                filtered_Bz.resize(amrex::convert(tbox,WarpX::Bz_nodal_flag));
-                bzeli = filtered_Bz.elixir();
-                nci_godfrey_filter_exeybz[lev]->ApplyStencil(filtered_Bz, Bz[pti], filtered_Bz.box());
-                bzfab = &filtered_Bz;
-#endif
+                // Filter arrays Ex[pti], store the result in
+                // filtered_Ex and update pointer exfab so that it
+                // points to filtered_Ex (and do the same for all
+                // components of E and B).
+                applyNCIFilter(lev, pti.tilebox(), exeli, eyeli, ezeli, bxeli, byeli, bzeli,
+                               filtered_Ex, filtered_Ey, filtered_Ez,
+                               filtered_Bx, filtered_By, filtered_Bz,
+                               Ex[pti], Ey[pti], Ez[pti], Bx[pti], By[pti], Bz[pti],
+                               exfab, eyfab, ezfab, bxfab, byfab, bzfab);
             }
 
             Exp.assign(np,0.0);
@@ -1085,99 +1044,21 @@ PhysicalParticleContainer::Evolve (int lev,
             Byp.assign(np,WarpX::B_external[1]);
             Bzp.assign(np,WarpX::B_external[2]);
 
-            long nfine_current = np; //! number of particles depositing to fine grid
-            long nfine_gather = np;  //! number of particles gathering from fine grid
-            if (has_buffer && !do_not_push)
-            {
-                BL_PROFILE_VAR_START(blp_partition);
-                inexflag.resize(np);
-                auto& aos = pti.GetArrayOfStructs();
-                // We need to partition the large buffer first
-                iMultiFab const* bmasks = (WarpX::n_field_gather_buffer >= WarpX::n_current_deposition_buffer) ?
-                    gather_masks : current_masks;
-                int i = 0;
-                const auto& msk = (*bmasks)[pti];
-                for (const auto& p : aos) {
-                    const IntVect& iv = Index(p, lev);
-                    inexflag[i++] = msk(iv);
-                }
-
-                pid.resize(np);
-                std::iota(pid.begin(), pid.end(), 0L);
-
-                auto sep = std::stable_partition(pid.begin(), pid.end(),
-                                                 [&inexflag](long id) { return inexflag[id]; });
-
-                if (WarpX::n_current_deposition_buffer == WarpX::n_field_gather_buffer) {
-                    nfine_current = nfine_gather = std::distance(pid.begin(), sep);
-                } else if (sep != pid.end()) {
-                    int n_buf;
-                    if (bmasks == gather_masks) {
-                        nfine_gather = std::distance(pid.begin(), sep);
-                        bmasks = current_masks;
-                        n_buf = WarpX::n_current_deposition_buffer;
-                    } else {
-                        nfine_current = std::distance(pid.begin(), sep);
-                        bmasks = gather_masks;
-                        n_buf = WarpX::n_field_gather_buffer;
-                    }
-                    if (n_buf > 0)
-                    {
-                        const auto& msk2 = (*bmasks)[pti];
-                        for (auto it = sep; it != pid.end(); ++it) {
-                            const long id = *it;
-                            const IntVect& iv = Index(aos[id], lev);
-                            inexflag[id] = msk2(iv);
-                        }
-
-                        auto sep2 = std::stable_partition(sep, pid.end(),
-                                                          [&inexflag](long id) {return inexflag[id];});
-                        if (bmasks == gather_masks) {
-                            nfine_gather = std::distance(pid.begin(), sep2);
-                        } else {
-                            nfine_current = std::distance(pid.begin(), sep2);
-                        }
-                    }
-                }
-
-                // only deposit / gather to coarsest grid
-                if (m_deposit_on_main_grid && lev > 0) {
-                    nfine_current = 0;
-                }
-                if (m_gather_from_main_grid && lev > 0) {
-                    nfine_gather = 0;
-                }
-
-                if (nfine_current != np || nfine_gather != np)
-                {
-                    particle_tmp.resize(np);
-                    for (long ip = 0; ip < np; ++ip) {
-                        particle_tmp[ip] = aos[pid[ip]];
-                    }
-                    std::swap(aos(), particle_tmp);
-
-                    tmp.resize(np);
-                    for (long ip = 0; ip < np; ++ip) {
-                        tmp[ip] = wp[pid[ip]];
-                    }
-                    std::swap(wp, tmp);
-
-                    for (long ip = 0; ip < np; ++ip) {
-                        tmp[ip] = uxp[pid[ip]];
-                    }
-                    std::swap(uxp, tmp);
-
-                    for (long ip = 0; ip < np; ++ip) {
-                        tmp[ip] = uyp[pid[ip]];
-                    }
-                    std::swap(uyp, tmp);
-
-                    for (long ip = 0; ip < np; ++ip) {
-                        tmp[ip] = uzp[pid[ip]];
-                    }
-                    std::swap(uzp, tmp);
-                }
-                BL_PROFILE_VAR_STOP(blp_partition);
+            // Determine which particles deposit/gather in the buffer, and
+            // which particles deposit/gather in the fine patch
+            long nfine_current = np;
+            long nfine_gather = np;
+            if (has_buffer && !do_not_push) {
+                // - Modify `nfine_current` and `nfine_gather` (in place)
+                //    so that they correspond to the number of particles
+                //    that deposit/gather in the fine patch respectively.
+                // - Reorder the particle arrays,
+                //    so that the `nfine_current`/`nfine_gather` first particles
+                //    deposit/gather in the fine patch
+                //    and (thus) the `np-nfine_current`/`np-nfine_gather` last particles
+                //    deposit/gather in the buffer
+                PartitionParticlesInBuffers( nfine_current, nfine_gather, np,
+                    pti, lev, current_masks, gather_masks, uxp, uyp, uzp, wp );
             }
 
             const long np_current = (cjx) ? nfine_current : np;
@@ -1235,54 +1116,16 @@ PhysicalParticleContainer::Evolve (int lev,
 
                     if (WarpX::use_fdtd_nci_corr)
                     {
-#if (AMREX_SPACEDIM == 2)
-                        const Box& tbox = amrex::grow(cbox,{static_cast<int>(WarpX::nox),
-                                    static_cast<int>(WarpX::noz)});
-#else
-                        const Box& tbox = amrex::grow(cbox,{static_cast<int>(WarpX::nox),
-                                    static_cast<int>(WarpX::noy),
-                                    static_cast<int>(WarpX::noz)});
-#endif
-
-                        // Filter Ex (both 2D and 3D)
-                        filtered_Ex.resize(amrex::convert(tbox,WarpX::Ex_nodal_flag));
-                        // Safeguard for GPU
-                        exeli = filtered_Ex.elixir();
-                        // Apply filter on Ex, result stored in filtered_Ex
-                        nci_godfrey_filter_exeybz[lev-1]->ApplyStencil(filtered_Ex, (*cEx)[pti], filtered_Ex.box());
-                        // Update exfab reference
-                        cexfab = &filtered_Ex;
-
-                        // Filter Ez
-                        filtered_Ez.resize(amrex::convert(tbox,WarpX::Ez_nodal_flag));
-                        ezeli = filtered_Ez.elixir();
-                        nci_godfrey_filter_bxbyez[lev-1]->ApplyStencil(filtered_Ez, (*cEz)[pti], filtered_Ez.box());
-                        cezfab = &filtered_Ez;
-
-                        // Filter By
-                        filtered_By.resize(amrex::convert(tbox,WarpX::By_nodal_flag));
-                        byeli = filtered_By.elixir();
-                        nci_godfrey_filter_bxbyez[lev-1]->ApplyStencil(filtered_By, (*cBy)[pti], filtered_By.box());
-                        cbyfab = &filtered_By;
-#if (AMREX_SPACEDIM == 3)
-                        // Filter Ey
-                        filtered_Ey.resize(amrex::convert(tbox,WarpX::Ey_nodal_flag));
-                        eyeli = filtered_Ey.elixir();
-                        nci_godfrey_filter_exeybz[lev-1]->ApplyStencil(filtered_Ey, (*cEy)[pti], filtered_Ey.box());
-                        ceyfab = &filtered_Ey;
-
-                        // Filter Bx
-                        filtered_Bx.resize(amrex::convert(tbox,WarpX::Bx_nodal_flag));
-                        bxeli = filtered_Bx.elixir();
-                        nci_godfrey_filter_bxbyez[lev-1]->ApplyStencil(filtered_Bx, (*cBx)[pti], filtered_Bx.box());
-                        cbxfab = &filtered_Bx;
-
-                        // Filter Bz
-                        filtered_Bz.resize(amrex::convert(tbox,WarpX::Bz_nodal_flag));
-                        bzeli = filtered_Bz.elixir();
-                        nci_godfrey_filter_exeybz[lev-1]->ApplyStencil(filtered_Bz, (*cBz)[pti], filtered_Bz.box());
-                        cbzfab = &filtered_Bz;
-#endif
+                        // Filter arrays (*cEx)[pti], store the result in
+                        // filtered_Ex and update pointer cexfab so that it
+                        // points to filtered_Ex (and do the same for all
+                        // components of E and B)
+                        applyNCIFilter(lev-1, cbox, exeli, eyeli, ezeli, bxeli, byeli, bzeli,
+                                       filtered_Ex, filtered_Ey, filtered_Ez,
+                                       filtered_Bx, filtered_By, filtered_Bz,
+                                       (*cEx)[pti], (*cEy)[pti], (*cEz)[pti],
+                                       (*cBx)[pti], (*cBy)[pti], (*cBz)[pti],
+                                       cexfab, ceyfab, cezfab, cbxfab, cbyfab, cbzfab);
                     }
 
                     // Field gather for particles in gather buffers
@@ -1375,6 +1218,74 @@ PhysicalParticleContainer::Evolve (int lev,
     }
 }
 
+void
+PhysicalParticleContainer::applyNCIFilter (
+    int lev, const Box& box,
+    Elixir& exeli, Elixir& eyeli, Elixir& ezeli,
+    Elixir& bxeli, Elixir& byeli, Elixir& bzeli,
+    FArrayBox& filtered_Ex, FArrayBox& filtered_Ey, FArrayBox& filtered_Ez,
+    FArrayBox& filtered_Bx, FArrayBox& filtered_By, FArrayBox& filtered_Bz,
+    const FArrayBox& Ex, const FArrayBox& Ey, const FArrayBox& Ez,
+    const FArrayBox& Bx, const FArrayBox& By, const FArrayBox& Bz,
+    FArrayBox const * & ex_ptr, FArrayBox const * & ey_ptr,
+    FArrayBox const * & ez_ptr, FArrayBox const * & bx_ptr,
+    FArrayBox const * & by_ptr, FArrayBox const * & bz_ptr)
+{
+
+    // Get instances of NCI Godfrey filters
+    const auto& nci_godfrey_filter_exeybz = WarpX::GetInstance().nci_godfrey_filter_exeybz;
+    const auto& nci_godfrey_filter_bxbyez = WarpX::GetInstance().nci_godfrey_filter_bxbyez;
+
+#if (AMREX_SPACEDIM == 2)
+    const Box& tbox = amrex::grow(box,{static_cast<int>(WarpX::nox),
+                static_cast<int>(WarpX::noz)});
+#else
+    const Box& tbox = amrex::grow(box,{static_cast<int>(WarpX::nox),
+                static_cast<int>(WarpX::noy),
+                static_cast<int>(WarpX::noz)});
+#endif
+
+    // Filter Ex (Both 2D and 3D)
+    filtered_Ex.resize(amrex::convert(tbox,WarpX::Ex_nodal_flag));
+    // Safeguard for GPU
+    exeli = filtered_Ex.elixir();
+    // Apply filter on Ex, result stored in filtered_Ex
+    nci_godfrey_filter_exeybz[lev]->ApplyStencil(filtered_Ex, Ex, filtered_Ex.box());
+    // Update ex_ptr reference
+    ex_ptr = &filtered_Ex;
+
+    // Filter Ez
+    filtered_Ez.resize(amrex::convert(tbox,WarpX::Ez_nodal_flag));
+    ezeli = filtered_Ez.elixir();
+    nci_godfrey_filter_bxbyez[lev]->ApplyStencil(filtered_Ez, Ez, filtered_Ez.box());
+    ez_ptr = &filtered_Ez;
+
+    // Filter By
+    filtered_By.resize(amrex::convert(tbox,WarpX::By_nodal_flag));
+    byeli = filtered_By.elixir();
+    nci_godfrey_filter_bxbyez[lev]->ApplyStencil(filtered_By, By, filtered_By.box());
+    by_ptr = &filtered_By;
+#if (AMREX_SPACEDIM == 3)
+    // Filter Ey
+    filtered_Ey.resize(amrex::convert(tbox,WarpX::Ey_nodal_flag));
+    eyeli = filtered_Ey.elixir();
+    nci_godfrey_filter_exeybz[lev]->ApplyStencil(filtered_Ey, Ey, filtered_Ey.box());
+    ey_ptr = &filtered_Ey;
+
+    // Filter Bx
+    filtered_Bx.resize(amrex::convert(tbox,WarpX::Bx_nodal_flag));
+    bxeli = filtered_Bx.elixir();
+    nci_godfrey_filter_bxbyez[lev]->ApplyStencil(filtered_Bx, Bx, filtered_Bx.box());
+    bx_ptr = &filtered_Bx;
+
+    // Filter Bz
+    filtered_Bz.resize(amrex::convert(tbox,WarpX::Bz_nodal_flag));
+    bzeli = filtered_Bz.elixir();
+    nci_godfrey_filter_exeybz[lev]->ApplyStencil(filtered_Bz, Bz, filtered_Bz.box());
+    bz_ptr = &filtered_Bz;
+#endif
+}
+
 // Loop over all particles in the particle container and
 // split particles tagged with p.id()=DoSplitParticleID
 void
@@ -1382,7 +1293,7 @@ PhysicalParticleContainer::SplitParticles(int lev)
 {
     auto& mypc = WarpX::GetInstance().GetPartContainer();
     auto& pctmp_split = mypc.GetPCtmp();
-    Cuda::ManagedDeviceVector<Real> xp, yp, zp;
+    Cuda::ManagedDeviceVector<ParticleReal> xp, yp, zp;
     RealVector psplit_x, psplit_y, psplit_z, psplit_w;
     RealVector psplit_ux, psplit_uy, psplit_uz;
     long np_split_to_add = 0;
@@ -1398,7 +1309,16 @@ PhysicalParticleContainer::SplitParticles(int lev)
     for (WarpXParIter pti(*this, lev); pti.isValid(); ++pti)
     {
         pti.GetPosition(xp, yp, zp);
+
+        // offset for split particles is computed as a function of cell size
+        // and number of particles per cell, so that a uniform distribution
+        // before splitting results in a uniform distribution after splitting
+        const amrex::Vector<int> ppc_nd = plasma_injector->num_particles_per_cell_each_dim;
         const std::array<Real,3>& dx = WarpX::CellSize(lev);
+        amrex::Vector<amrex::Real> split_offset = {dx[0]/2./ppc_nd[0],
+                                                   dx[1]/2./ppc_nd[1],
+                                                   dx[2]/2./ppc_nd[2]};
+
         // particle Array Of Structs data
         auto& particles = pti.GetArrayOfStructs();
         // particle Struct Of Arrays data
@@ -1421,9 +1341,9 @@ PhysicalParticleContainer::SplitParticles(int lev)
                     for (int ishift = -1; ishift < 2; ishift +=2 ){
                         for (int kshift = -1; kshift < 2; kshift +=2 ){
                             // Add one particle with offset in x and z
-                            psplit_x.push_back( xp[i] + ishift*dx[0]/2 );
+                            psplit_x.push_back( xp[i] + ishift*split_offset[0] );
                             psplit_y.push_back( yp[i] );
-                            psplit_z.push_back( zp[i] + kshift*dx[2]/2 );
+                            psplit_z.push_back( zp[i] + kshift*split_offset[2] );
                             psplit_ux.push_back( uxp[i] );
                             psplit_uy.push_back( uyp[i] );
                             psplit_uz.push_back( uzp[i] );
@@ -1435,7 +1355,7 @@ PhysicalParticleContainer::SplitParticles(int lev)
                     // 4 particles in 2d
                     for (int ishift = -1; ishift < 2; ishift +=2 ){
                         // Add one particle with offset in x
-                        psplit_x.push_back( xp[i] + ishift*dx[0]/2 );
+                        psplit_x.push_back( xp[i] + ishift*split_offset[0] );
                         psplit_y.push_back( yp[i] );
                         psplit_z.push_back( zp[i] );
                         psplit_ux.push_back( uxp[i] );
@@ -1445,7 +1365,7 @@ PhysicalParticleContainer::SplitParticles(int lev)
                         // Add one particle with offset in z
                         psplit_x.push_back( xp[i] );
                         psplit_y.push_back( yp[i] );
-                        psplit_z.push_back( zp[i] + ishift*dx[2]/2 );
+                        psplit_z.push_back( zp[i] + ishift*split_offset[2] );
                         psplit_ux.push_back( uxp[i] );
                         psplit_uy.push_back( uyp[i] );
                         psplit_uz.push_back( uzp[i] );
@@ -1460,9 +1380,9 @@ PhysicalParticleContainer::SplitParticles(int lev)
                         for (int jshift = -1; jshift < 2; jshift +=2 ){
                             for (int kshift = -1; kshift < 2; kshift +=2 ){
                                 // Add one particle with offset in x, y and z
-                                psplit_x.push_back( xp[i] + ishift*dx[0]/2 );
-                                psplit_y.push_back( yp[i] + jshift*dx[1]/2 );
-                                psplit_z.push_back( zp[i] + kshift*dx[2]/2 );
+                                psplit_x.push_back( xp[i] + ishift*split_offset[0] );
+                                psplit_y.push_back( yp[i] + jshift*split_offset[1] );
+                                psplit_z.push_back( zp[i] + kshift*split_offset[2] );
                                 psplit_ux.push_back( uxp[i] );
                                 psplit_uy.push_back( uyp[i] );
                                 psplit_uz.push_back( uzp[i] );
@@ -1475,7 +1395,7 @@ PhysicalParticleContainer::SplitParticles(int lev)
                     // 6 particles in 3d
                     for (int ishift = -1; ishift < 2; ishift +=2 ){
                         // Add one particle with offset in x
-                        psplit_x.push_back( xp[i] + ishift*dx[0]/2 );
+                        psplit_x.push_back( xp[i] + ishift*split_offset[0] );
                         psplit_y.push_back( yp[i] );
                         psplit_z.push_back( zp[i] );
                         psplit_ux.push_back( uxp[i] );
@@ -1484,7 +1404,7 @@ PhysicalParticleContainer::SplitParticles(int lev)
                         psplit_w.push_back( wp[i]/np_split );
                         // Add one particle with offset in y
                         psplit_x.push_back( xp[i] );
-                        psplit_y.push_back( yp[i] + ishift*dx[1]/2 );
+                        psplit_y.push_back( yp[i] + ishift*split_offset[1] );
                         psplit_z.push_back( zp[i] );
                         psplit_ux.push_back( uxp[i] );
                         psplit_uy.push_back( uyp[i] );
@@ -1493,7 +1413,7 @@ PhysicalParticleContainer::SplitParticles(int lev)
                         // Add one particle with offset in z
                         psplit_x.push_back( xp[i] );
                         psplit_y.push_back( yp[i] );
-                        psplit_z.push_back( zp[i] + ishift*dx[2]/2 );
+                        psplit_z.push_back( zp[i] + ishift*split_offset[2] );
                         psplit_ux.push_back( uxp[i] );
                         psplit_uy.push_back( uyp[i] );
                         psplit_uz.push_back( uzp[i] );
@@ -1531,27 +1451,27 @@ PhysicalParticleContainer::SplitParticles(int lev)
 
 void
 PhysicalParticleContainer::PushPX(WarpXParIter& pti,
-                                  Cuda::ManagedDeviceVector<Real>& xp,
-                                  Cuda::ManagedDeviceVector<Real>& yp,
-                                  Cuda::ManagedDeviceVector<Real>& zp,
+                                  Cuda::ManagedDeviceVector<ParticleReal>& xp,
+                                  Cuda::ManagedDeviceVector<ParticleReal>& yp,
+                                  Cuda::ManagedDeviceVector<ParticleReal>& zp,
                                   Real dt, DtType a_dt_type)
 {
 
     // This wraps the momentum and position advance so that inheritors can modify the call.
     auto& attribs = pti.GetAttribs();
     // Extract pointers to the different particle quantities
-    Real* const AMREX_RESTRICT x = xp.dataPtr();
-    Real* const AMREX_RESTRICT y = yp.dataPtr();
-    Real* const AMREX_RESTRICT z = zp.dataPtr();
-    Real* const AMREX_RESTRICT ux = attribs[PIdx::ux].dataPtr();
-    Real* const AMREX_RESTRICT uy = attribs[PIdx::uy].dataPtr();
-    Real* const AMREX_RESTRICT uz = attribs[PIdx::uz].dataPtr();
-    const Real* const AMREX_RESTRICT Ex = attribs[PIdx::Ex].dataPtr();
-    const Real* const AMREX_RESTRICT Ey = attribs[PIdx::Ey].dataPtr();
-    const Real* const AMREX_RESTRICT Ez = attribs[PIdx::Ez].dataPtr();
-    const Real* const AMREX_RESTRICT Bx = attribs[PIdx::Bx].dataPtr();
-    const Real* const AMREX_RESTRICT By = attribs[PIdx::By].dataPtr();
-    const Real* const AMREX_RESTRICT Bz = attribs[PIdx::Bz].dataPtr();
+    ParticleReal* const AMREX_RESTRICT x = xp.dataPtr();
+    ParticleReal* const AMREX_RESTRICT y = yp.dataPtr();
+    ParticleReal* const AMREX_RESTRICT z = zp.dataPtr();
+    ParticleReal* const AMREX_RESTRICT ux = attribs[PIdx::ux].dataPtr();
+    ParticleReal* const AMREX_RESTRICT uy = attribs[PIdx::uy].dataPtr();
+    ParticleReal* const AMREX_RESTRICT uz = attribs[PIdx::uz].dataPtr();
+    const ParticleReal* const AMREX_RESTRICT Ex = attribs[PIdx::Ex].dataPtr();
+    const ParticleReal* const AMREX_RESTRICT Ey = attribs[PIdx::Ey].dataPtr();
+    const ParticleReal* const AMREX_RESTRICT Ez = attribs[PIdx::Ez].dataPtr();
+    const ParticleReal* const AMREX_RESTRICT Bx = attribs[PIdx::Bx].dataPtr();
+    const ParticleReal* const AMREX_RESTRICT By = attribs[PIdx::By].dataPtr();
+    const ParticleReal* const AMREX_RESTRICT Bz = attribs[PIdx::Bz].dataPtr();
 
     if (WarpX::do_boosted_frame_diagnostic && do_boosted_frame_diags && (a_dt_type!=DtType::SecondHalf))
     {
@@ -1660,15 +1580,15 @@ PhysicalParticleContainer::PushP (int lev, Real dt,
 
             // This wraps the momentum advance so that inheritors can modify the call.
             // Extract pointers to the different particle quantities
-            Real* const AMREX_RESTRICT ux = attribs[PIdx::ux].dataPtr();
-            Real* const AMREX_RESTRICT uy = attribs[PIdx::uy].dataPtr();
-            Real* const AMREX_RESTRICT uz = attribs[PIdx::uz].dataPtr();
-            const Real* const AMREX_RESTRICT Expp = Exp.dataPtr();
-            const Real* const AMREX_RESTRICT Eypp = Eyp.dataPtr();
-            const Real* const AMREX_RESTRICT Ezpp = Ezp.dataPtr();
-            const Real* const AMREX_RESTRICT Bxpp = Bxp.dataPtr();
-            const Real* const AMREX_RESTRICT Bypp = Byp.dataPtr();
-            const Real* const AMREX_RESTRICT Bzpp = Bzp.dataPtr();
+            ParticleReal* const AMREX_RESTRICT ux = attribs[PIdx::ux].dataPtr();
+            ParticleReal* const AMREX_RESTRICT uy = attribs[PIdx::uy].dataPtr();
+            ParticleReal* const AMREX_RESTRICT uz = attribs[PIdx::uz].dataPtr();
+            const ParticleReal* const AMREX_RESTRICT Expp = Exp.dataPtr();
+            const ParticleReal* const AMREX_RESTRICT Eypp = Eyp.dataPtr();
+            const ParticleReal* const AMREX_RESTRICT Ezpp = Ezp.dataPtr();
+            const ParticleReal* const AMREX_RESTRICT Bxpp = Bxp.dataPtr();
+            const ParticleReal* const AMREX_RESTRICT Bypp = Byp.dataPtr();
+            const ParticleReal* const AMREX_RESTRICT Bzpp = Bzp.dataPtr();
 
             // Loop over the particles and update their momentum
             const Real q = this->charge;
@@ -1694,23 +1614,23 @@ PhysicalParticleContainer::PushP (int lev, Real dt,
     }
 }
 
-void PhysicalParticleContainer::copy_attribs(WarpXParIter& pti,const Real* xp,
-                                             const Real* yp, const Real* zp)
+void PhysicalParticleContainer::copy_attribs(WarpXParIter& pti,const ParticleReal* xp,
+                                             const ParticleReal* yp, const ParticleReal* zp)
 {
     auto& attribs = pti.GetAttribs();
-    Real* AMREX_RESTRICT uxp = attribs[PIdx::ux].dataPtr();
-    Real* AMREX_RESTRICT uyp = attribs[PIdx::uy].dataPtr();
-    Real* AMREX_RESTRICT uzp = attribs[PIdx::uz].dataPtr();
+    ParticleReal* AMREX_RESTRICT uxp = attribs[PIdx::ux].dataPtr();
+    ParticleReal* AMREX_RESTRICT uyp = attribs[PIdx::uy].dataPtr();
+    ParticleReal* AMREX_RESTRICT uzp = attribs[PIdx::uz].dataPtr();
 
     const auto np = pti.numParticles();
     const auto lev = pti.GetLevel();
     const auto index = pti.GetPairIndex();
-    Real* AMREX_RESTRICT xpold  = tmp_particle_data[lev][index][TmpIdx::xold ].dataPtr();
-    Real* AMREX_RESTRICT ypold  = tmp_particle_data[lev][index][TmpIdx::yold ].dataPtr();
-    Real* AMREX_RESTRICT zpold  = tmp_particle_data[lev][index][TmpIdx::zold ].dataPtr();
-    Real* AMREX_RESTRICT uxpold = tmp_particle_data[lev][index][TmpIdx::uxold].dataPtr();
-    Real* AMREX_RESTRICT uypold = tmp_particle_data[lev][index][TmpIdx::uyold].dataPtr();
-    Real* AMREX_RESTRICT uzpold = tmp_particle_data[lev][index][TmpIdx::uzold].dataPtr();
+    ParticleReal* AMREX_RESTRICT xpold  = tmp_particle_data[lev][index][TmpIdx::xold ].dataPtr();
+    ParticleReal* AMREX_RESTRICT ypold  = tmp_particle_data[lev][index][TmpIdx::yold ].dataPtr();
+    ParticleReal* AMREX_RESTRICT zpold  = tmp_particle_data[lev][index][TmpIdx::zold ].dataPtr();
+    ParticleReal* AMREX_RESTRICT uxpold = tmp_particle_data[lev][index][TmpIdx::uxold].dataPtr();
+    ParticleReal* AMREX_RESTRICT uypold = tmp_particle_data[lev][index][TmpIdx::uyold].dataPtr();
+    ParticleReal* AMREX_RESTRICT uzpold = tmp_particle_data[lev][index][TmpIdx::uzold].dataPtr();
 
     ParallelFor( np,
                  [=] AMREX_GPU_DEVICE (long i) {
@@ -1929,9 +1849,9 @@ PhysicalParticleContainer::FieldGather (WarpXParIter& pti,
     const Array4<const Real>& by_arr = byfab->array();
     const Array4<const Real>& bz_arr = bzfab->array();
 
-    const Real * const AMREX_RESTRICT xp = m_xp[thread_num].dataPtr() + offset;
-    const Real * const AMREX_RESTRICT zp = m_zp[thread_num].dataPtr() + offset;
-    const Real * const AMREX_RESTRICT yp = m_yp[thread_num].dataPtr() + offset;
+    const ParticleReal * const AMREX_RESTRICT xp = m_xp[thread_num].dataPtr() + offset;
+    const ParticleReal * const AMREX_RESTRICT zp = m_zp[thread_num].dataPtr() + offset;
+    const ParticleReal * const AMREX_RESTRICT yp = m_yp[thread_num].dataPtr() + offset;
 
     // Lower corner of tile box physical domain
     const std::array<Real, 3>& xyzmin = WarpX::LowerCorner(box, gather_lev);
@@ -2078,15 +1998,15 @@ PhysicalParticleContainer::buildIonizationMask (const amrex::MFIter& mfi, const
     // Otherwise, resize ionization_mask, and get poiters to attribs arrays.
     ionization_mask.resize(np);
     int * const AMREX_RESTRICT p_ionization_mask = ionization_mask.data();
-    const Real * const AMREX_RESTRICT ux = soa.GetRealData(PIdx::ux).data();
-    const Real * const AMREX_RESTRICT uy = soa.GetRealData(PIdx::uy).data();
-    const Real * const AMREX_RESTRICT uz = soa.GetRealData(PIdx::uz).data();
-    const Real * const AMREX_RESTRICT ex = soa.GetRealData(PIdx::Ex).data();
-    const Real * const AMREX_RESTRICT ey = soa.GetRealData(PIdx::Ey).data();
-    const Real * const AMREX_RESTRICT ez = soa.GetRealData(PIdx::Ez).data();
-    const Real * const AMREX_RESTRICT bx = soa.GetRealData(PIdx::Bx).data();
-    const Real * const AMREX_RESTRICT by = soa.GetRealData(PIdx::By).data();
-    const Real * const AMREX_RESTRICT bz = soa.GetRealData(PIdx::Bz).data();
+    const ParticleReal * const AMREX_RESTRICT ux = soa.GetRealData(PIdx::ux).data();
+    const ParticleReal * const AMREX_RESTRICT uy = soa.GetRealData(PIdx::uy).data();
+    const ParticleReal * const AMREX_RESTRICT uz = soa.GetRealData(PIdx::uz).data();
+    const ParticleReal * const AMREX_RESTRICT ex = soa.GetRealData(PIdx::Ex).data();
+    const ParticleReal * const AMREX_RESTRICT ey = soa.GetRealData(PIdx::Ey).data();
+    const ParticleReal * const AMREX_RESTRICT ez = soa.GetRealData(PIdx::Ez).data();
+    const ParticleReal * const AMREX_RESTRICT bx = soa.GetRealData(PIdx::Bx).data();
+    const ParticleReal * const AMREX_RESTRICT by = soa.GetRealData(PIdx::By).data();
+    const ParticleReal * const AMREX_RESTRICT bz = soa.GetRealData(PIdx::Bz).data();
     int* ion_lev = soa.GetIntData(particle_icomps["ionization_level"]).data();
 
     Real c = PhysConst::c;
diff --git a/Source/Particles/Pusher/GetAndSetPosition.H b/Source/Particles/Pusher/GetAndSetPosition.H
index 3c74baeb2..f0dfa4c83 100644
--- a/Source/Particles/Pusher/GetAndSetPosition.H
+++ b/Source/Particles/Pusher/GetAndSetPosition.H
@@ -11,7 +11,7 @@
  *        and stores them in the variables `x`, `y`, `z`. */
 AMREX_GPU_HOST_DEVICE AMREX_INLINE
 void GetPosition(
-    amrex::Real& x, amrex::Real& y, amrex::Real& z,
+    amrex::ParticleReal& x, amrex::ParticleReal& y, amrex::ParticleReal& z,
     const WarpXParticleContainer::ParticleType& p)
 {
 #if (AMREX_SPACEDIM==3)
@@ -20,7 +20,7 @@ void GetPosition(
     z = p.pos(2);
 #else
     x = p.pos(0);
-    y = std::numeric_limits<amrex::Real>::quiet_NaN();
+    y = std::numeric_limits<amrex::ParticleReal>::quiet_NaN();
     z = p.pos(1);
 #endif
 }
@@ -30,7 +30,7 @@ void GetPosition(
 AMREX_GPU_HOST_DEVICE AMREX_INLINE
 void SetPosition(
     WarpXParticleContainer::ParticleType& p,
-    const amrex::Real x, const amrex::Real y, const amrex::Real z)
+    const amrex::ParticleReal x, const amrex::ParticleReal y, const amrex::ParticleReal z)
 {
 #if (AMREX_SPACEDIM==3)
     p.pos(0) = x;
@@ -49,10 +49,10 @@ void SetPosition(
  *         and store them in the variables `x`, `y`, `z` */
 AMREX_GPU_HOST_DEVICE AMREX_INLINE
 void GetCartesianPositionFromCylindrical(
-    amrex::Real& x, amrex::Real& y, amrex::Real& z,
-    const WarpXParticleContainer::ParticleType& p, const amrex::Real theta)
+    amrex::ParticleReal& x, amrex::ParticleReal& y, amrex::ParticleReal& z,
+    const WarpXParticleContainer::ParticleType& p, const amrex::ParticleReal theta)
 {
-    const amrex::Real r = p.pos(0);
+    const amrex::ParticleReal r = p.pos(0);
     x = r*std::cos(theta);
     y = r*std::sin(theta);
     z = p.pos(1);
@@ -63,8 +63,8 @@ void GetCartesianPositionFromCylindrical(
  *        from the values of `x`, `y`, `z` */
 AMREX_GPU_HOST_DEVICE AMREX_INLINE
 void SetCylindricalPositionFromCartesian(
-    WarpXParticleContainer::ParticleType& p, amrex::Real& theta,
-    const amrex::Real x, const amrex::Real y, const amrex::Real z )
+    WarpXParticleContainer::ParticleType& p, amrex::ParticleReal& theta,
+    const amrex::ParticleReal x, const amrex::ParticleReal y, const amrex::ParticleReal z )
 {
     theta = std::atan2(y, x);
     p.pos(0) = std::sqrt(x*x + y*y);
diff --git a/Source/Particles/Pusher/UpdateMomentumBoris.H b/Source/Particles/Pusher/UpdateMomentumBoris.H
index a33058347..205cc9a71 100644
--- a/Source/Particles/Pusher/UpdateMomentumBoris.H
+++ b/Source/Particles/Pusher/UpdateMomentumBoris.H
@@ -7,9 +7,9 @@
  *    given the value of its momenta `ux`, `uy`, `uz` */
 AMREX_GPU_HOST_DEVICE AMREX_INLINE
 void UpdateMomentumBoris(
-    amrex::Real& ux, amrex::Real& uy, amrex::Real& uz,
-    const amrex::Real Ex, const amrex::Real Ey, const amrex::Real Ez,
-    const amrex::Real Bx, const amrex::Real By, const amrex::Real Bz,
+    amrex::ParticleReal& ux, amrex::ParticleReal& uy, amrex::ParticleReal& uz,
+    const amrex::ParticleReal Ex, const amrex::ParticleReal Ey, const amrex::ParticleReal Ez,
+    const amrex::ParticleReal Bx, const amrex::ParticleReal By, const amrex::ParticleReal Bz,
     const amrex::Real q, const amrex::Real m, const amrex::Real dt )
 {
     const amrex::Real econst = 0.5*q*dt/m;
diff --git a/Source/Particles/Pusher/UpdateMomentumVay.H b/Source/Particles/Pusher/UpdateMomentumVay.H
index 1f0f19e63..433a891c5 100644
--- a/Source/Particles/Pusher/UpdateMomentumVay.H
+++ b/Source/Particles/Pusher/UpdateMomentumVay.H
@@ -9,9 +9,9 @@
  *    given the value of its momenta `ux`, `uy`, `uz` */
 AMREX_GPU_HOST_DEVICE AMREX_INLINE
 void UpdateMomentumVay(
-    amrex::Real& ux, amrex::Real& uy, amrex::Real& uz,
-    const amrex::Real Ex, const amrex::Real Ey, const amrex::Real Ez,
-    const amrex::Real Bx, const amrex::Real By, const amrex::Real Bz,
+    amrex::ParticleReal& ux, amrex::ParticleReal& uy, amrex::ParticleReal& uz,
+    const amrex::ParticleReal Ex, const amrex::ParticleReal Ey, const amrex::ParticleReal Ez,
+    const amrex::ParticleReal Bx, const amrex::ParticleReal By, const amrex::ParticleReal Bz,
     const amrex::Real q, const amrex::Real m, const amrex::Real dt )
 {
     // Constants
diff --git a/Source/Particles/Pusher/UpdatePosition.H b/Source/Particles/Pusher/UpdatePosition.H
index a9df63a30..da0e9cdf9 100644
--- a/Source/Particles/Pusher/UpdatePosition.H
+++ b/Source/Particles/Pusher/UpdatePosition.H
@@ -9,8 +9,8 @@
  *    given the value of its momenta `ux`, `uy`, `uz` */
 AMREX_GPU_HOST_DEVICE AMREX_INLINE
 void UpdatePosition(
-    amrex::Real& x, amrex::Real& y, amrex::Real& z,
-    const amrex::Real ux, const amrex::Real uy, const amrex::Real uz,
+    amrex::ParticleReal& x, amrex::ParticleReal& y, amrex::ParticleReal& z,
+    const amrex::ParticleReal ux, const amrex::ParticleReal uy, const amrex::ParticleReal uz,
     const amrex::Real dt )
 {
 
diff --git a/Source/Particles/Pusher/UpdatePositionPhoton.H b/Source/Particles/Pusher/UpdatePositionPhoton.H
index bd6e6cd21..f95c2b09d 100644
--- a/Source/Particles/Pusher/UpdatePositionPhoton.H
+++ b/Source/Particles/Pusher/UpdatePositionPhoton.H
@@ -10,8 +10,8 @@
  *    given the value of its momenta `ux`, `uy`, `uz` */
 AMREX_GPU_HOST_DEVICE AMREX_INLINE
 void UpdatePositionPhoton(
-    amrex::Real& x, amrex::Real& y, amrex::Real& z,
-    const amrex::Real ux, const amrex::Real uy, const amrex::Real uz,
+    amrex::ParticleReal& x, amrex::ParticleReal& y, amrex::ParticleReal& z,
+    const amrex::ParticleReal ux, const amrex::ParticleReal uy, const amrex::ParticleReal uz,
     const amrex::Real dt )
 {
     // Compute speed of light over inverse of momentum modulus
diff --git a/Source/Particles/RigidInjectedParticleContainer.H b/Source/Particles/RigidInjectedParticleContainer.H
index be3dd21f9..3abbb4afe 100644
--- a/Source/Particles/RigidInjectedParticleContainer.H
+++ b/Source/Particles/RigidInjectedParticleContainer.H
@@ -44,9 +44,9 @@ public:
                          DtType a_dt_type=DtType::Full) override;
 
     virtual void PushPX(WarpXParIter& pti,
-                        amrex::Cuda::ManagedDeviceVector<amrex::Real>& xp,
-                        amrex::Cuda::ManagedDeviceVector<amrex::Real>& yp,
-                        amrex::Cuda::ManagedDeviceVector<amrex::Real>& zp,
+                        amrex::Cuda::ManagedDeviceVector<amrex::ParticleReal>& xp,
+                        amrex::Cuda::ManagedDeviceVector<amrex::ParticleReal>& yp,
+                        amrex::Cuda::ManagedDeviceVector<amrex::ParticleReal>& zp,
                         amrex::Real dt, DtType a_dt_type=DtType::Full) override;
 
     virtual void PushP (int lev, amrex::Real dt,
diff --git a/Source/Particles/RigidInjectedParticleContainer.cpp b/Source/Particles/RigidInjectedParticleContainer.cpp
index 7d129fc01..891ade76d 100644
--- a/Source/Particles/RigidInjectedParticleContainer.cpp
+++ b/Source/Particles/RigidInjectedParticleContainer.cpp
@@ -76,7 +76,7 @@ RigidInjectedParticleContainer::RemapParticles()
                 // Note that the particles are already in the boosted frame.
                 // This value is saved to advance the particles not injected yet
 
-                Cuda::ManagedDeviceVector<Real> xp, yp, zp;
+                Cuda::ManagedDeviceVector<ParticleReal> xp, yp, zp;
 
                 for (WarpXParIter pti(*this, lev); pti.isValid(); ++pti)
                 {
@@ -136,7 +136,7 @@ RigidInjectedParticleContainer::BoostandRemapParticles()
 #pragma omp parallel
 #endif
     {
-        Cuda::ManagedDeviceVector<Real> xp, yp, zp;
+        Cuda::ManagedDeviceVector<ParticleReal> xp, yp, zp;
 
         for (WarpXParIter pti(*this, 0); pti.isValid(); ++pti)
         {
@@ -207,9 +207,9 @@ RigidInjectedParticleContainer::BoostandRemapParticles()
 
 void
 RigidInjectedParticleContainer::PushPX(WarpXParIter& pti,
-                                       Cuda::ManagedDeviceVector<Real>& xp,
-                                       Cuda::ManagedDeviceVector<Real>& yp,
-                                       Cuda::ManagedDeviceVector<Real>& zp,
+                                       Cuda::ManagedDeviceVector<ParticleReal>& xp,
+                                       Cuda::ManagedDeviceVector<ParticleReal>& yp,
+                                       Cuda::ManagedDeviceVector<ParticleReal>& zp,
                                        Real dt, DtType a_dt_type)
 {
 
@@ -220,21 +220,21 @@ RigidInjectedParticleContainer::PushPX(WarpXParIter& pti,
     auto& uzp = attribs[PIdx::uz];
 
     // Save the position and momenta, making copies
-    Cuda::ManagedDeviceVector<Real> xp_save, yp_save, zp_save;
+    Cuda::ManagedDeviceVector<ParticleReal> xp_save, yp_save, zp_save;
     RealVector uxp_save, uyp_save, uzp_save;
 
-    Real* const AMREX_RESTRICT x = xp.dataPtr();
-    Real* const AMREX_RESTRICT y = yp.dataPtr();
-    Real* const AMREX_RESTRICT z = zp.dataPtr();
-    Real* const AMREX_RESTRICT ux = uxp.dataPtr();
-    Real* const AMREX_RESTRICT uy = uyp.dataPtr();
-    Real* const AMREX_RESTRICT uz = uzp.dataPtr();
-    Real* const AMREX_RESTRICT Exp = attribs[PIdx::Ex].dataPtr();
-    Real* const AMREX_RESTRICT Eyp = attribs[PIdx::Ey].dataPtr();
-    Real* const AMREX_RESTRICT Ezp = attribs[PIdx::Ez].dataPtr();
-    Real* const AMREX_RESTRICT Bxp = attribs[PIdx::Bx].dataPtr();
-    Real* const AMREX_RESTRICT Byp = attribs[PIdx::By].dataPtr();
-    Real* const AMREX_RESTRICT Bzp = attribs[PIdx::Bz].dataPtr();
+    ParticleReal* const AMREX_RESTRICT x = xp.dataPtr();
+    ParticleReal* const AMREX_RESTRICT y = yp.dataPtr();
+    ParticleReal* const AMREX_RESTRICT z = zp.dataPtr();
+    ParticleReal* const AMREX_RESTRICT ux = uxp.dataPtr();
+    ParticleReal* const AMREX_RESTRICT uy = uyp.dataPtr();
+    ParticleReal* const AMREX_RESTRICT uz = uzp.dataPtr();
+    ParticleReal* const AMREX_RESTRICT Exp = attribs[PIdx::Ex].dataPtr();
+    ParticleReal* const AMREX_RESTRICT Eyp = attribs[PIdx::Ey].dataPtr();
+    ParticleReal* const AMREX_RESTRICT Ezp = attribs[PIdx::Ez].dataPtr();
+    ParticleReal* const AMREX_RESTRICT Bxp = attribs[PIdx::Bx].dataPtr();
+    ParticleReal* const AMREX_RESTRICT Byp = attribs[PIdx::By].dataPtr();
+    ParticleReal* const AMREX_RESTRICT Bzp = attribs[PIdx::Bz].dataPtr();
 
     if (!done_injecting_lev) {
         // If the old values are not already saved, create copies here.
@@ -271,12 +271,12 @@ RigidInjectedParticleContainer::PushPX(WarpXParIter& pti,
 
     if (!done_injecting_lev) {
 
-        Real* AMREX_RESTRICT x_save = xp_save.dataPtr();
-        Real* AMREX_RESTRICT y_save = yp_save.dataPtr();
-        Real* AMREX_RESTRICT z_save = zp_save.dataPtr();
-        Real* AMREX_RESTRICT ux_save = uxp_save.dataPtr();
-        Real* AMREX_RESTRICT uy_save = uyp_save.dataPtr();
-        Real* AMREX_RESTRICT uz_save = uzp_save.dataPtr();
+        ParticleReal* AMREX_RESTRICT x_save = xp_save.dataPtr();
+        ParticleReal* AMREX_RESTRICT y_save = yp_save.dataPtr();
+        ParticleReal* AMREX_RESTRICT z_save = zp_save.dataPtr();
+        ParticleReal* AMREX_RESTRICT ux_save = uxp_save.dataPtr();
+        ParticleReal* AMREX_RESTRICT uy_save = uyp_save.dataPtr();
+        ParticleReal* AMREX_RESTRICT uz_save = uzp_save.dataPtr();
 
         // Undo the push for particles not injected yet.
         // The zp are advanced a fixed amount.
@@ -415,16 +415,16 @@ RigidInjectedParticleContainer::PushP (int lev, Real dt,
 
             // This wraps the momentum advance so that inheritors can modify the call.
             // Extract pointers to the different particle quantities
-            const Real* const AMREX_RESTRICT zp = m_zp[thread_num].dataPtr();
-            Real* const AMREX_RESTRICT uxpp = uxp.dataPtr();
-            Real* const AMREX_RESTRICT uypp = uyp.dataPtr();
-            Real* const AMREX_RESTRICT uzpp = uzp.dataPtr();
-            const Real* const AMREX_RESTRICT Expp = Exp.dataPtr();
-            const Real* const AMREX_RESTRICT Eypp = Eyp.dataPtr();
-            const Real* const AMREX_RESTRICT Ezpp = Ezp.dataPtr();
-            const Real* const AMREX_RESTRICT Bxpp = Bxp.dataPtr();
-            const Real* const AMREX_RESTRICT Bypp = Byp.dataPtr();
-            const Real* const AMREX_RESTRICT Bzpp = Bzp.dataPtr();
+            const ParticleReal* const AMREX_RESTRICT zp = m_zp[thread_num].dataPtr();
+            ParticleReal* const AMREX_RESTRICT uxpp = uxp.dataPtr();
+            ParticleReal* const AMREX_RESTRICT uypp = uyp.dataPtr();
+            ParticleReal* const AMREX_RESTRICT uzpp = uzp.dataPtr();
+            const ParticleReal* const AMREX_RESTRICT Expp = Exp.dataPtr();
+            const ParticleReal* const AMREX_RESTRICT Eypp = Eyp.dataPtr();
+            const ParticleReal* const AMREX_RESTRICT Ezpp = Ezp.dataPtr();
+            const ParticleReal* const AMREX_RESTRICT Bxpp = Bxp.dataPtr();
+            const ParticleReal* const AMREX_RESTRICT Bypp = Byp.dataPtr();
+            const ParticleReal* const AMREX_RESTRICT Bzpp = Bzp.dataPtr();
 
             // Loop over the particles and update their momentum
             const Real q = this->charge;
@@ -450,10 +450,10 @@ RigidInjectedParticleContainer::PushP (int lev, Real dt,
             // Undo the push for particles not injected yet.
             // It is assumed that PushP will only be called on the first and last steps
             // and that no particles will cross zinject_plane.
-            const Real* const AMREX_RESTRICT ux_save = uxp_save.dataPtr();
-            const Real* const AMREX_RESTRICT uy_save = uyp_save.dataPtr();
-            const Real* const AMREX_RESTRICT uz_save = uzp_save.dataPtr();
-            const Real zz = zinject_plane_levels[lev];
+            const ParticleReal* const AMREX_RESTRICT ux_save = uxp_save.dataPtr();
+            const ParticleReal* const AMREX_RESTRICT uy_save = uyp_save.dataPtr();
+            const ParticleReal* const AMREX_RESTRICT uz_save = uzp_save.dataPtr();
+            const ParticleReal zz = zinject_plane_levels[lev];
             amrex::ParallelFor( pti.numParticles(),
                 [=] AMREX_GPU_DEVICE (long i) {
                 if (zp[i] <= zz) {
diff --git a/Source/Particles/Sorting/Make.package b/Source/Particles/Sorting/Make.package
new file mode 100644
index 000000000..750d2607e
--- /dev/null
+++ b/Source/Particles/Sorting/Make.package
@@ -0,0 +1,3 @@
+CEXE_sources += Partition.cpp
+INCLUDE_LOCATIONS += $(WARPX_HOME)/Source/Particles/Sorting
+VPATH_LOCATIONS   += $(WARPX_HOME)/Source/Particles/Sorting
diff --git a/Source/Particles/Sorting/Partition.cpp b/Source/Particles/Sorting/Partition.cpp
new file mode 100644
index 000000000..683dbbd04
--- /dev/null
+++ b/Source/Particles/Sorting/Partition.cpp
@@ -0,0 +1,141 @@
+#include <WarpX.H>
+#include <PhysicalParticleContainer.H>
+#include <AMReX_Particles.H>
+
+using namespace amrex;
+
+/* \brief Determine which particles deposit/gather in the buffer, and
+ *        and reorder the particle arrays accordingly
+ *
+ *  More specifically:
+ *  - Modify `nfine_current` and `nfine_gather` (in place)
+ *     so that they correspond to the number of particles
+ *     that deposit/gather in the fine patch respectively.
+ *  - Reorder the particle arrays,
+ *     so that the `nfine_current`/`nfine_gather` first particles
+ *     deposit/gather in the fine patch
+ *     and (thus) the `np-nfine_current`/`np-nfine_gather` last particles
+ *     deposit/gather in the buffer
+ *
+ * \param nfine_current number of particles that deposit to the fine patch
+ *         (modified by this function)
+ * \param nfine_gather number of particles that gather into the fine patch
+ *         (modified by this function)
+ * \param np total number of particles in this tile
+ * \param pti object that holds the particle information for this tile
+ * \param lev current refinement level
+ * \param current_masks indicates, for each cell, whether that cell is
+ *       in the deposition buffers or in the interior of the fine patch
+ * \param gather_masks indicates, for each cell, whether that cell is
+ *       in the gather buffers or in the interior of the fine patch
+ * \param uxp, uyp, uzp, wp references to the particle momenta and weight
+ *         (modified by this function)
+ */
+void
+PhysicalParticleContainer::PartitionParticlesInBuffers(
+    long& nfine_current, long& nfine_gather, long const np,
+    WarpXParIter& pti, int const lev,
+    iMultiFab const* current_masks,
+    iMultiFab const* gather_masks,
+    RealVector& uxp, RealVector& uyp, RealVector& uzp, RealVector& wp)
+{
+    BL_PROFILE("PPC::Evolve::partition");
+
+    std::vector<bool> inexflag;
+    inexflag.resize(np);
+
+    auto& aos = pti.GetArrayOfStructs();
+
+    // We need to partition the large buffer first
+    iMultiFab const* bmasks =
+        (WarpX::n_field_gather_buffer >= WarpX::n_current_deposition_buffer) ?
+        gather_masks : current_masks;
+    int i = 0;
+    const auto& msk = (*bmasks)[pti];
+    for (const auto& p : aos) {
+        const IntVect& iv = Index(p, lev);
+        inexflag[i++] = msk(iv);
+    }
+
+    Vector<long> pid;
+    pid.resize(np);
+    std::iota(pid.begin(), pid.end(), 0L);
+    auto sep = std::stable_partition(pid.begin(), pid.end(),
+                                     [&inexflag](long id) { return inexflag[id]; });
+
+    if (WarpX::n_current_deposition_buffer == WarpX::n_field_gather_buffer) {
+        nfine_current = nfine_gather = std::distance(pid.begin(), sep);
+    } else if (sep != pid.end()) {
+        int n_buf;
+        if (bmasks == gather_masks) {
+            nfine_gather = std::distance(pid.begin(), sep);
+            bmasks = current_masks;
+            n_buf = WarpX::n_current_deposition_buffer;
+        } else {
+            nfine_current = std::distance(pid.begin(), sep);
+            bmasks = gather_masks;
+            n_buf = WarpX::n_field_gather_buffer;
+        }
+        if (n_buf > 0)
+        {
+            const auto& msk2 = (*bmasks)[pti];
+            for (auto it = sep; it != pid.end(); ++it) {
+                const long id = *it;
+                const IntVect& iv = Index(aos[id], lev);
+                inexflag[id] = msk2(iv);
+            }
+
+            auto sep2 = std::stable_partition(sep, pid.end(),
+                                              [&inexflag](long id) {return inexflag[id];});
+            if (bmasks == gather_masks) {
+                nfine_gather = std::distance(pid.begin(), sep2);
+            } else {
+                nfine_current = std::distance(pid.begin(), sep2);
+            }
+        }
+    }
+
+    // only deposit / gather to coarsest grid
+    if (m_deposit_on_main_grid && lev > 0) {
+        nfine_current = 0;
+    }
+    if (m_gather_from_main_grid && lev > 0) {
+        nfine_gather = 0;
+    }
+
+    if (nfine_current != np || nfine_gather != np)
+    {
+
+        ParticleVector particle_tmp;
+        particle_tmp.resize(np);
+
+        for (long ip = 0; ip < np; ++ip) {
+            particle_tmp[ip] = aos[pid[ip]];
+        }
+        std::swap(aos(), particle_tmp);
+
+        RealVector tmp;
+        tmp.resize(np);
+
+        for (long ip = 0; ip < np; ++ip) {
+            tmp[ip] = wp[pid[ip]];
+        }
+        std::swap(wp, tmp);
+
+        for (long ip = 0; ip < np; ++ip) {
+            tmp[ip] = uxp[pid[ip]];
+        }
+        std::swap(uxp, tmp);
+
+        for (long ip = 0; ip < np; ++ip) {
+            tmp[ip] = uyp[pid[ip]];
+        }
+        std::swap(uyp, tmp);
+
+        for (long ip = 0; ip < np; ++ip) {
+            tmp[ip] = uzp[pid[ip]];
+        }
+        std::swap(uzp, tmp);
+    }
+
+}
diff --git a/Source/Particles/WarpXParticleContainer.H b/Source/Particles/WarpXParticleContainer.H
index ee75bc511..7b0d2d1d0 100644
--- a/Source/Particles/WarpXParticleContainer.H
+++ b/Source/Particles/WarpXParticleContainer.H
@@ -68,12 +68,12 @@ public:
     WarpXParIter (ContainerType& pc, int level);
 
 #if (AMREX_SPACEDIM == 2)
-    void GetPosition (amrex::Cuda::ManagedDeviceVector<amrex::Real>& x,
-                      amrex::Cuda::ManagedDeviceVector<amrex::Real>& y,
-                      amrex::Cuda::ManagedDeviceVector<amrex::Real>& z) const;
-    void SetPosition (const amrex::Cuda::ManagedDeviceVector<amrex::Real>& x,
-                      const amrex::Cuda::ManagedDeviceVector<amrex::Real>& y,
-                      const amrex::Cuda::ManagedDeviceVector<amrex::Real>& z);
+    void GetPosition (amrex::Cuda::ManagedDeviceVector<amrex::ParticleReal>& x,
+                      amrex::Cuda::ManagedDeviceVector<amrex::ParticleReal>& y,
+                      amrex::Cuda::ManagedDeviceVector<amrex::ParticleReal>& z) const;
+    void SetPosition (const amrex::Cuda::ManagedDeviceVector<amrex::ParticleReal>& x,
+                      const amrex::Cuda::ManagedDeviceVector<amrex::ParticleReal>& y,
+                      const amrex::Cuda::ManagedDeviceVector<amrex::ParticleReal>& z);
 #endif
     const std::array<RealVector, PIdx::nattribs>& GetAttribs () const {
         return GetStructOfArrays().GetRealData();
@@ -104,7 +104,7 @@ class WarpXParticleContainer
 public:
     friend MultiParticleContainer;
 
-    // amrex::StructOfArrays with DiagIdx::nattribs amrex::Real components
+    // amrex::StructOfArrays with DiagIdx::nattribs amrex::ParticleReal components
     // and 0 int components for the particle data.
     using DiagnosticParticleData = amrex::StructOfArrays<DiagIdx::nattribs, 0>;
     // DiagnosticParticles is a vector, with one element per MR level.
@@ -232,17 +232,17 @@ public:
     amrex::Real maxParticleVelocity(bool local = false);
 
     void AddNParticles (int lev,
-                        int n, const amrex::Real* x, const amrex::Real* y, const amrex::Real* z,
-                        const amrex::Real* vx, const amrex::Real* vy, const amrex::Real* vz,
-                        int nattr, const amrex::Real* attr, int uniqueparticles, int id=-1);
+                        int n, const amrex::ParticleReal* x, const amrex::ParticleReal* y, const amrex::ParticleReal* z,
+                        const amrex::ParticleReal* vx, const amrex::ParticleReal* vy, const amrex::ParticleReal* vz,
+                        int nattr, const amrex::ParticleReal* attr, int uniqueparticles, int id=-1);
 
     void AddOneParticle (int lev, int grid, int tile,
-                         amrex::Real x, amrex::Real y, amrex::Real z,
-                         std::array<amrex::Real,PIdx::nattribs>& attribs);
+                         amrex::ParticleReal x, amrex::ParticleReal y, amrex::ParticleReal z,
+                         std::array<amrex::ParticleReal,PIdx::nattribs>& attribs);
 
     void AddOneParticle (ParticleTileType& particle_tile,
-                         amrex::Real x, amrex::Real y, amrex::Real z,
-                         std::array<amrex::Real,PIdx::nattribs>& attribs);
+                         amrex::ParticleReal x, amrex::ParticleReal y, amrex::ParticleReal z,
+                         std::array<amrex::ParticleReal,PIdx::nattribs>& attribs);
 
     virtual void ReadHeader (std::istream& is);
 
@@ -326,7 +326,7 @@ protected:
     amrex::Vector<amrex::FArrayBox> local_jy;
     amrex::Vector<amrex::FArrayBox> local_jz;
 
-    using DataContainer = amrex::Gpu::ManagedDeviceVector<amrex::Real>;
+    using DataContainer = amrex::Gpu::ManagedDeviceVector<amrex::ParticleReal>;
     using PairIndex = std::pair<int, int>;
 
     amrex::Vector<DataContainer> m_xp, m_yp, m_zp;
diff --git a/Source/Particles/WarpXParticleContainer.cpp b/Source/Particles/WarpXParticleContainer.cpp
index 176c147da..65a82f233 100644
--- a/Source/Particles/WarpXParticleContainer.cpp
+++ b/Source/Particles/WarpXParticleContainer.cpp
@@ -25,7 +25,9 @@ WarpXParIter::WarpXParIter (ContainerType& pc, int level)
 
 #if (AMREX_SPACEDIM == 2)
 void
-WarpXParIter::GetPosition (Cuda::ManagedDeviceVector<Real>& x, Cuda::ManagedDeviceVector<Real>& y, Cuda::ManagedDeviceVector<Real>& z) const
+WarpXParIter::GetPosition (Gpu::ManagedDeviceVector<ParticleReal>& x,
+                           Gpu::ManagedDeviceVector<ParticleReal>& y,
+                           Gpu::ManagedDeviceVector<ParticleReal>& z) const
 {
     amrex::ParIter<0,0,PIdx::nattribs>::GetPosition(x, z);
 #ifdef WARPX_DIM_RZ
@@ -38,17 +40,19 @@ WarpXParIter::GetPosition (Cuda::ManagedDeviceVector<Real>& x, Cuda::ManagedDevi
         x[i] = x[i]*std::cos(theta[i]);
     }
 #else
-    y.resize(x.size(), std::numeric_limits<Real>::quiet_NaN());
+    y.resize(x.size(), std::numeric_limits<ParticleReal>::quiet_NaN());
 #endif
 }
 
 void
-WarpXParIter::SetPosition (const Cuda::ManagedDeviceVector<Real>& x, const Cuda::ManagedDeviceVector<Real>& y, const Cuda::ManagedDeviceVector<Real>& z)
+WarpXParIter::SetPosition (const Gpu::ManagedDeviceVector<ParticleReal>& x,
+                           const Gpu::ManagedDeviceVector<ParticleReal>& y,
+                           const Gpu::ManagedDeviceVector<ParticleReal>& z)
 {
 #ifdef WARPX_DIM_RZ
     auto& attribs = GetAttribs();
     auto& theta = attribs[PIdx::theta];
-    Cuda::ManagedDeviceVector<Real> r(x.size());
+    Gpu::ManagedDeviceVector<ParticleReal> r(x.size());
     for (unsigned int i=0 ; i < x.size() ; i++) {
         theta[i] = std::atan2(y[i], x[i]);
         r[i] = std::sqrt(x[i]*x[i] + y[i]*y[i]);
@@ -132,8 +136,8 @@ WarpXParticleContainer::AllocData ()
 
 void
 WarpXParticleContainer::AddOneParticle (int lev, int grid, int tile,
-                                        Real x, Real y, Real z,
-                                        std::array<Real,PIdx::nattribs>& attribs)
+                                        ParticleReal x, ParticleReal y, ParticleReal z,
+                                        std::array<ParticleReal,PIdx::nattribs>& attribs)
 {
     auto& particle_tile = DefineAndReturnParticleTile(lev, grid, tile);
     AddOneParticle(particle_tile, x, y, z, attribs);
@@ -141,8 +145,8 @@ WarpXParticleContainer::AddOneParticle (int lev, int grid, int tile,
 
 void
 WarpXParticleContainer::AddOneParticle (ParticleTileType& particle_tile,
-                                        Real x, Real y, Real z,
-                                        std::array<Real,PIdx::nattribs>& attribs)
+                                        ParticleReal x, ParticleReal y, ParticleReal z,
+                                        std::array<ParticleReal,PIdx::nattribs>& attribs)
 {
     ParticleType p;
     p.id()  = ParticleType::NextID();
@@ -171,12 +175,12 @@ WarpXParticleContainer::AddOneParticle (ParticleTileType& particle_tile,
 
 void
 WarpXParticleContainer::AddNParticles (int lev,
-                                       int n, const Real* x, const Real* y, const Real* z,
-                                       const Real* vx, const Real* vy, const Real* vz,
-                                       int nattr, const Real* attr, int uniqueparticles, int id)
+                                       int n, const ParticleReal* x, const ParticleReal* y, const ParticleReal* z,
+                                       const ParticleReal* vx, const ParticleReal* vy, const ParticleReal* vz,
+                                       int nattr, const ParticleReal* attr, int uniqueparticles, int id)
 {
     BL_ASSERT(nattr == 1);
-    const Real* weight = attr;
+    const ParticleReal* weight = attr;
 
     int ibegin, iend;
     if (uniqueparticles) {
@@ -204,7 +208,7 @@ WarpXParticleContainer::AddNParticles (int lev,
     std::size_t np = iend-ibegin;
 
 #ifdef WARPX_DIM_RZ
-    Vector<Real> theta(np);
+    Vector<ParticleReal> theta(np);
 #endif
 
     for (int i = ibegin; i < iend; ++i)
@@ -253,7 +257,7 @@ WarpXParticleContainer::AddNParticles (int lev,
         {
 #ifdef WARPX_DIM_RZ
             if (comp == PIdx::theta) {
-                particle_tile.push_back_real(comp, theta.front(), theta.back());
+                particle_tile.push_back_real(comp, theta.data(), theta.data() + np);
             }
             else {
                 particle_tile.push_back_real(comp, np, 0.0);
@@ -362,9 +366,9 @@ WarpXParticleContainer::DepositCurrent(WarpXParIter& pti,
     // CPU, tiling: deposit into local_jx
     // (same for jx and jz)
 
-    Real* AMREX_RESTRICT xp = m_xp[thread_num].dataPtr() + offset;
-    Real* AMREX_RESTRICT zp = m_zp[thread_num].dataPtr() + offset;
-    Real* AMREX_RESTRICT yp = m_yp[thread_num].dataPtr() + offset;
+    ParticleReal* AMREX_RESTRICT xp = m_xp[thread_num].dataPtr() + offset;
+    ParticleReal* AMREX_RESTRICT zp = m_zp[thread_num].dataPtr() + offset;
+    ParticleReal* AMREX_RESTRICT yp = m_yp[thread_num].dataPtr() + offset;
 
     // Lower corner of tile box physical domain
     // Note that this includes guard cells since it is after tilebox.ngrow
@@ -503,9 +507,9 @@ WarpXParticleContainer::DepositCharge (WarpXParIter& pti, RealVector& wp,
     // GPU, no tiling: deposit directly in rho
     // CPU, tiling: deposit into local_rho
 
-    Real* AMREX_RESTRICT xp = m_xp[thread_num].dataPtr() + offset;
-    Real* AMREX_RESTRICT zp = m_zp[thread_num].dataPtr() + offset;
-    Real* AMREX_RESTRICT yp = m_yp[thread_num].dataPtr() + offset;
+    ParticleReal* AMREX_RESTRICT xp = m_xp[thread_num].dataPtr() + offset;
+    ParticleReal* AMREX_RESTRICT zp = m_zp[thread_num].dataPtr() + offset;
+    ParticleReal* AMREX_RESTRICT yp = m_yp[thread_num].dataPtr() + offset;
 
     // Lower corner of tile box physical domain
     // Note that this includes guard cells since it is after tilebox.ngrow
@@ -731,7 +735,7 @@ std::array<Real, 3> WarpXParticleContainer::meanParticleVelocity(bool local) {
 
 Real WarpXParticleContainer::maxParticleVelocity(bool local) {
 
-    amrex::Real max_v = 0.0;
+    amrex::ParticleReal max_v = 0.0;
 
     for (int lev = 0; lev <= finestLevel(); ++lev)
     {
@@ -745,12 +749,12 @@ Real WarpXParticleContainer::maxParticleVelocity(bool local) {
             auto& uy = pti.GetAttribs(PIdx::uy);
             auto& uz = pti.GetAttribs(PIdx::uz);
             for (unsigned long i = 0; i < ux.size(); i++) {
-                max_v = std::max(max_v, sqrt(ux[i]*ux[i] + uy[i]*uy[i] + uz[i]*uz[i]));
+                max_v = std::max(max_v, std::sqrt(ux[i]*ux[i] + uy[i]*uy[i] + uz[i]*uz[i]));
             }
         }
     }
 
-    if (!local) ParallelDescriptor::ReduceRealMax(max_v);
+    if (!local) ParallelAllReduce::Max(max_v, ParallelDescriptor::Communicator());
     return max_v;
 }
 
@@ -819,17 +823,17 @@ WarpXParticleContainer::PushX (int lev, Real dt)
             ParticleType * AMREX_RESTRICT pstructs = &(pti.GetArrayOfStructs()[0]);
             // - momenta are stored as a struct of array, in `attribs`
             auto& attribs = pti.GetAttribs();
-            Real* AMREX_RESTRICT ux = attribs[PIdx::ux].dataPtr();
-            Real* AMREX_RESTRICT uy = attribs[PIdx::uy].dataPtr();
-            Real* AMREX_RESTRICT uz = attribs[PIdx::uz].dataPtr();
+            ParticleReal* AMREX_RESTRICT ux = attribs[PIdx::ux].dataPtr();
+            ParticleReal* AMREX_RESTRICT uy = attribs[PIdx::uy].dataPtr();
+            ParticleReal* AMREX_RESTRICT uz = attribs[PIdx::uz].dataPtr();
 #ifdef WARPX_DIM_RZ
-            Real* AMREX_RESTRICT theta = attribs[PIdx::theta].dataPtr();
+            ParticleReal* AMREX_RESTRICT theta = attribs[PIdx::theta].dataPtr();
 #endif
             // Loop over the particles and update their position
             amrex::ParallelFor( pti.numParticles(),
                 [=] AMREX_GPU_DEVICE (long i) {
                     ParticleType& p = pstructs[i]; // Particle object that gets updated
-                    Real x, y, z; // Temporary variables
+                    ParticleReal x, y, z; // Temporary variables
 #ifndef WARPX_DIM_RZ
                     GetPosition( x, y, z, p ); // Initialize x, y, z
                     UpdatePosition( x, y, z, ux[i], uy[i], uz[i], dt);
diff --git a/Source/Particles/deposit_cic.F90 b/Source/Particles/deposit_cic.F90
index 1fe80016f..2831ce96b 100644
--- a/Source/Particles/deposit_cic.F90
+++ b/Source/Particles/deposit_cic.F90
@@ -1,7 +1,7 @@
 module warpx_ES_deposit_cic
 
   use iso_c_binding
-  use amrex_fort_module, only : amrex_real
+  use amrex_fort_module, only : amrex_real, amrex_particle_real
 
   implicit none
 
@@ -28,8 +28,8 @@ contains
                                   ng)                                    &
        bind(c,name='warpx_deposit_cic_3d')
     integer, value,   intent(in)     :: ns, np
-    real(amrex_real), intent(in)     :: particles(ns,np)
-    real(amrex_real), intent(in)     :: weights(np)
+    real(amrex_particle_real), intent(in)     :: particles(ns,np)
+    real(amrex_particle_real), intent(in)     :: weights(np)
     real(amrex_real), intent(in)     :: charge
     integer,          intent(in)     :: lo(3)
     integer,          intent(in)     :: hi(3)
@@ -86,8 +86,8 @@ contains
                                   ng)                                    &
        bind(c,name='warpx_deposit_cic_2d')
     integer, value,   intent(in)     :: ns, np
-    real(amrex_real), intent(in)     :: particles(ns,np)
-    real(amrex_real), intent(in)     :: weights(np)
+    real(amrex_particle_real), intent(in)     :: particles(ns,np)
+    real(amrex_particle_real), intent(in)     :: weights(np)
     real(amrex_real), intent(in)     :: charge
     integer,          intent(in)     :: lo(2)
     integer,          intent(in)     :: hi(2)
diff --git a/Source/Particles/interpolate_cic.F90 b/Source/Particles/interpolate_cic.F90
index 005ab35f4..3eb361d2f 100644
--- a/Source/Particles/interpolate_cic.F90
+++ b/Source/Particles/interpolate_cic.F90
@@ -1,7 +1,7 @@
 module warpx_ES_interpolate_cic
 
   use iso_c_binding
-  use amrex_fort_module, only : amrex_real
+  use amrex_fort_module, only : amrex_real, amrex_particle_real
 
   implicit none
 
@@ -31,7 +31,7 @@ contains
                                       lo, hi, plo, dx, ng)    &
        bind(c,name='warpx_interpolate_cic_3d')
     integer, value,   intent(in)     :: ns, np
-    real(amrex_real), intent(in)     :: particles(ns,np)
+    real(amrex_particle_real), intent(in)     :: particles(ns,np)
     real(amrex_real), intent(inout)  :: Ex_p(np), Ey_p(np), Ez_p(np)
     integer,          intent(in)     :: ng
     integer,          intent(in)     :: lo(3)
@@ -103,7 +103,7 @@ contains
                              lo, hi, plo, dx, ng)    &
        bind(c,name='warpx_interpolate_cic_2d')
     integer, value,   intent(in)     :: ns, np
-    real(amrex_real), intent(in)     :: particles(ns,np)
+    real(amrex_particle_real), intent(in)     :: particles(ns,np)
     real(amrex_real), intent(inout)  :: Ex_p(np), Ey_p(np)
     integer,          intent(in)     :: ng
     integer,          intent(in)     :: lo(2)
@@ -157,7 +157,7 @@ contains
                                                  plo,  ng,   lev)        &
        bind(c,name='warpx_interpolate_cic_two_levels_3d')
     integer, value,   intent(in)     :: ns, np
-    real(amrex_real), intent(in)     :: particles(ns,np)
+    real(amrex_particle_real), intent(in)     :: particles(ns,np)
     real(amrex_real), intent(inout)  :: Ex_p(np), Ey_p(np), Ez_p(np)
     integer,          intent(in)     :: ng, lev
     integer,          intent(in)     :: lo(3), hi(3)
@@ -290,7 +290,7 @@ contains
                                                  plo,  ng,   lev)        &
        bind(c,name='warpx_interpolate_cic_two_levels_2d')
     integer, value,   intent(in)     :: ns, np
-    real(amrex_real), intent(in)     :: particles(ns,np)
+    real(amrex_particle_real), intent(in)     :: particles(ns,np)
     real(amrex_real), intent(inout)  :: Ex_p(np), Ey_p(np)
     integer,          intent(in)     :: ng, lev
     integer,          intent(in)     :: lo(2), hi(2)
diff --git a/Source/Particles/push_particles_ES.F90 b/Source/Particles/push_particles_ES.F90
index 53dd3c181..b84f48d5f 100644
--- a/Source/Particles/push_particles_ES.F90
+++ b/Source/Particles/push_particles_ES.F90
@@ -1,7 +1,7 @@
 module warpx_ES_push_particles
 
   use iso_c_binding
-  use amrex_fort_module, only : amrex_real
+  use amrex_fort_module, only : amrex_real, amrex_particle_real
 
   implicit none
 
@@ -36,8 +36,8 @@ contains
                                     prob_lo, prob_hi)       &
        bind(c,name='warpx_push_leapfrog_3d')
     integer, value,   intent(in)     :: ns, np
-    real(amrex_real), intent(inout)  :: particles(ns,np)
-    real(amrex_real), intent(inout)  :: vx_p(np), vy_p(np), vz_p(np)
+    real(amrex_particle_real), intent(inout)  :: particles(ns,np)
+    real(amrex_particle_real), intent(inout)  :: vx_p(np), vy_p(np), vz_p(np)
     real(amrex_real), intent(in)     :: Ex_p(np), Ey_p(np), Ez_p(np)
     real(amrex_real), intent(in)     :: charge
     real(amrex_real), intent(in)     :: mass
@@ -100,8 +100,8 @@ contains
                                     prob_lo, prob_hi)       &
        bind(c,name='warpx_push_leapfrog_2d')
     integer, value,   intent(in)     :: ns, np
-    real(amrex_real), intent(inout)  :: particles(ns,np)
-    real(amrex_real), intent(inout)  :: vx_p(np), vy_p(np)
+    real(amrex_particle_real), intent(inout)  :: particles(ns,np)
+    real(amrex_particle_real), intent(inout)  :: vx_p(np), vy_p(np)
     real(amrex_real), intent(in)     :: Ex_p(np), Ey_p(np)
     real(amrex_real), intent(in)     :: charge
     real(amrex_real), intent(in)     :: mass
@@ -167,8 +167,8 @@ contains
                                              prob_lo, prob_hi)       &
        bind(c,name='warpx_push_leapfrog_positions_3d')
     integer, value,   intent(in)    :: ns, np
-    real(amrex_real), intent(inout) :: particles(ns,np)
-    real(amrex_real), intent(inout) :: vx_p(np), vy_p(np), vz_p(np)
+    real(amrex_particle_real), intent(inout) :: particles(ns,np)
+    real(amrex_particle_real), intent(inout) :: vx_p(np), vy_p(np), vz_p(np)
     real(amrex_real), intent(in)    :: dt
     real(amrex_real), intent(in)    :: prob_lo(3), prob_hi(3)
 
@@ -219,8 +219,8 @@ contains
                                               prob_lo, prob_hi)       &
        bind(c,name='warpx_push_leapfrog_positions_2d')
     integer, value,   intent(in)    :: ns, np
-    real(amrex_real), intent(inout) :: particles(ns,np)
-    real(amrex_real), intent(inout) :: vx_p(np), vy_p(np)
+    real(amrex_particle_real), intent(inout) :: particles(ns,np)
+    real(amrex_particle_real), intent(inout) :: vx_p(np), vy_p(np)
     real(amrex_real), intent(in)    :: dt
     real(amrex_real), intent(in)    :: prob_lo(2), prob_hi(2)
 
diff --git a/Source/WarpX.cpp b/Source/WarpX.cpp
index a844e7aa0..116600c8b 100644
--- a/Source/WarpX.cpp
+++ b/Source/WarpX.cpp
@@ -103,7 +103,7 @@ IntVect WarpX::jz_nodal_flag(1,0);  // z is the second dimension to 2D AMReX
 
 IntVect WarpX::filter_npass_each_dir(1);
 
-int WarpX::n_field_gather_buffer = 0;
+int WarpX::n_field_gather_buffer = -1;
 int WarpX::n_current_deposition_buffer = -1;
 
 int WarpX::do_nodal = false;
@@ -149,8 +149,8 @@ WarpX::WarpX ()
 #endif
 
     t_new.resize(nlevs_max, 0.0);
-    t_old.resize(nlevs_max, -1.e100);
-    dt.resize(nlevs_max, 1.e100);
+    t_old.resize(nlevs_max, std::numeric_limits<Real>::lowest());
+    dt.resize(nlevs_max, std::numeric_limits<Real>::max());
 
     // Particle Container
     mypc = std::unique_ptr<MultiParticleContainer> (new MultiParticleContainer(this));
@@ -730,11 +730,19 @@ WarpX::AllocLevelData (int lev, const BoxArray& ba, const DistributionMapping& d
 
     if (mypc->nSpeciesDepositOnMainGrid() && n_current_deposition_buffer == 0) {
         n_current_deposition_buffer = 1;
+        // This forces the allocation of buffers and allows the code associated
+        // with buffers to run. But the buffer size of `1` is in fact not used,
+        // `deposit_on_main_grid` forces all particles (whether or not they
+        // are in buffers) to deposition on the main grid.
     }
 
     if (n_current_deposition_buffer < 0) {
         n_current_deposition_buffer = ngJ.max();
     }
+    if (n_field_gather_buffer < 0) {
+        // Field gather buffer should be larger than current deposition buffers
+        n_field_gather_buffer = n_current_deposition_buffer + 1;
+    }
 
     int ngF = (do_moving_window) ? 2 : 0;
     // CKC solver requires one additional guard cell
@@ -988,7 +996,7 @@ WarpX::LowerCorner(const Box& bx, int lev)
 #if (AMREX_SPACEDIM == 3)
     return { xyzmin[0], xyzmin[1], xyzmin[2] };
 #elif (AMREX_SPACEDIM == 2)
-    return { xyzmin[0], -1.e100, xyzmin[1] };
+    return { xyzmin[0], std::numeric_limits<Real>::lowest(), xyzmin[1] };
 #endif
 }
 
@@ -1000,7 +1008,7 @@ WarpX::UpperCorner(const Box& bx, int lev)
 #if (AMREX_SPACEDIM == 3)
     return { xyzmax[0], xyzmax[1], xyzmax[2] };
 #elif (AMREX_SPACEDIM == 2)
-    return { xyzmax[0], 1.e100, xyzmax[1] };
+    return { xyzmax[0], std::numeric_limits<Real>::max(), xyzmax[1] };
 #endif
 }
 
diff --git a/Source/main.cpp b/Source/main.cpp
index 551be9c30..cb183bc8d 100644
--- a/Source/main.cpp
+++ b/Source/main.cpp
@@ -12,6 +12,7 @@ using namespace amrex;
 
 int main(int argc, char* argv[])
 {
+#if defined AMREX_USE_MPI
 #if defined(_OPENMP) && defined(WARPX_USE_PSATD)
     int provided;
     MPI_Init_thread(&argc, &argv, MPI_THREAD_FUNNELED, &provided);
@@ -19,6 +20,7 @@ int main(int argc, char* argv[])
 #else
     MPI_Init(&argc, &argv);
 #endif
+#endif
 
     amrex::Initialize(argc,argv);
 
@@ -48,5 +50,7 @@ int main(int argc, char* argv[])
     BL_PROFILE_VAR_STOP(pmain);
 
     amrex::Finalize();
+#if defined AMREX_USE_MPI
     MPI_Finalize();
+#endif
 }
diff --git a/Tools/parallel_postproc.py b/Tools/parallel_postproc.py
deleted file mode 100644
index 0942118ac..000000000
--- a/Tools/parallel_postproc.py
+++ /dev/null
@@ -1,83 +0,0 @@
-import matplotlib
-matplotlib.use('Agg')
-from mpi4py import MPI
-import glob, read_raw_data
-import numpy as np
-import scipy.constants as scc
-import  matplotlib.pyplot as plt
-
-species = 'electrons'
-fieldname = 'Ey'
-lambda0 = .81e-6
-
-# --- custom functions --- #
-# ------------------------ #
-omega0 = 2*np.pi*scc.c/lambda0
-# Read field fieldname and return normalized max
-def get_a0(res_dir, snapshot):
-    header = res_dir + '/Header'
-    print( snapshot )
-    allrd, info = read_raw_data.read_lab_snapshot(snapshot, header)
-    F = allrd[ fieldname ]
-    return info['z'][-1], np.max(np.abs(F)) * scc.e/(scc.m_e*omega0*scc.c)
-# Convert elements of a list to numpy arrays
-def convert_to_np_array(list_in):
-    list_out = []
-    for elem in list_in:
-        list_out.append( np.array( elem ) )
-    return list_out
-
-# --- MPI parallelization --- #
-# --------------------------- #
-# Get ordered list of snapshot files
-res_dir = './lab_frame_data/';
-file_list = glob.glob(res_dir + '/snapshot?????')
-file_list.sort()
-# Total number of files
-nfiles = len(file_list)
-# Each file has a unique number
-number_list = range(nfiles)
-comm_world = MPI.COMM_WORLD
-me = comm_world.Get_rank()
-nrank = comm_world.Get_size()
-# List of files to process for current proc
-my_list = file_list[ (me*nfiles)/nrank : ((me+1)*nfiles)/nrank ]
-# List if file numbers for current proc
-my_number_list = number_list[ (me*nfiles)/nrank : ((me+1)*nfiles)/nrank ]
-
-# --- Run parallel analysis --- #
-# ----------------------------- #
-# Each MPI rank reads roughly (nb snapshots)/(nb ranks) snapshots.
-# Works with any number of snapshots.
-for count, filename in enumerate(my_list):
-    zwin, a0 = get_a0( res_dir, filename )
-    uzlab = read_raw_data.get_particle_field(filename, species, 'uz')/scc.c
-    select_particles = (uzlab > 5.)
-    uzlab = uzlab[ select_particles ]
-    uzmean = np.mean(uzlab)
-
-# --- gather and rank 0 plots --- #
-# ------------------------------- #
-# Gather particle quantities to rank 0 to plot history of quantities.
-UZMEAN = comm_world.gather(uzmean, root=0)
-ZWIN = comm_world.gather(zwin, root=0)
-A0 = comm_world.gather(a0, root=0)
-# Rank 0 does the plot.
-if me == 0:
-    # Convert to numpy arrays
-    UZMEAN, ZWIN, A0 = convert_to_np_array([UZMEAN, ZWIN, A0])
-    # Plot and save
-    fig = plt.figure()
-    plt.subplot(2,1,1)
-    plt.plot(ZWIN*1.e3, UZMEAN)
-    plt.xlabel('z (mm)')
-    plt.ylabel('uz')
-    plt.title( 'beam energy' )
-    plt.grid()
-    plt.subplot(2,1,2)
-    plt.plot(ZWIN*1.e3, A0)
-    plt.ylabel('a0')
-    plt.title( 'beam propag angle' )
-    plt.grid()
-    fig.savefig('./image.png', bbox_inches='tight')
-    plt.close()
diff --git a/Tools/plot_parallel.py b/Tools/plot_parallel.py
index 6bc3ab378..494f2717d 100644
--- a/Tools/plot_parallel.py
+++ b/Tools/plot_parallel.py
@@ -81,8 +81,9 @@ if int(sys.version[0]) != 3:
 matplotlib.rcParams.update({'font.size': 14})
 pscolor = ['r','g','b','k','m','c','y','w']
 pssize = 1.
+# For 2D data, plot 2D array.
+# For 3D data, plot central x-z slice.
 yt_slicedir = {2:2, 3:1}
-yt_aspect = {2:.05, 3:20}
 
 # Get list of particle species.
 def get_species(a_file_list):
@@ -127,7 +128,9 @@ def plot_snapshot(filename):
             plt.clim(-vmax, vmax)
     if plotlib == 'yt':
         # Directly plot with yt
-        sl = yt.SlicePlot(ds, yt_slicedir[dim], args.field, aspect=yt_aspect[dim])
+        if dim==2: aspect = ds.domain_width[0]/ds.domain_width[1]
+        if dim==3: aspect = ds.domain_width[2]/ds.domain_width[0]
+        sl = yt.SlicePlot(ds, yt_slicedir[dim], args.field, aspect=aspect)
         if vmax is not None:
             sl.set_zlim(-vmax, vmax)
 
@@ -217,7 +220,7 @@ def reduce_evolved_quantity(z, q):
         global_q = np.empty_like(q)
         comm_world.Reduce(z, global_z, op=MPI.MAX)
         comm_world.Reduce(q, global_q, op=MPI.MAX)
-        return z, q
+        return global_z, global_q
     else:
         return z, q
 
diff --git a/Tools/video_yt.py b/Tools/video_yt.py
index a63881cb5..2dc8e7cf5 100644
--- a/Tools/video_yt.py
+++ b/Tools/video_yt.py
@@ -1,3 +1,16 @@
+'''
+This script loops over 3D plotfiles plt*****, generates a 3D rendering
+of the data with fields and particles, and saves one image per plotfile to
+plt_****_img.png. It was written for a laser-wakefield acceleration
+simulation, and contains a lot of custom values (for transparency,
+color intensity etc.), so feel free to modify it to meet your needs.
+
+Execute the file with
+> python video_yt.py
+to generate the images. It can be quite slow for even moderately large
+plotfiles.
+'''
+
 # Import statements
 import sys, os
 import yt, glob
@@ -93,7 +106,7 @@ def img_onestep(filename):
     sc.add_source(point1)
     sc.add_source(box_patch)
 # Save file
-    sc.save(filename + '_quarter.png', sigma_clip=1.)
+    sc.save(filename + '_img.png', sigma_clip=1.)
     return 0
 
 # Get plt folders in current folder and loop over them.
diff --git a/Tools/yt3d_mpi.py b/Tools/yt3d_mpi.py
index f18788570..2013446c7 100644
--- a/Tools/yt3d_mpi.py
+++ b/Tools/yt3d_mpi.py
@@ -1,3 +1,16 @@
+'''
+This script loops over 3D plotfiles plt*****, generates a 3D rendering
+of the data with fields and particles, and saves one image per plotfile to
+img_*****.png. It was written for a beam-driven wakefield acceleration
+simulation, and contains a lot of custom values (for transparency,
+color intensity etc.), so feel free to modify it to meet your needs.
+
+Execute the file with e.g.
+> mpirun -np 12 python yt3d_mpi.py
+to generate the images. It can be quite slow for even moderately large
+plotfiles.
+'''
+
 import yt, glob
 from mpi4py import MPI
 import numpy as np
@@ -107,9 +120,9 @@ def img_onestep(filename):
     cam.switch_orientation()
     # save image
     if rendering_type == 'smooth':
-        sc.save('img_' + str(my_number_list[count]).zfill(4), sigma_clip=5.)
+        sc.save('img_' + str(my_number_list[count]).zfill(5), sigma_clip=5.)
     if rendering_type == 'layers':
-        sc.save('img_' + str(my_number_list[count]).zfill(4), sigma_clip=2.)
+        sc.save('img_' + str(my_number_list[count]).zfill(5), sigma_clip=2.)
 
 file_list.sort()
 # Total number of files