7 files changed, 327 insertions, 235 deletions
diff --git a/Source/FortranInterface/WarpX_picsar.F90 b/Source/FortranInterface/WarpX_picsar.F90
index dc47245dd..3b2fc97a7 100644
--- a/Source/FortranInterface/WarpX_picsar.F90
+++ b/Source/FortranInterface/WarpX_picsar.F90
@@ -404,116 +404,4 @@ subroutine warpx_charge_deposition(rho,np,xp,yp,zp,w,q,xmin,ymin,zmin,dx,dy,dz,n
 
   end subroutine warpx_current_deposition_rz_volume_scaling
 
-  ! _________________________________________________________________
-  !>
-  !> @brief
-  !> Main subroutine for the particle pusher (velocity and position)
-  !>
-  !> @param[in] np number of super-particles
-  !> @param[in] xp,yp,zp particle position arrays
-  !> @param[in] uxp,uyp,uzp normalized momentum in each direction
-  !> @param[in] gaminv particle Lorentz factors
-  !> @param[in] ex,ey,ez particle electric fields in each direction
-  !> @param[in] bx,by,bz particle magnetic fields in each direction
-  !> @param[in] q charge
-  !> @param[in] m masse
-  !> @param[in] dt time step
-  !> @param[in] particle_pusher_algo Particle pusher algorithm
-  subroutine warpx_particle_pusher(np,xp,yp,zp,uxp,uyp,uzp, &
-                                  gaminv,&
-                                  ex,ey,ez,bx,by,bz,q,m,dt, &
-                                  particle_pusher_algo) &
-       bind(C, name="warpx_particle_pusher")
-
-    INTEGER(c_long), INTENT(IN)   :: np
-    REAL(amrex_real),INTENT(INOUT)    :: gaminv(np)
-    REAL(amrex_real),INTENT(INOUT)    :: xp(np),yp(np),zp(np)
-    REAL(amrex_real),INTENT(INOUT)    :: uxp(np),uyp(np),uzp(np)
-    REAL(amrex_real),INTENT(IN)       :: ex(np),ey(np),ez(np)
-    REAL(amrex_real),INTENT(IN)       :: bx(np),by(np),bz(np)
-    REAL(amrex_real),INTENT(IN)       :: q,m,dt
-    INTEGER(c_long), INTENT(IN)   :: particle_pusher_algo
-
-    SELECT CASE (particle_pusher_algo)
-
-    !! Vay pusher -- Full push
-    CASE (1_c_long)
-      CALL pxr_set_gamma(np,uxp,uyp,uzp,gaminv)
-
-      CALL pxr_ebcancelpush3d(np,uxp,uyp,uzp,gaminv, &
-                                 ex,ey,ez,  &
-                                 bx,by,bz,q,m,dt,0_c_long)
-    CASE DEFAULT
-
-      ! Momentum pusher in a single loop
-      CALL pxr_boris_push_u_3d(np,uxp,uyp,uzp,&
-                                     gaminv, &
-                                     ex,ey,ez, &
-                                     bx,by,bz, &
-                                     q,m,dt)
-
-    END SELECT
-
-    !!!! --- push particle species positions a time step
-#if (AMREX_SPACEDIM == 3) || (defined WARPX_RZ)
-    CALL pxr_pushxyz(np,xp,yp,zp,uxp,uyp,uzp,gaminv,dt)
-#elif (AMREX_SPACEDIM == 2)
-    CALL pxr_pushxz(np,xp,zp,uxp,uzp,gaminv,dt)
-#endif
-
-  end subroutine warpx_particle_pusher
-
-
-  ! _________________________________________________________________
-  !>
-  !> @brief
-  !> Main subroutine for the particle pusher (velocity)
-  !>
-  !> @param[in] np number of super-particles
-  !> @param[in] xp,yp,zp particle position arrays
-  !> @param[in] uxp,uyp,uzp normalized momentum in each direction
-  !> @param[in] gaminv particle Lorentz factors
-  !> @param[in] ex,ey,ez particle electric fields in each direction
-  !> @param[in] bx,by,bz particle magnetic fields in each direction
-  !> @param[in] q charge
-  !> @param[in] m masse
-  !> @param[in] dt time step
-  !> @param[in] particle_pusher_algo Particle pusher algorithm
-  subroutine warpx_particle_pusher_momenta(np,xp,yp,zp,uxp,uyp,uzp, &
-                                  gaminv,&
-                                  ex,ey,ez,bx,by,bz,q,m,dt, &
-                                  particle_pusher_algo) &
-       bind(C, name="warpx_particle_pusher_momenta")
-
-    INTEGER(c_long), INTENT(IN)   :: np
-    REAL(amrex_real),INTENT(INOUT)    :: gaminv(np)
-    REAL(amrex_real),INTENT(IN)       :: xp(np),yp(np),zp(np)
-    REAL(amrex_real),INTENT(INOUT)    :: uxp(np),uyp(np),uzp(np)
-    REAL(amrex_real),INTENT(IN)       :: ex(np),ey(np),ez(np)
-    REAL(amrex_real),INTENT(IN)       :: bx(np),by(np),bz(np)
-    REAL(amrex_real),INTENT(IN)       :: q,m,dt
-    INTEGER(c_long), INTENT(IN)   :: particle_pusher_algo
-
-    SELECT CASE (particle_pusher_algo)
-
-    !! Vay pusher -- Full push
-    CASE (1_c_long)
-      CALL pxr_set_gamma(np,uxp,uyp,uzp,gaminv)
-
-      CALL pxr_ebcancelpush3d(np,uxp,uyp,uzp,gaminv, &
-                                 ex,ey,ez,  &
-                                 bx,by,bz,q,m,dt,0_c_long)
-    CASE DEFAULT
-
-      ! Momentum pusher in a single loop
-      CALL pxr_boris_push_u_3d(np,uxp,uyp,uzp,&
-                                     gaminv, &
-                                     ex,ey,ez, &
-                                     bx,by,bz, &
-                                     q,m,dt)
-
-    END SELECT
-
-  end subroutine warpx_particle_pusher_momenta
-
 end module warpx_to_pxr_module
diff --git a/Source/Particles/PhysicalParticleContainer.cpp b/Source/Particles/PhysicalParticleContainer.cpp
index d47a7b220..b913ed5cd 100644
--- a/Source/Particles/PhysicalParticleContainer.cpp
+++ b/Source/Particles/PhysicalParticleContainer.cpp
@@ -7,6 +7,12 @@
 #include <WarpXConst.H>
 #include <WarpXWrappers.h>
 
+#include <WarpXAlgorithmSelection.H>
+
+// Import low-level single-particle kernels
+#include <UpdatePosition.H>
+#include <UpdateMomentumBoris.H>
+#include <UpdateMomentumVay.H>
 
 using namespace amrex;
 
@@ -1742,36 +1748,52 @@ PhysicalParticleContainer::PushPX(WarpXParIter& pti,
                                   Real dt)
 {
 
+    // This wraps the momentum and position advance so that inheritors can modify the call.
+    auto& attribs = pti.GetAttribs();
+    // Extract pointers to the different particle quantities
+    Real* const AMREX_RESTRICT x = xp.dataPtr();
+    Real* const AMREX_RESTRICT y = yp.dataPtr();
+    Real* const AMREX_RESTRICT z = zp.dataPtr();
+    Real* const AMREX_RESTRICT gi = giv.dataPtr();
+    Real* const AMREX_RESTRICT ux = attribs[PIdx::ux].dataPtr();
+    Real* const AMREX_RESTRICT uy = attribs[PIdx::uy].dataPtr();
+    Real* const AMREX_RESTRICT uz = attribs[PIdx::uz].dataPtr();
+    const Real* const AMREX_RESTRICT Ex = attribs[PIdx::Ex].dataPtr();
+    const Real* const AMREX_RESTRICT Ey = attribs[PIdx::Ey].dataPtr();
+    const Real* const AMREX_RESTRICT Ez = attribs[PIdx::Ez].dataPtr();
+    const Real* const AMREX_RESTRICT Bx = attribs[PIdx::Bx].dataPtr();
+    const Real* const AMREX_RESTRICT By = attribs[PIdx::By].dataPtr();
+    const Real* const AMREX_RESTRICT Bz = attribs[PIdx::Bz].dataPtr();
+
     if (WarpX::do_boosted_frame_diagnostic && do_boosted_frame_diags)
     {
-        copy_attribs(pti, xp.dataPtr(), yp.dataPtr(), zp.dataPtr());
+        copy_attribs(pti, x, y, z);
     }
 
-    // The following attributes should be included in CPP version of warpx_particle_pusher
-	// This wraps the call to warpx_particle_pusher so that inheritors can modify the call.
-    auto& attribs = pti.GetAttribs();
-    auto& uxp = attribs[PIdx::ux];
-    auto& uyp = attribs[PIdx::uy];
-    auto& uzp = attribs[PIdx::uz];
-    auto& Exp = attribs[PIdx::Ex];
-    auto& Eyp = attribs[PIdx::Ey];
-    auto& Ezp = attribs[PIdx::Ez];
-    auto& Bxp = attribs[PIdx::Bx];
-    auto& Byp = attribs[PIdx::By];
-    auto& Bzp = attribs[PIdx::Bz];
-    const long np  = pti.numParticles();
-    
-    warpx_particle_pusher(&np,
-                          xp.dataPtr(),
-                          yp.dataPtr(),
-                          zp.dataPtr(),
-                          uxp.dataPtr(), uyp.dataPtr(), uzp.dataPtr(),
-                          giv.dataPtr(),
-                          Exp.dataPtr(), Eyp.dataPtr(), Ezp.dataPtr(),
-                          Bxp.dataPtr(), Byp.dataPtr(), Bzp.dataPtr(),
-                          &this->charge, &this->mass, &dt,
-                          &WarpX::particle_pusher_algo);
-
+    // Loop over the particles and update their momentum
+    const Real q = this->charge;
+    const Real m = this-> mass;
+    if (WarpX::particle_pusher_algo == ParticlePusherAlgo::Boris){
+        amrex::ParallelFor( pti.numParticles(),
+            [=] AMREX_GPU_DEVICE (long i) {
+                UpdateMomentumBoris( ux[i], uy[i], uz[i], gi[i],
+                      Ex[i], Ey[i], Ez[i], Bx[i], By[i], Bz[i], q, m, dt);
+                UpdatePosition( x[i], y[i], z[i],
+                      ux[i], uy[i], uz[i], dt );
+            }
+        );
+    } else if (WarpX::particle_pusher_algo == ParticlePusherAlgo::Vay) {
+        amrex::ParallelFor( pti.numParticles(),
+            [=] AMREX_GPU_DEVICE (long i) {
+                UpdateMomentumVay( ux[i], uy[i], uz[i], gi[i],
+                      Ex[i], Ey[i], Ez[i], Bx[i], By[i], Bz[i], q, m, dt);
+                UpdatePosition( x[i], y[i], z[i],
+                      ux[i], uy[i], uz[i], dt );
+            }
+        );
+    } else {
+      amrex::Abort("Unknown particle pusher");
+    };
 }
 
 void
@@ -1800,9 +1822,6 @@ PhysicalParticleContainer::PushP (int lev, Real dt,
 
             auto& attribs = pti.GetAttribs();
 
-            auto& uxp = attribs[PIdx::ux];
-            auto& uyp = attribs[PIdx::uy];
-            auto& uzp = attribs[PIdx::uz];
             auto& Exp = attribs[PIdx::Ex];
             auto& Eyp = attribs[PIdx::Ey];
             auto& Ezp = attribs[PIdx::Ez];
@@ -1860,16 +1879,39 @@ PhysicalParticleContainer::PushP (int lev, Real dt,
                 &ll4symtry, &WarpX::l_lower_order_in_v, &WarpX::do_nodal,
                 &lvect_fieldgathe, &WarpX::field_gathering_algo);
 
-            warpx_particle_pusher_momenta(&np,
-                                          m_xp[thread_num].dataPtr(),
-                                          m_yp[thread_num].dataPtr(),
-                                          m_zp[thread_num].dataPtr(),
-                                          uxp.dataPtr(), uyp.dataPtr(), uzp.dataPtr(),
-                                          m_giv[thread_num].dataPtr(),
-                                          Exp.dataPtr(), Eyp.dataPtr(), Ezp.dataPtr(),
-                                          Bxp.dataPtr(), Byp.dataPtr(), Bzp.dataPtr(),
-                                          &this->charge, &this->mass, &dt,
-                                          &WarpX::particle_pusher_algo);
+            // This wraps the momentum advance so that inheritors can modify the call.
+            // Extract pointers to the different particle quantities
+            Real* const AMREX_RESTRICT gi = m_giv[thread_num].dataPtr();
+            Real* const AMREX_RESTRICT ux = attribs[PIdx::ux].dataPtr();
+            Real* const AMREX_RESTRICT uy = attribs[PIdx::uy].dataPtr();
+            Real* const AMREX_RESTRICT uz = attribs[PIdx::uz].dataPtr();
+            const Real* const AMREX_RESTRICT Expp = Exp.dataPtr();
+            const Real* const AMREX_RESTRICT Eypp = Eyp.dataPtr();
+            const Real* const AMREX_RESTRICT Ezpp = Ezp.dataPtr();
+            const Real* const AMREX_RESTRICT Bxpp = Bxp.dataPtr();
+            const Real* const AMREX_RESTRICT Bypp = Byp.dataPtr();
+            const Real* const AMREX_RESTRICT Bzpp = Bzp.dataPtr();
+
+            // Loop over the particles and update their momentum
+            const Real q = this->charge;
+            const Real m = this-> mass;
+            if (WarpX::particle_pusher_algo == ParticlePusherAlgo::Boris){
+                amrex::ParallelFor( pti.numParticles(),
+                    [=] AMREX_GPU_DEVICE (long i) {
+                        UpdateMomentumBoris( ux[i], uy[i], uz[i], gi[i],
+                              Expp[i], Eypp[i], Ezpp[i], Bxpp[i], Bypp[i], Bzpp[i], q, m, dt);
+                    }
+                );
+            } else if (WarpX::particle_pusher_algo == ParticlePusherAlgo::Vay) {
+                amrex::ParallelFor( pti.numParticles(),
+                    [=] AMREX_GPU_DEVICE (long i) {
+                        UpdateMomentumVay( ux[i], uy[i], uz[i], gi[i],
+                              Expp[i], Eypp[i], Ezpp[i], Bxpp[i], Bypp[i], Bzpp[i], q, m, dt);
+                    }
+                );
+            } else {
+              amrex::Abort("Unknown particle pusher");
+            };
         }
     }
 }
diff --git a/Source/Particles/Pusher/Make.package b/Source/Particles/Pusher/Make.package
index 8c8e77905..95a38fa2d 100644
--- a/Source/Particles/Pusher/Make.package
+++ b/Source/Particles/Pusher/Make.package
@@ -1,4 +1,6 @@
 CEXE_headers += GetAndSetPosition.H
 CEXE_headers += UpdatePosition.H
+CEXE_headers += UpdateMomentumBoris.H
+CEXE_headers += UpdateMomentumVay.H
 INCLUDE_LOCATIONS += $(WARPX_HOME)/Source/Particles/Pusher
 VPATH_LOCATIONS   += $(WARPX_HOME)/Source/Particles/Pusher
diff --git a/Source/Particles/Pusher/UpdateMomentumBoris.H b/Source/Particles/Pusher/UpdateMomentumBoris.H
new file mode 100644
index 000000000..71e9a8ed1
--- /dev/null
+++ b/Source/Particles/Pusher/UpdateMomentumBoris.H
@@ -0,0 +1,47 @@
+#ifndef WARPX_PARTICLES_PUSHER_UPDATEMOMENTUM_BORIS_H_
+#define WARPX_PARTICLES_PUSHER_UPDATEMOMENTUM_BORIS_H_
+
+#include <AMReX_REAL.H>
+
+/* \brief Push the particle's positions over one timestep,
+ *    given the value of its momenta `ux`, `uy`, `uz` */
+AMREX_GPU_HOST_DEVICE AMREX_INLINE
+void UpdateMomentumBoris(
+    amrex::Real& ux, amrex::Real& uy, amrex::Real& uz, amrex::Real& gaminv,
+    const amrex::Real Ex, const amrex::Real Ey, const amrex::Real Ez,
+    const amrex::Real Bx, const amrex::Real By, const amrex::Real Bz,
+    const amrex::Real q, const amrex::Real m, const amrex::Real dt )
+{
+    const amrex::Real econst = 0.5*q*dt/m;
+
+    // First half-push for E
+    ux += econst*Ex;
+    uy += econst*Ey;
+    uz += econst*Ez;
+    // Compute temporary gamma factor
+    constexpr amrex::Real inv_c2 = 1./(PhysConst::c*PhysConst::c);
+    const amrex::Real inv_gamma = 1./std::sqrt(1. + (ux*ux + uy*uy + uz*uz)*inv_c2);
+    // Magnetic rotation
+    // - Compute temporary variables
+    const amrex::Real tx = econst*inv_gamma*Bx;
+    const amrex::Real ty = econst*inv_gamma*By;
+    const amrex::Real tz = econst*inv_gamma*Bz;
+    const amrex::Real tsqi = 2./(1. + tx*tx + ty*ty + tz*tz);
+    const amrex::Real sx = tx*tsqi;
+    const amrex::Real sy = ty*tsqi;
+    const amrex::Real sz = tz*tsqi;
+    const amrex::Real ux_p = ux + uy*tz - uz*ty;
+    const amrex::Real uy_p = uy + uz*tx - ux*tz;
+    const amrex::Real uz_p = uz + ux*ty - uy*tx;
+    // - Update momentum
+    ux += uy_p*sz - uz_p*sy;
+    uy += uz_p*sx - ux_p*sz;
+    uz += ux_p*sy - uy_p*sx;
+    // Second half-push for E
+    ux += econst*Ex;
+    uy += econst*Ey;
+    uz += econst*Ez;
+    gaminv = 1./std::sqrt(1. + (ux*ux + uy*uy + uz*uz)*inv_c2);
+}
+
+#endif // WARPX_PARTICLES_PUSHER_UPDATEMOMENTUM_BORIS_H_
diff --git a/Source/Particles/Pusher/UpdateMomentumVay.H b/Source/Particles/Pusher/UpdateMomentumVay.H
new file mode 100644
index 000000000..044297e22
--- /dev/null
+++ b/Source/Particles/Pusher/UpdateMomentumVay.H
@@ -0,0 +1,54 @@
+#ifndef WARPX_PARTICLES_PUSHER_UPDATEMOMENTUM_VAY_H_
+#define WARPX_PARTICLES_PUSHER_UPDATEMOMENTUM_VAY_H_
+
+#include <AMReX_FArrayBox.H>
+#include <WarpXConst.H>
+#include <AMReX_REAL.H>
+
+/* \brief Push the particle's positions over one timestep,
+ *    given the value of its momenta `ux`, `uy`, `uz` */
+AMREX_GPU_HOST_DEVICE AMREX_INLINE
+void UpdateMomentumVay(
+    amrex::Real& ux, amrex::Real& uy, amrex::Real& uz, amrex::Real& gaminv,
+    const amrex::Real Ex, const amrex::Real Ey, const amrex::Real Ez,
+    const amrex::Real Bx, const amrex::Real By, const amrex::Real Bz,
+    const amrex::Real q, const amrex::Real m, const amrex::Real dt )
+{
+    // Constants
+    const amrex::Real econst = q*dt/m;
+    const amrex::Real bconst = 0.5*q*dt/m;
+    constexpr amrex::Real invclight = 1./PhysConst::c;
+    constexpr amrex::Real invclightsq = 1./(PhysConst::c*PhysConst::c);
+    // Compute initial gamma
+    const amrex::Real inv_gamma = 1./std::sqrt(1. + (ux*ux + uy*uy + uz*uz)*invclightsq);
+    // Get tau
+    const amrex::Real taux = bconst*Bx;
+    const amrex::Real tauy = bconst*By;
+    const amrex::Real tauz = bconst*Bz;
+    const amrex::Real tausq = taux*taux+tauy*tauy+tauz*tauz;
+    // Get U', gamma'^2
+    const amrex::Real uxpr = ux + econst*Ex + (uy*tauz-uz*tauy)*inv_gamma;
+    const amrex::Real uypr = uy + econst*Ey + (uz*taux-ux*tauz)*inv_gamma;
+    const amrex::Real uzpr = uz + econst*Ez + (ux*tauy-uy*taux)*inv_gamma;
+    const amrex::Real gprsq = (1. + (uxpr*uxpr + uypr*uypr + uzpr*uzpr)*invclightsq);
+    // Get u*
+    const amrex::Real ust = (uxpr*taux + uypr*tauy + uzpr*tauz)*invclight;
+    // Get new gamma
+    const amrex::Real sigma = gprsq-tausq;
+    const amrex::Real gisq = 2./(sigma + std::sqrt(sigma*sigma + 4.*(tausq + ust*ust)) );
+    // Get t, s
+    const amrex::Real bg = bconst*std::sqrt(gisq);
+    const amrex::Real tx = bg*Bx;
+    const amrex::Real ty = bg*By;
+    const amrex::Real tz = bg*Bz;
+    const amrex::Real s = 1./(1.+tausq*gisq);
+    // Get t.u'
+    const amrex::Real tu = tx*uxpr + ty*uypr + tz*uzpr;
+    // Get new U
+    ux = s*(uxpr+tx*tu+uypr*tz-uzpr*ty);
+    uy = s*(uypr+ty*tu+uzpr*tx-uxpr*tz);
+    uz = s*(uzpr+tz*tu+uxpr*ty-uypr*tx);
+    gaminv = 1./std::sqrt(1. + (ux*ux + uy*uy + uz*uz)*invclightsq);
+}
+
+#endif // WARPX_PARTICLES_PUSHER_UPDATEMOMENTUM_VAY_H_
diff --git a/Source/Particles/RigidInjectedParticleContainer.H b/Source/Particles/RigidInjectedParticleContainer.H
index 0b27a2f2f..b920ece0a 100644
--- a/Source/Particles/RigidInjectedParticleContainer.H
+++ b/Source/Particles/RigidInjectedParticleContainer.H
@@ -43,7 +43,7 @@ public:
                          amrex::Real dt) override;
 
     virtual void PushPX(WarpXParIter& pti,
-	                amrex::Cuda::ManagedDeviceVector<amrex::Real>& xp,
+                        amrex::Cuda::ManagedDeviceVector<amrex::Real>& xp,
                         amrex::Cuda::ManagedDeviceVector<amrex::Real>& yp,
                         amrex::Cuda::ManagedDeviceVector<amrex::Real>& zp,
                         amrex::Cuda::ManagedDeviceVector<amrex::Real>& giv,
@@ -77,7 +77,6 @@ private:
     // Temporary quantites
     amrex::Real zinject_plane_lev;
     amrex::Real zinject_plane_lev_previous;
-    amrex::Vector<int> done_injecting_temp;
     bool done_injecting_lev;
 
 };
diff --git a/Source/Particles/RigidInjectedParticleContainer.cpp b/Source/Particles/RigidInjectedParticleContainer.cpp
index 9bd4cb4fc..cad82d6d4 100644
--- a/Source/Particles/RigidInjectedParticleContainer.cpp
+++ b/Source/Particles/RigidInjectedParticleContainer.cpp
@@ -10,6 +10,9 @@
 #include <WarpX_f.H>
 #include <WarpX.H>
 #include <WarpXConst.H>
+#include <WarpXAlgorithmSelection.H>
+#include <UpdateMomentumBoris.H>
+#include <UpdateMomentumVay.H>
 
 using namespace amrex;
 
@@ -204,48 +207,56 @@ RigidInjectedParticleContainer::BoostandRemapParticles()
 
 void
 RigidInjectedParticleContainer::PushPX(WarpXParIter& pti,
-	                               Cuda::ManagedDeviceVector<Real>& xp,
+                                       Cuda::ManagedDeviceVector<Real>& xp,
                                        Cuda::ManagedDeviceVector<Real>& yp,
                                        Cuda::ManagedDeviceVector<Real>& zp,
                                        Cuda::ManagedDeviceVector<Real>& giv,
                                        Real dt)
 {
 
-    if (WarpX::do_boosted_frame_diagnostic && do_boosted_frame_diags)
-    {
-        copy_attribs(pti, xp.dataPtr(), yp.dataPtr(), zp.dataPtr());
-    }
-    
-    // This wraps the call to warpx_particle_pusher so that inheritors can modify the call.
+    // This wraps the momentum and position advance so that inheritors can modify the call.
     auto& attribs = pti.GetAttribs();
     auto& uxp = attribs[PIdx::ux];
     auto& uyp = attribs[PIdx::uy];
     auto& uzp = attribs[PIdx::uz];
-    auto& Exp = attribs[PIdx::Ex];
-    auto& Eyp = attribs[PIdx::Ey];
-    auto& Ezp = attribs[PIdx::Ez];
-    auto& Bxp = attribs[PIdx::Bx];
-    auto& Byp = attribs[PIdx::By];
-    auto& Bzp = attribs[PIdx::Bz];
-    const long np  = pti.numParticles();
 
     // Save the position and momenta, making copies
     Cuda::ManagedDeviceVector<Real> xp_save, yp_save, zp_save;
     RealVector uxp_save, uyp_save, uzp_save;
 
+    Real* const AMREX_RESTRICT x = xp.dataPtr();
+    Real* const AMREX_RESTRICT y = yp.dataPtr();
+    Real* const AMREX_RESTRICT z = zp.dataPtr();
+    Real* const AMREX_RESTRICT gi = giv.dataPtr();
+    Real* const AMREX_RESTRICT ux = uxp.dataPtr();
+    Real* const AMREX_RESTRICT uy = uyp.dataPtr();
+    Real* const AMREX_RESTRICT uz = uzp.dataPtr();
+    Real* const AMREX_RESTRICT Exp = attribs[PIdx::Ex].dataPtr();
+    Real* const AMREX_RESTRICT Eyp = attribs[PIdx::Ey].dataPtr();
+    Real* const AMREX_RESTRICT Ezp = attribs[PIdx::Ez].dataPtr();
+    Real* const AMREX_RESTRICT Bxp = attribs[PIdx::Bx].dataPtr();
+    Real* const AMREX_RESTRICT Byp = attribs[PIdx::By].dataPtr();
+    Real* const AMREX_RESTRICT Bzp = attribs[PIdx::Bz].dataPtr();
+
     if (!done_injecting_lev) {
-        xp_save = xp;
-        yp_save = yp;
-        zp_save = zp;
-        uxp_save = uxp;
-        uyp_save = uyp;
-        uzp_save = uzp;
+        if (!(WarpX::do_boosted_frame_diagnostic && do_boosted_frame_diags)) {
+            // If the old values are not already saved, create copies here.
+            xp_save = xp;
+            yp_save = yp;
+            zp_save = zp;
+            uxp_save = uxp;
+            uyp_save = uyp;
+            uzp_save = uzp;
+        }
+
         // Scale the fields of particles about to cross the injection plane.
         // This only approximates what should be happening. The particles
         // should by advanced a fraction of a time step instead.
         // Scaling the fields is much easier and may be good enough.
-        for (int i=0 ; i < zp.size() ; i++) {
-            const Real dtscale = dt - (zinject_plane_lev_previous - zp[i])/(vzbeam_ave_boosted + WarpX::beta_boost*PhysConst::c);
+        const Real v_boost = WarpX::beta_boost*PhysConst::c;
+        amrex::ParallelFor( pti.numParticles(),
+            [=] AMREX_GPU_DEVICE (long i) {
+            const Real dtscale = dt - (zinject_plane_lev_previous - z[i])/(vzbeam_ave_boosted + v_boost);
             if (0. < dtscale && dtscale < dt) {
                 Exp[i] *= dtscale;
                 Eyp[i] *= dtscale;
@@ -255,18 +266,10 @@ RigidInjectedParticleContainer::PushPX(WarpXParIter& pti,
                 Bzp[i] *= dtscale;
             }
         }
+        );
     }
 
-    warpx_particle_pusher(&np,
-                          xp.dataPtr(),
-                          yp.dataPtr(),
-                          zp.dataPtr(),
-                          uxp.dataPtr(), uyp.dataPtr(), uzp.dataPtr(),
-                          giv.dataPtr(),
-                          Exp.dataPtr(), Eyp.dataPtr(), Ezp.dataPtr(),
-                          Bxp.dataPtr(), Byp.dataPtr(), Bzp.dataPtr(),
-                          &this->charge, &this->mass, &dt,
-                          &WarpX::particle_pusher_algo);
+    PhysicalParticleContainer::PushPX(pti, xp, yp, zp, giv, dt);
 
     if (!done_injecting_lev) {
 #ifdef _OPENMP
@@ -274,27 +277,50 @@ RigidInjectedParticleContainer::PushPX(WarpXParIter& pti,
 #else
         const int tid = 0;
 #endif
+
+        Real* AMREX_RESTRICT x_save;
+        Real* AMREX_RESTRICT y_save;
+        Real* AMREX_RESTRICT z_save;
+        Real* AMREX_RESTRICT ux_save;
+        Real* AMREX_RESTRICT uy_save;
+        Real* AMREX_RESTRICT uz_save;
+        if (!(WarpX::do_boosted_frame_diagnostic && do_boosted_frame_diags)) {
+            x_save = xp_save.dataPtr();
+            y_save = yp_save.dataPtr();
+            z_save = zp_save.dataPtr();
+            ux_save = uxp_save.dataPtr();
+            uy_save = uyp_save.dataPtr();
+            uz_save = uzp_save.dataPtr();
+        } else {
+            x_save = pti.GetAttribs(particle_comps["xold"]).dataPtr();
+            y_save = pti.GetAttribs(particle_comps["yold"]).dataPtr();
+            z_save = pti.GetAttribs(particle_comps["zold"]).dataPtr();
+            ux_save = pti.GetAttribs(particle_comps["uxold"]).dataPtr();
+            uy_save = pti.GetAttribs(particle_comps["uyold"]).dataPtr();
+            uz_save = pti.GetAttribs(particle_comps["uzold"]).dataPtr();
+        }
+
         // Undo the push for particles not injected yet.
         // The zp are advanced a fixed amount.
-        for (int i=0 ; i < zp.size() ; i++) {
-            if (zp[i] <= zinject_plane_lev) {
-                uxp[i] = uxp_save[i];
-                uyp[i] = uyp_save[i];
-                uzp[i] = uzp_save[i];
-                giv[i] = 1./std::sqrt(1. + (uxp[i]*uxp[i] + uyp[i]*uyp[i] + uzp[i]*uzp[i])/(PhysConst::c*PhysConst::c));
-                xp[i] = xp_save[i];
-                yp[i] = yp_save[i];
+        amrex::ParallelFor( pti.numParticles(),
+            [=] AMREX_GPU_DEVICE (long i) {
+            if (z[i] <= zinject_plane_lev) {
+                ux[i] = ux_save[i];
+                uy[i] = uy_save[i];
+                uz[i] = uz_save[i];
+                gi[i] = 1./std::sqrt(1. + (ux[i]*ux[i] + uy[i]*uy[i] + uz[i]*uz[i])/(PhysConst::c*PhysConst::c));
+                x[i] = x_save[i];
+                y[i] = y_save[i];
                 if (rigid_advance) {
-                    zp[i] = zp_save[i] + dt*vzbeam_ave_boosted;
+                    z[i] = z_save[i] + dt*vzbeam_ave_boosted;
                 }
                 else {
-                    zp[i] = zp_save[i] + dt*uzp[i]*giv[i];
+                    z[i] = z_save[i] + dt*uz[i]*gi[i];
                 }
-                done_injecting_temp[tid] = 0;
             }
         }
+        );
     }
-
 }
 
 void
@@ -314,28 +340,24 @@ RigidInjectedParticleContainer::Evolve (int lev,
     zinject_plane_levels[lev] -= dt*WarpX::beta_boost*PhysConst::c;
     zinject_plane_lev = zinject_plane_levels[lev];
 
-    // Setup check of whether more particles need to be injected
-#ifdef _OPENMP
-    const int nthreads = omp_get_max_threads();
-#else
-    const int nthreads = 1;
-#endif
-    done_injecting_temp.assign(nthreads, 1); // We do not use bool because vector<bool> is special.
+    // Set the done injecting flag whan the inject plane moves out of the
+    // simulation domain.
+    // It is much easier to do this check, rather than checking if all of the
+    // particles have crossed the inject plane.
+    const Real* plo = Geom(lev).ProbLo();
+    const Real* phi = Geom(lev).ProbHi();
+    done_injecting[lev] = (zinject_plane_levels[lev] < plo[2] || zinject_plane_levels[lev] > phi[2]);
     done_injecting_lev = done_injecting[lev];
 
     PhysicalParticleContainer::Evolve (lev,
-				       Ex, Ey, Ez,
-				       Bx, By, Bz,
-				       jx, jy, jz,
+                                       Ex, Ey, Ez,
+                                       Bx, By, Bz,
+                                       jx, jy, jz,
                                        cjx, cjy, cjz,
                                        rho, crho,
                                        cEx, cEy, cEz,
                                        cBx, cBy, cBz,
                                        t, dt);
-
-    // Check if all done_injecting_temp are still true.
-    done_injecting[lev] = std::all_of(done_injecting_temp.begin(), done_injecting_temp.end(),
-                                      [](int i) -> bool { return i; });
 }
 
 void
@@ -343,6 +365,8 @@ RigidInjectedParticleContainer::PushP (int lev, Real dt,
                                        const MultiFab& Ex, const MultiFab& Ey, const MultiFab& Ez,
                                        const MultiFab& Bx, const MultiFab& By, const MultiFab& Bz)
 {
+    BL_PROFILE("RigidInjectedParticleContainer::PushP");
+
     if (do_not_push) return;
 
     const std::array<Real,3>& dx = WarpX::CellSize(lev);
@@ -351,8 +375,11 @@ RigidInjectedParticleContainer::PushP (int lev, Real dt,
 #pragma omp parallel
 #endif
     {
-        Cuda::ManagedDeviceVector<Real> xp, yp, zp, giv;
-
+#ifdef _OPENMP
+        int thread_num = omp_get_thread_num();
+#else
+        int thread_num = 0;
+#endif
         for (WarpXParIter pti(*this, lev); pti.isValid(); ++pti)
         {
             const Box& box = pti.validbox();
@@ -386,24 +413,24 @@ RigidInjectedParticleContainer::PushP (int lev, Real dt,
             Byp.assign(np,WarpX::B_external[1]);
             Bzp.assign(np,WarpX::B_external[2]);
 
-            giv.resize(np);
+            m_giv[thread_num].resize(np);
 
             //
             // copy data from particle container to temp arrays
             //
-            pti.GetPosition(xp, yp, zp);
+            pti.GetPosition(m_xp[thread_num], m_yp[thread_num], m_zp[thread_num]);
 
             const std::array<Real,3>& xyzmin_grid = WarpX::LowerCorner(box, lev);
             const int* ixyzmin_grid = box.loVect();
 
             const int ll4symtry          = false;
-            const int l_lower_order_in_v = true;
             long lvect_fieldgathe = 64;
+
             warpx_geteb_energy_conserving(
                 &np,
-                xp.dataPtr(),
-                yp.dataPtr(),
-                zp.dataPtr(),
+                m_xp[thread_num].dataPtr(),
+                m_yp[thread_num].dataPtr(),
+                m_zp[thread_num].dataPtr(),
                 Exp.dataPtr(),Eyp.dataPtr(),Ezp.dataPtr(),
                 Bxp.dataPtr(),Byp.dataPtr(),Bzp.dataPtr(),
                 ixyzmin_grid,
@@ -416,7 +443,7 @@ RigidInjectedParticleContainer::PushP (int lev, Real dt,
                 BL_TO_FORTRAN_ANYD(bxfab),
                 BL_TO_FORTRAN_ANYD(byfab),
                 BL_TO_FORTRAN_ANYD(bzfab),
-                &ll4symtry, &l_lower_order_in_v, &WarpX::do_nodal,
+                &ll4symtry, &WarpX::l_lower_order_in_v, &WarpX::do_nodal,
                 &lvect_fieldgathe, &WarpX::field_gathering_algo);
 
             // Save the position and momenta, making copies
@@ -424,27 +451,60 @@ RigidInjectedParticleContainer::PushP (int lev, Real dt,
             auto uyp_save = uyp;
             auto uzp_save = uzp;
 
-            warpx_particle_pusher_momenta(&np,
-                                          xp.dataPtr(),
-                                          yp.dataPtr(),
-                                          zp.dataPtr(),
-                                          uxp.dataPtr(), uyp.dataPtr(), uzp.dataPtr(),
-                                          giv.dataPtr(),
-                                          Exp.dataPtr(), Eyp.dataPtr(), Ezp.dataPtr(),
-                                          Bxp.dataPtr(), Byp.dataPtr(), Bzp.dataPtr(),
-                                          &this->charge, &this->mass, &dt,
-                                          &WarpX::particle_pusher_algo);
+            // This wraps the momentum advance so that inheritors can modify the call.
+            // Extract pointers to the different particle quantities
+            const Real* const AMREX_RESTRICT zp = m_zp[thread_num].dataPtr();
+            Real* const AMREX_RESTRICT gi = m_giv[thread_num].dataPtr();
+            Real* const AMREX_RESTRICT uxpp = uxp.dataPtr();
+            Real* const AMREX_RESTRICT uypp = uyp.dataPtr();
+            Real* const AMREX_RESTRICT uzpp = uzp.dataPtr();
+            const Real* const AMREX_RESTRICT uxp_savep = uxp_save.dataPtr();
+            const Real* const AMREX_RESTRICT uyp_savep = uyp_save.dataPtr();
+            const Real* const AMREX_RESTRICT uzp_savep = uzp_save.dataPtr();
+            const Real* const AMREX_RESTRICT Expp = Exp.dataPtr();
+            const Real* const AMREX_RESTRICT Eypp = Eyp.dataPtr();
+            const Real* const AMREX_RESTRICT Ezpp = Ezp.dataPtr();
+            const Real* const AMREX_RESTRICT Bxpp = Bxp.dataPtr();
+            const Real* const AMREX_RESTRICT Bypp = Byp.dataPtr();
+            const Real* const AMREX_RESTRICT Bzpp = Bzp.dataPtr();
+
+            // Loop over the particles and update their momentum
+            const Real q = this->charge;
+            const Real m = this->mass;
+            if (WarpX::particle_pusher_algo == ParticlePusherAlgo::Boris){
+                amrex::ParallelFor( pti.numParticles(),
+                    [=] AMREX_GPU_DEVICE (long i) {
+                        UpdateMomentumBoris( uxpp[i], uypp[i], uzpp[i], gi[i],
+                              Expp[i], Eypp[i], Ezpp[i], Bxpp[i], Bypp[i], Bzpp[i], q, m, dt);
+                    }
+                );
+            } else if (WarpX::particle_pusher_algo == ParticlePusherAlgo::Vay) {
+                amrex::ParallelFor( pti.numParticles(),
+                    [=] AMREX_GPU_DEVICE (long i) {
+                        UpdateMomentumVay( uxpp[i], uypp[i], uzpp[i], gi[i],
+                              Expp[i], Eypp[i], Ezpp[i], Bxpp[i], Bypp[i], Bzpp[i], q, m, dt);
+                    }
+                );
+            } else {
+              amrex::Abort("Unknown particle pusher");
+            };
 
             // Undo the push for particles not injected yet.
             // It is assumed that PushP will only be called on the first and last steps
             // and that no particles will cross zinject_plane.
-            for (int i=0 ; i < zp.size() ; i++) {
-                if (zp[i] <= zinject_plane_levels[lev]) {
-                    uxp[i] = uxp_save[i];
-                    uyp[i] = uyp_save[i];
-                    uzp[i] = uzp_save[i];
+            const Real* const AMREX_RESTRICT ux_save = uxp_save.dataPtr();
+            const Real* const AMREX_RESTRICT uy_save = uyp_save.dataPtr();
+            const Real* const AMREX_RESTRICT uz_save = uzp_save.dataPtr();
+            const Real zz = zinject_plane_levels[lev];
+            amrex::ParallelFor( pti.numParticles(),
+                [=] AMREX_GPU_DEVICE (long i) {
+                if (zp[i] <= zz) {
+                    uxpp[i] = ux_save[i];
+                    uypp[i] = uy_save[i];
+                    uzpp[i] = uz_save[i];
                 }
             }
+            );
 
         }
     }