19 files changed, 903 insertions, 535 deletions
diff --git a/Source/.DS_Store b/Source/.DS_Store
new file mode 100644
index 000000000..01640e062
--- /dev/null
+++ b/Source/.DS_Store
diff --git a/Source/Diagnostics/WarpXIO.cpp b/Source/Diagnostics/WarpXIO.cpp
index 24272c588..38399bf9e 100644
--- a/Source/Diagnostics/WarpXIO.cpp
+++ b/Source/Diagnostics/WarpXIO.cpp
@@ -755,7 +755,6 @@ WarpX::WriteSlicePlotFile () const
     // writing plotfile //
     const std::string& slice_plotfilename = amrex::Concatenate(slice_plot_file,istep[0]);
     amrex::Print() << " Writing slice plotfile " << slice_plotfilename << "\n";
-    const int ngrow = 0;
 
     Vector<std::string> rfs;
     VisMF::Header::Version current_version = VisMF::GetHeaderVersion();
diff --git a/Source/FieldSolver/SpectralSolver/SpectralFieldData.cpp b/Source/FieldSolver/SpectralSolver/SpectralFieldData.cpp
index a2b695568..948baf0a6 100644
--- a/Source/FieldSolver/SpectralSolver/SpectralFieldData.cpp
+++ b/Source/FieldSolver/SpectralSolver/SpectralFieldData.cpp
@@ -55,30 +55,30 @@ SpectralFieldData::SpectralFieldData( const BoxArray& realspace_ba,
 #ifdef AMREX_USE_GPU
         // Create cuFFT plans
         // Creating 3D plan for real to complex -- double precision
-        // Assuming CUDA is used for programming GPU 
-        // Note that D2Z is inherently forward plan 
-        // and  Z2D is inherently backward plan 
+        // Assuming CUDA is used for programming GPU
+        // Note that D2Z is inherently forward plan
+        // and  Z2D is inherently backward plan
         cufftResult result;
 #if (AMREX_SPACEDIM == 3)
-        result = cufftPlan3d( &forward_plan[mfi], fft_size[2], 
+        result = cufftPlan3d( &forward_plan[mfi], fft_size[2],
                               fft_size[1],fft_size[0], CUFFT_D2Z);
         if ( result != CUFFT_SUCCESS ) {
            amrex::Print() << " cufftplan3d forward failed! \n";
         }
 
-        result = cufftPlan3d( &backward_plan[mfi], fft_size[2], 
+        result = cufftPlan3d( &backward_plan[mfi], fft_size[2],
                               fft_size[1], fft_size[0], CUFFT_Z2D);
         if ( result != CUFFT_SUCCESS ) {
            amrex::Print() << " cufftplan3d backward failed! \n";
         }
 #else
-        result = cufftPlan2d( &forward_plan[mfi], fft_size[1], 
+        result = cufftPlan2d( &forward_plan[mfi], fft_size[1],
                               fft_size[0], CUFFT_D2Z );
         if ( result != CUFFT_SUCCESS ) {
            amrex::Print() << " cufftplan2d forward failed! \n";
         }
 
-        result = cufftPlan2d( &backward_plan[mfi], fft_size[1], 
+        result = cufftPlan2d( &backward_plan[mfi], fft_size[1],
                                fft_size[0], CUFFT_Z2D );
         if ( result != CUFFT_SUCCESS ) {
            amrex::Print() << " cufftplan2d backward failed! \n";
@@ -162,20 +162,20 @@ SpectralFieldData::ForwardTransform( const MultiFab& mf,
             Array4<Real> tmp_arr = tmpRealField[mfi].array();
             ParallelFor( realspace_bx,
             [=] AMREX_GPU_DEVICE(int i, int j, int k) noexcept {
-                tmp_arr(i,j,k) = mf_arr(i,j,k,i_comp);                
+                tmp_arr(i,j,k) = mf_arr(i,j,k,i_comp);
             });
         }
 
         // Perform Fourier transform from `tmpRealField` to `tmpSpectralField`
 #ifdef AMREX_USE_GPU
-        // Perform Fast Fourier Transform on GPU using cuFFT 
-        // make sure that this is done on the same 
-        // GPU stream as the above copy        
+        // Perform Fast Fourier Transform on GPU using cuFFT
+        // make sure that this is done on the same
+        // GPU stream as the above copy
         cufftResult result;
-        cudaStream_t stream = amrex::Gpu::Device::cudaStream(); 
+        cudaStream_t stream = amrex::Gpu::Device::cudaStream();
         cufftSetStream ( forward_plan[mfi], stream);
-        result = cufftExecD2Z( forward_plan[mfi], 
-                               tmpRealField[mfi].dataPtr(), 
+        result = cufftExecD2Z( forward_plan[mfi],
+                               tmpRealField[mfi].dataPtr(),
                                reinterpret_cast<cuDoubleComplex*>(
                                tmpSpectralField[mfi].dataPtr()) );
         if ( result != CUFFT_SUCCESS ) {
@@ -271,13 +271,13 @@ SpectralFieldData::BackwardTransform( MultiFab& mf,
 
         // Perform Fourier transform from `tmpSpectralField` to `tmpRealField`
 #ifdef AMREX_USE_GPU
-        // Perform Fast Fourier Transform on GPU using cuFFT. 
-        // make sure that this is done on the same 
+        // Perform Fast Fourier Transform on GPU using cuFFT.
+        // make sure that this is done on the same
         // GPU stream as the above copy
         cufftResult result;
-        cudaStream_t stream = amrex::Gpu::Device::cudaStream(); 
+        cudaStream_t stream = amrex::Gpu::Device::cudaStream();
         cufftSetStream ( backward_plan[mfi], stream);
-        result = cufftExecZ2D( backward_plan[mfi], 
+        result = cufftExecZ2D( backward_plan[mfi],
                                reinterpret_cast<cuDoubleComplex*>(
                                tmpSpectralField[mfi].dataPtr()),
                                tmpRealField[mfi].dataPtr() );
@@ -289,16 +289,17 @@ SpectralFieldData::BackwardTransform( MultiFab& mf,
 #endif
 
         // Copy the temporary field `tmpRealField` to the real-space field `mf`
-
+        // (only in the valid cells ; not in the guard cells)
         // Normalize (divide by 1/N) since the FFT+IFFT results in a factor N
         {
-            const Box realspace_bx = tmpRealField[mfi].box();
             Array4<Real> mf_arr = mf[mfi].array();
             Array4<const Real> tmp_arr = tmpRealField[mfi].array();
             // Normalization: divide by the number of points in realspace
+            // (includes the guard cells)
+            const Box realspace_bx = tmpRealField[mfi].box();
             const Real inv_N = 1./realspace_bx.numPts();
 
-            ParallelFor( realspace_bx,
+            ParallelFor( mfi.validbox(),
             [=] AMREX_GPU_DEVICE(int i, int j, int k) noexcept {
                 // Copy and normalize field
                 mf_arr(i,j,k,i_comp) = inv_N*tmp_arr(i,j,k);
diff --git a/Source/FieldSolver/WarpXFFT.cpp b/Source/FieldSolver/WarpXFFT.cpp
index 13d92f6f3..f9cd7570a 100644
--- a/Source/FieldSolver/WarpXFFT.cpp
+++ b/Source/FieldSolver/WarpXFFT.cpp
@@ -90,7 +90,7 @@ CopyDataFromFFTToValid (MultiFab& mf, const MultiFab& mf_fft, const BoxArray& ba
 
         const FArrayBox& srcfab = mf_fft[mfi];
         const Box& srcbox = srcfab.box();
- 
+
         if (srcbox.contains(bx))
         {
             // Copy the interior region (without guard cells)
@@ -109,7 +109,7 @@ CopyDataFromFFTToValid (MultiFab& mf, const MultiFab& mf_fft, const BoxArray& ba
     mf.setVal(0.0, 0);
     mf.ParallelAdd(mftmp);
 
-  
+
 }
 
 }
@@ -129,19 +129,6 @@ WarpX::AllocLevelDataFFT (int lev)
     FFTDomainDecomposition(lev, ba_fp_fft, dm_fp_fft, ba_valid_fp_fft[lev], domain_fp_fft[lev],
                            geom[lev].Domain());
 
-    if (fft_hybrid_mpi_decomposition == false){
-        // Allocate and initialize objects for the spectral solver
-        // (all use the same distribution mapping)
-        std::array<Real,3> dx = CellSize(lev);
-#if (AMREX_SPACEDIM == 3)
-        RealVect dx_vect(dx[0], dx[1], dx[2]);
-#elif (AMREX_SPACEDIM == 2)
-        RealVect dx_vect(dx[0], dx[2]);
-#endif
-        spectral_solver_fp[lev].reset( new SpectralSolver( ba_fp_fft, dm_fp_fft,
-                 nox_fft, noy_fft, noz_fft, do_nodal, dx_vect, dt[lev] ) );
-    }
-
     // rho2 has one extra ghost cell, so that it's safe to deposit charge density after
     // pushing particle.
 
@@ -383,105 +370,106 @@ WarpX::PushPSATD (amrex::Real a_dt)
 {
     for (int lev = 0; lev <= finest_level; ++lev) {
         AMREX_ALWAYS_ASSERT_WITH_MESSAGE(dt[lev] == a_dt, "dt must be consistent");
-        PushPSATD(lev, a_dt);
+        if (fft_hybrid_mpi_decomposition){
+            PushPSATD_hybridFFT(lev, a_dt);
+        } else {
+            PushPSATD_localFFT(lev, a_dt);
+        }
     }
 }
 
+void WarpX::PushPSATD_localFFT (int lev, amrex::Real /* dt */)
+{
+    auto& solver = *spectral_solver_fp[lev];
+
+    // Perform forward Fourier transform
+    solver.ForwardTransform(*Efield_fp[lev][0], SpectralFieldIndex::Ex);
+    solver.ForwardTransform(*Efield_fp[lev][1], SpectralFieldIndex::Ey);
+    solver.ForwardTransform(*Efield_fp[lev][2], SpectralFieldIndex::Ez);
+    solver.ForwardTransform(*Bfield_fp[lev][0], SpectralFieldIndex::Bx);
+    solver.ForwardTransform(*Bfield_fp[lev][1], SpectralFieldIndex::By);
+    solver.ForwardTransform(*Bfield_fp[lev][2], SpectralFieldIndex::Bz);
+    solver.ForwardTransform(*current_fp[lev][0], SpectralFieldIndex::Jx);
+    solver.ForwardTransform(*current_fp[lev][1], SpectralFieldIndex::Jy);
+    solver.ForwardTransform(*current_fp[lev][2], SpectralFieldIndex::Jz);
+    solver.ForwardTransform(*rho_fp[lev], SpectralFieldIndex::rho_old, 0);
+    solver.ForwardTransform(*rho_fp[lev], SpectralFieldIndex::rho_new, 1);
+
+    // Advance fields in spectral space
+    solver.pushSpectralFields();
+
+    // Perform backward Fourier Transform
+    solver.BackwardTransform(*Efield_fp[lev][0], SpectralFieldIndex::Ex);
+    solver.BackwardTransform(*Efield_fp[lev][1], SpectralFieldIndex::Ey);
+    solver.BackwardTransform(*Efield_fp[lev][2], SpectralFieldIndex::Ez);
+    solver.BackwardTransform(*Bfield_fp[lev][0], SpectralFieldIndex::Bx);
+    solver.BackwardTransform(*Bfield_fp[lev][1], SpectralFieldIndex::By);
+    solver.BackwardTransform(*Bfield_fp[lev][2], SpectralFieldIndex::Bz);
+}
+
 void
-WarpX::PushPSATD (int lev, amrex::Real /* dt */)
+WarpX::PushPSATD_hybridFFT (int lev, amrex::Real /* dt */)
 {
+#ifndef AMREX_USE_CUDA // Running on CPU ; use PICSAR code for the hybrid FFT
+
     BL_PROFILE_VAR_NS("WarpXFFT::CopyDualGrid", blp_copy);
     BL_PROFILE_VAR_NS("PICSAR::FftPushEB", blp_push_eb);
 
     auto period_fp = geom[lev].periodicity();
 
     BL_PROFILE_VAR_START(blp_copy);
-    Efield_fp_fft[lev][0]->ParallelCopy(*Efield_fp[lev][0], 0, 0, 1, 0, 0, period_fp);
-    Efield_fp_fft[lev][1]->ParallelCopy(*Efield_fp[lev][1], 0, 0, 1, 0, 0, period_fp);
-    Efield_fp_fft[lev][2]->ParallelCopy(*Efield_fp[lev][2], 0, 0, 1, 0, 0, period_fp);
-    Bfield_fp_fft[lev][0]->ParallelCopy(*Bfield_fp[lev][0], 0, 0, 1, 0, 0, period_fp);
-    Bfield_fp_fft[lev][1]->ParallelCopy(*Bfield_fp[lev][1], 0, 0, 1, 0, 0, period_fp);
-    Bfield_fp_fft[lev][2]->ParallelCopy(*Bfield_fp[lev][2], 0, 0, 1, 0, 0, period_fp);
-    current_fp_fft[lev][0]->ParallelCopy(*current_fp[lev][0], 0, 0, 1, 0, 0, period_fp);
-    current_fp_fft[lev][1]->ParallelCopy(*current_fp[lev][1], 0, 0, 1, 0, 0, period_fp);
-    current_fp_fft[lev][2]->ParallelCopy(*current_fp[lev][2], 0, 0, 1, 0, 0, period_fp);
-    rho_fp_fft[lev]->ParallelCopy(*rho_fp[lev], 0, 0, 2, 0, 0, period_fp);
+    Efield_fp_fft[lev][0]->ParallelCopy(*Efield_fp[lev][0], 0, 0, 1, Efield_fp[lev][0]->nGrow(), 0, period_fp);
+    Efield_fp_fft[lev][1]->ParallelCopy(*Efield_fp[lev][1], 0, 0, 1, Efield_fp[lev][1]->nGrow(), 0, period_fp);
+    Efield_fp_fft[lev][2]->ParallelCopy(*Efield_fp[lev][2], 0, 0, 1, Efield_fp[lev][2]->nGrow(), 0, period_fp);
+    Bfield_fp_fft[lev][0]->ParallelCopy(*Bfield_fp[lev][0], 0, 0, 1, Bfield_fp[lev][0]->nGrow(), 0, period_fp);
+    Bfield_fp_fft[lev][1]->ParallelCopy(*Bfield_fp[lev][1], 0, 0, 1, Bfield_fp[lev][1]->nGrow(), 0, period_fp);
+    Bfield_fp_fft[lev][2]->ParallelCopy(*Bfield_fp[lev][2], 0, 0, 1, Bfield_fp[lev][2]->nGrow(), 0, period_fp);
+    current_fp_fft[lev][0]->ParallelCopy(*current_fp[lev][0], 0, 0, 1, current_fp[lev][0]->nGrow(), 0, period_fp);
+    current_fp_fft[lev][1]->ParallelCopy(*current_fp[lev][1], 0, 0, 1, current_fp[lev][1]->nGrow(), 0, period_fp);
+    current_fp_fft[lev][2]->ParallelCopy(*current_fp[lev][2], 0, 0, 1, current_fp[lev][2]->nGrow(), 0, period_fp);
+    rho_fp_fft[lev]->ParallelCopy(*rho_fp[lev], 0, 0, 2, rho_fp[lev]->nGrow(), 0, period_fp);
     BL_PROFILE_VAR_STOP(blp_copy);
 
     BL_PROFILE_VAR_START(blp_push_eb);
-    if (fft_hybrid_mpi_decomposition){
-#ifndef AMREX_USE_CUDA // When running on CPU: use PICSAR code
-        if (Efield_fp_fft[lev][0]->local_size() == 1)
-           //Only one FFT patch on this MPI
-        {
-        	for (MFIter mfi(*Efield_fp_fft[lev][0]); mfi.isValid(); ++mfi)
-        	{
-                    warpx_fft_push_eb(WARPX_TO_FORTRAN_ANYD((*Efield_fp_fft[lev][0])[mfi]),
-                                      WARPX_TO_FORTRAN_ANYD((*Efield_fp_fft[lev][1])[mfi]),
-                                      WARPX_TO_FORTRAN_ANYD((*Efield_fp_fft[lev][2])[mfi]),
-                                      WARPX_TO_FORTRAN_ANYD((*Bfield_fp_fft[lev][0])[mfi]),
-                                      WARPX_TO_FORTRAN_ANYD((*Bfield_fp_fft[lev][1])[mfi]),
-                                      WARPX_TO_FORTRAN_ANYD((*Bfield_fp_fft[lev][2])[mfi]),
-                                      WARPX_TO_FORTRAN_ANYD((*current_fp_fft[lev][0])[mfi]),
-                                      WARPX_TO_FORTRAN_ANYD((*current_fp_fft[lev][1])[mfi]),
-                                      WARPX_TO_FORTRAN_ANYD((*current_fp_fft[lev][2])[mfi]),
-                                      WARPX_TO_FORTRAN_N_ANYD((*rho_fp_fft[lev])[mfi],0),
-                                      WARPX_TO_FORTRAN_N_ANYD((*rho_fp_fft[lev])[mfi],1));
-        	}
-        }
-        else if (Efield_fp_fft[lev][0]->local_size() == 0)
-          // No FFT patch on this MPI rank
-          // Still need to call the MPI-FFT routine.
-        {
-        	FArrayBox fab(Box(IntVect::TheZeroVector(), IntVect::TheUnitVector()));
-        	warpx_fft_push_eb(WARPX_TO_FORTRAN_ANYD(fab),
-        			  WARPX_TO_FORTRAN_ANYD(fab),
-        			  WARPX_TO_FORTRAN_ANYD(fab),
-        			  WARPX_TO_FORTRAN_ANYD(fab),
-        			  WARPX_TO_FORTRAN_ANYD(fab),
-        			  WARPX_TO_FORTRAN_ANYD(fab),
-        			  WARPX_TO_FORTRAN_ANYD(fab),
-        			  WARPX_TO_FORTRAN_ANYD(fab),
-        			  WARPX_TO_FORTRAN_ANYD(fab),
-        			  WARPX_TO_FORTRAN_ANYD(fab),
-        			  WARPX_TO_FORTRAN_ANYD(fab));
-        }
-        else
-          // Multiple FFT patches on this MPI rank
-        {
-    	amrex::Abort("WarpX::PushPSATD: TODO");
-        }
-#else // AMREX_USE_CUDA is defined ; running on GPU
-        amrex::Abort("The option `psatd.fft_hybrid_mpi_decomposition` does not work on GPU.");
-#endif
-    } else {
-        // Not using the hybrid decomposition
-        auto& solver = *spectral_solver_fp[lev];
-
-        // Perform forward Fourier transform
-        solver.ForwardTransform(*Efield_fp_fft[lev][0], SpectralFieldIndex::Ex);
-        solver.ForwardTransform(*Efield_fp_fft[lev][1], SpectralFieldIndex::Ey);
-        solver.ForwardTransform(*Efield_fp_fft[lev][2], SpectralFieldIndex::Ez);
-        solver.ForwardTransform(*Bfield_fp_fft[lev][0], SpectralFieldIndex::Bx);
-        solver.ForwardTransform(*Bfield_fp_fft[lev][1], SpectralFieldIndex::By);
-        solver.ForwardTransform(*Bfield_fp_fft[lev][2], SpectralFieldIndex::Bz);
-        solver.ForwardTransform(*current_fp_fft[lev][0], SpectralFieldIndex::Jx);
-        solver.ForwardTransform(*current_fp_fft[lev][1], SpectralFieldIndex::Jy);
-        solver.ForwardTransform(*current_fp_fft[lev][2], SpectralFieldIndex::Jz);
-        solver.ForwardTransform(*rho_fp_fft[lev], SpectralFieldIndex::rho_old, 0);
-        solver.ForwardTransform(*rho_fp_fft[lev], SpectralFieldIndex::rho_new, 1);
-
-        // Advance fields in spectral space
-        solver.pushSpectralFields();
-
-        // Perform backward Fourier Transform
-        solver.BackwardTransform(*Efield_fp_fft[lev][0], SpectralFieldIndex::Ex);
-        solver.BackwardTransform(*Efield_fp_fft[lev][1], SpectralFieldIndex::Ey);
-        solver.BackwardTransform(*Efield_fp_fft[lev][2], SpectralFieldIndex::Ez);
-        solver.BackwardTransform(*Bfield_fp_fft[lev][0], SpectralFieldIndex::Bx);
-        solver.BackwardTransform(*Bfield_fp_fft[lev][1], SpectralFieldIndex::By);
-        solver.BackwardTransform(*Bfield_fp_fft[lev][2], SpectralFieldIndex::Bz);
-
+    if (Efield_fp_fft[lev][0]->local_size() == 1)
+       //Only one FFT patch on this MPI
+    {
+    	for (MFIter mfi(*Efield_fp_fft[lev][0]); mfi.isValid(); ++mfi)
+    	{
+                warpx_fft_push_eb(WARPX_TO_FORTRAN_ANYD((*Efield_fp_fft[lev][0])[mfi]),
+                                  WARPX_TO_FORTRAN_ANYD((*Efield_fp_fft[lev][1])[mfi]),
+                                  WARPX_TO_FORTRAN_ANYD((*Efield_fp_fft[lev][2])[mfi]),
+                                  WARPX_TO_FORTRAN_ANYD((*Bfield_fp_fft[lev][0])[mfi]),
+                                  WARPX_TO_FORTRAN_ANYD((*Bfield_fp_fft[lev][1])[mfi]),
+                                  WARPX_TO_FORTRAN_ANYD((*Bfield_fp_fft[lev][2])[mfi]),
+                                  WARPX_TO_FORTRAN_ANYD((*current_fp_fft[lev][0])[mfi]),
+                                  WARPX_TO_FORTRAN_ANYD((*current_fp_fft[lev][1])[mfi]),
+                                  WARPX_TO_FORTRAN_ANYD((*current_fp_fft[lev][2])[mfi]),
+                                  WARPX_TO_FORTRAN_N_ANYD((*rho_fp_fft[lev])[mfi],0),
+                                  WARPX_TO_FORTRAN_N_ANYD((*rho_fp_fft[lev])[mfi],1));
+    	}
+    }
+    else if (Efield_fp_fft[lev][0]->local_size() == 0)
+      // No FFT patch on this MPI rank
+      // Still need to call the MPI-FFT routine.
+    {
+    	FArrayBox fab(Box(IntVect::TheZeroVector(), IntVect::TheUnitVector()));
+    	warpx_fft_push_eb(WARPX_TO_FORTRAN_ANYD(fab),
+    			  WARPX_TO_FORTRAN_ANYD(fab),
+    			  WARPX_TO_FORTRAN_ANYD(fab),
+    			  WARPX_TO_FORTRAN_ANYD(fab),
+    			  WARPX_TO_FORTRAN_ANYD(fab),
+    			  WARPX_TO_FORTRAN_ANYD(fab),
+    			  WARPX_TO_FORTRAN_ANYD(fab),
+    			  WARPX_TO_FORTRAN_ANYD(fab),
+    			  WARPX_TO_FORTRAN_ANYD(fab),
+    			  WARPX_TO_FORTRAN_ANYD(fab),
+    			  WARPX_TO_FORTRAN_ANYD(fab));
+    }
+    else
+      // Multiple FFT patches on this MPI rank
+    {
+	amrex::Abort("WarpX::PushPSATD: TODO");
     }
     BL_PROFILE_VAR_STOP(blp_push_eb);
 
@@ -498,7 +486,8 @@ WarpX::PushPSATD (int lev, amrex::Real /* dt */)
     {
         amrex::Abort("WarpX::PushPSATD: TODO");
     }
+#else // AMREX_USE_CUDA is defined ; running on GPU
+        amrex::Abort("The option `psatd.fft_hybrid_mpi_decomposition` does not work on GPU.");
+#endif
 
 }
-
-
diff --git a/Source/FortranInterface/WarpX_f.F90 b/Source/FortranInterface/WarpX_f.F90
index 0560a9981..542cc95a1 100644
--- a/Source/FortranInterface/WarpX_f.F90
+++ b/Source/FortranInterface/WarpX_f.F90
@@ -8,39 +8,6 @@ module warpx_module
 
 contains
 
-  subroutine warpx_copy_attribs(np, xp, yp, zp, uxp, uyp, uzp, &
-       xpold, ypold, zpold, uxpold, uypold, uzpold) &
-       bind(c,name='warpx_copy_attribs')
-    integer(c_long),  intent(in)    :: np
-    real(amrex_real), intent(in)    :: xp(np)
-    real(amrex_real), intent(in)    :: yp(np)
-    real(amrex_real), intent(in)    :: zp(np)
-    real(amrex_real), intent(in)    :: uxp(np)
-    real(amrex_real), intent(in)    :: uyp(np)
-    real(amrex_real), intent(in)    :: uzp(np)
-    real(amrex_real), intent(inout) :: xpold(np)
-    real(amrex_real), intent(inout) :: ypold(np)
-    real(amrex_real), intent(inout) :: zpold(np)
-    real(amrex_real), intent(inout) :: uxpold(np)
-    real(amrex_real), intent(inout) :: uypold(np)
-    real(amrex_real), intent(inout) :: uzpold(np)
-
-    integer n
-
-    do n = 1, np
-
-       xpold(n) = xp(n)
-       ypold(n) = yp(n)
-       zpold(n) = zp(n)
-
-       uxpold(n) = uxp(n)
-       uypold(n) = uyp(n)
-       uzpold(n) = uzp(n)
-
-    end do
-
-  end subroutine warpx_copy_attribs
-
   subroutine warpx_compute_E (lo, hi, &
        phi, phlo, phhi, &
        Ex,  Exlo, Exhi, &
diff --git a/Source/FortranInterface/WarpX_f.H b/Source/FortranInterface/WarpX_f.H
index 98dd8685d..0440148eb 100644
--- a/Source/FortranInterface/WarpX_f.H
+++ b/Source/FortranInterface/WarpX_f.H
@@ -75,12 +75,6 @@ extern "C"
 {
 #endif
 
-    void warpx_copy_attribs(const long* np,
-                            const amrex_real* xp, const amrex_real* yp, const amrex_real* zp,
-                            const amrex_real* uxp, const amrex_real* uyp, const amrex_real* uzp,
-                            amrex_real* xpold, amrex_real* ypold, amrex_real* zpold,
-                            amrex_real* uxpold, amrex_real* uypold, amrex_real* uzpold);
-
 	// Charge deposition
 	void warpx_charge_deposition(amrex::Real* rho,
             const long* np,	const amrex::Real* xp, const amrex::Real* yp, const amrex::Real* zp,	const amrex::Real* w,
diff --git a/Source/Laser/LaserParticleContainer.cpp b/Source/Laser/LaserParticleContainer.cpp
index 515aa1f5d..3d3447a3c 100644
--- a/Source/Laser/LaserParticleContainer.cpp
+++ b/Source/Laser/LaserParticleContainer.cpp
@@ -504,15 +504,15 @@ LaserParticleContainer::Evolve (int lev,
             // Current Deposition
             //
             // Deposit inside domains
-            DepositCurrent(pti, wp, uxp, uyp, uzp, &jx, &jy, &jz,
-                           0, np_current, thread_num,
-                           lev, lev, dt);
+            DepositCurrentFortran(pti, wp, uxp, uyp, uzp, &jx, &jy, &jz,
+                                  0, np_current, thread_num,
+                                  lev, lev, dt);
             bool has_buffer = cjx;
             if (has_buffer){
                 // Deposit in buffers
-                DepositCurrent(pti, wp, uxp, uyp, uzp, cjx, cjy, cjz,
-                               np_current, np-np_current, thread_num,
-                               lev, lev-1, dt);
+                DepositCurrentFortran(pti, wp, uxp, uyp, uzp, cjx, cjy, cjz,
+                                      np_current, np-np_current, thread_num,
+                                      lev, lev-1, dt);
             }
 
             //
diff --git a/Source/Particles/Deposition/CurrentDeposition.H b/Source/Particles/Deposition/CurrentDeposition.H
new file mode 100644
index 000000000..efda97892
--- /dev/null
+++ b/Source/Particles/Deposition/CurrentDeposition.H
@@ -0,0 +1,179 @@
+#ifndef CURRENTDEPOSITION_H_
+#define CURRENTDEPOSITION_H_
+
+using namespace amrex;
+
+// Compute shape factor and return index of leftmost cell where 
+// particle writes.
+// Specialized templates are defined below for orders 1, 2 and 3.
+template <int depos_order>
+AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
+int compute_shape_factor(Real* const sx, Real xint) {return 0;};
+
+// Compute shape factor for order 1.
+template <>
+AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
+int compute_shape_factor <1> (Real* const sx, Real xmid){
+    int j = (int) xmid;
+    Real xint = xmid-j;
+    sx[0] = 1.0 - xint;
+    sx[1] = xint;
+    return j;
+}
+
+// Compute shape factor for order 2.
+template <>
+AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
+int compute_shape_factor <2> (Real* const sx, Real xmid){
+    int j = (int) (xmid+0.5);
+    Real xint = xmid-j;
+    sx[0] = 0.5*(0.5-xint)*(0.5-xint);
+    sx[1] = 0.75-xint*xint;
+    sx[2] = 0.5*(0.5+xint)*(0.5+xint);
+    // index of the leftmost cell where particle deposits
+    return j-1;
+}
+
+// Compute shape factor for order 3.
+template <>
+AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
+int compute_shape_factor <3> (Real* const sx, Real xmid){
+    int j = (int) xmid;
+    Real xint = xmid-j;
+    sx[0] = 1.0/6.0*(1.0-xint)*(1.0-xint)*(1.0-xint);
+    sx[1] = 2.0/3.0-xint*xint*(1-xint/2.0);
+    sx[2] = 2.0/3.0-(1-xint)*(1-xint)*(1.0-0.5*(1-xint));
+    sx[3] = 1.0/6.0*xint*xint*xint;
+    // index of the leftmost cell where particle deposits
+    return j-1;
+}
+
+/* \brief Current Deposition for thread thread_num
+ * /param xp, yp, zp   : Pointer to arrays of particle positions.
+ * \param wp           : Pointer to array of particle weights.
+ * \param uxp uyp uzp  : Pointer to arrays of particle momentum.
+ * \param jx_arr       : Array4 of current density, either full array or tile.
+ * \param jy_arr       : Array4 of current density, either full array or tile.
+ * \param jz_arr       : Array4 of current density, either full array or tile.
+ * \param offset       : Index of first particle for which current is deposited
+ * \param np_to_depose : Number of particles for which current is deposited.
+                        Particles [offset,offset+np_tp_depose] deposit current.
+ * \param dt           : Time step for particle level
+ * \param dx           : 3D cell size
+ * \param xyzmin       : Physical lower bounds of domain.
+ * \param lo           : Index lower bounds of domain.
+ * \param stagger_shift: 0 if nodal, 0.5 if staggered.
+ * /param q            : species charge.
+ */
+template <int depos_order>
+void doDepositionShapeN(const Real * const xp, const Real * const yp, const Real * const zp,
+                        const Real * const wp, const Real * const uxp,
+                        const Real * const uyp, const Real * const uzp,
+                        const amrex::Array4<amrex::Real>& jx_arr, 
+                        const amrex::Array4<amrex::Real>& jy_arr, 
+                        const amrex::Array4<amrex::Real>& jz_arr,
+                        const long offset, const long np_to_depose, 
+                        const amrex::Real dt, const std::array<amrex::Real,3>& dx,
+                        const std::array<Real, 3> xyzmin,
+                        const Dim3 lo,
+                        const amrex::Real stagger_shift, 
+                        const amrex::Real q)
+{
+    const Real dxi = 1.0/dx[0];
+    const Real dzi = 1.0/dx[2];
+    const Real dts2dx = 0.5*dt*dxi;
+    const Real dts2dz = 0.5*dt*dzi;
+#if (AMREX_SPACEDIM == 2)
+    const Real invvol = dxi*dzi;
+#else // (AMREX_SPACEDIM == 3)
+    const Real dyi = 1.0/dx[1];
+    const Real dts2dy = 0.5*dt*dyi;
+    const Real invvol = dxi*dyi*dzi;
+#endif
+
+    const Real xmin = xyzmin[0];
+    const Real ymin = xyzmin[1];
+    const Real zmin = xyzmin[2];
+    const Real clightsq = 1.0/PhysConst::c/PhysConst::c;
+
+    // Loop over particles and deposit into jx_arr, jy_arr and jz_arr
+    ParallelFor( np_to_depose,
+                 [=] AMREX_GPU_DEVICE (long ip) {
+                     // --- Get particle quantities
+                     const Real gaminv = 1.0/std::sqrt(1.0 + uxp[ip]*uxp[ip]*clightsq
+						       + uyp[ip]*uyp[ip]*clightsq
+						       + uzp[ip]*uzp[ip]*clightsq);
+                     const Real wq  = q*wp[ip];
+                     const Real vx  = uxp[ip]*gaminv;
+                     const Real vy  = uyp[ip]*gaminv;
+                     const Real vz  = uzp[ip]*gaminv;
+                     // wqx, wqy wqz are particle current in each direction 
+                     const Real wqx = wq*invvol*vx;
+                     const Real wqy = wq*invvol*vy;
+                     const Real wqz = wq*invvol*vz;
+
+                     // --- Compute shape factors
+                     // x direction
+                     // Get particle position after 1/2 push back in position
+                     const Real xmid = (xp[ip]-xmin)*dxi-dts2dx*vx;
+                     // Compute shape factors for node-centered quantities
+                     Real AMREX_RESTRICT sx [depos_order + 1];
+                     // j: leftmost grid point (node-centered) that the particle touches
+                     const int j  = compute_shape_factor<depos_order>(sx,  xmid);
+                     // Compute shape factors for cell-centered quantities
+                     Real AMREX_RESTRICT sx0[depos_order + 1];
+                     // j0: leftmost grid point (cell-centered) that the particle touches
+                     const int j0 = compute_shape_factor<depos_order>(sx0, xmid-stagger_shift);
+                     
+#if (AMREX_SPACEDIM == 3)
+                     // y direction
+                     const Real ymid= (yp[ip]-ymin)*dyi-dts2dy*vy;
+                     Real AMREX_RESTRICT sy [depos_order + 1];
+		     const int k  = compute_shape_factor<depos_order>(sy,  ymid);
+                     Real AMREX_RESTRICT sy0[depos_order + 1];
+		     const int k0 = compute_shape_factor<depos_order>(sy0, ymid-stagger_shift);
+#endif
+                     // z direction
+                     const Real zmid= (zp[ip]-zmin)*dzi-dts2dz*vz;
+                     Real AMREX_RESTRICT sz [depos_order + 1];
+		     const int l  = compute_shape_factor<depos_order>(sz,  zmid);
+                     Real AMREX_RESTRICT sz0[depos_order + 1];
+		     const int l0 = compute_shape_factor<depos_order>(sz0, zmid-stagger_shift);
+
+                     // Deposit current into jx_arr, jy_arr and jz_arr
+#if (AMREX_SPACEDIM == 2)
+                     for (int iz=0; iz<=depos_order; iz++){
+                         for (int ix=0; ix<=depos_order; ix++){
+                             amrex::Gpu::Atomic::Add(
+                                 &jx_arr(lo.x+j0+ix, lo.y+l +iz, 0), 
+                                 sx0[ix]*sz [iz]*wqx);
+                             amrex::Gpu::Atomic::Add(
+                                 &jy_arr(lo.x+j +ix, lo.y+l +iz, 0), 
+                                 sx [ix]*sz [iz]*wqy);
+                             amrex::Gpu::Atomic::Add(
+                                 &jz_arr(lo.x+j +ix, lo.y+l0+iz, 0), 
+                                 sx [ix]*sz0[iz]*wqz);
+                         }
+                     }
+#else // (AMREX_SPACEDIM == 3)
+                     for (int iz=0; iz<=depos_order; iz++){
+                         for (int iy=0; iy<=depos_order; iy++){
+                             for (int ix=0; ix<=depos_order; ix++){
+                                 amrex::Gpu::Atomic::Add(
+                                     &jx_arr(lo.x+j0+ix, lo.y+k +iy, lo.z+l +iz),
+                                     sx0[ix]*sy [iy]*sz [iz]*wqx);
+                                 amrex::Gpu::Atomic::Add(
+                                     &jy_arr(lo.x+j +ix, lo.y+k0+iy, lo.z+l +iz), 
+                                     sx [ix]*sy0[iy]*sz [iz]*wqy);
+                                 amrex::Gpu::Atomic::Add(
+                                     &jz_arr(lo.x+j +ix, lo.y+k +iy, lo.z+l0+iz),
+                                     sx [ix]*sy [iy]*sz0[iz]*wqz);
+                             }
+                         }
+                     }
+#endif
+                 }
+        );
+}
+
+#endif // CURRENTDEPOSITION_H_
diff --git a/Source/Particles/Deposition/Make.package b/Source/Particles/Deposition/Make.package
new file mode 100644
index 000000000..0d5ebe2a7
--- /dev/null
+++ b/Source/Particles/Deposition/Make.package
@@ -0,0 +1,3 @@
+CEXE_headers += CurrentDeposition.H
+INCLUDE_LOCATIONS += $(WARPX_HOME)/Source/Particles/Deposition
+VPATH_LOCATIONS   += $(WARPX_HOME)/Source/Particles/Deposition
diff --git a/Source/Particles/Make.package b/Source/Particles/Make.package
index f33095738..2038472a1 100644
--- a/Source/Particles/Make.package
+++ b/Source/Particles/Make.package
@@ -11,6 +11,7 @@ CEXE_headers += RigidInjectedParticleContainer.H
 CEXE_headers += PhysicalParticleContainer.H
 
 include $(WARPX_HOME)/Source/Particles/Pusher/Make.package
+include $(WARPX_HOME)/Source/Particles/Deposition/Make.package
 
 INCLUDE_LOCATIONS += $(WARPX_HOME)/Source/Particles
 VPATH_LOCATIONS   += $(WARPX_HOME)/Source/Particles
diff --git a/Source/Particles/PhysicalParticleContainer.H b/Source/Particles/PhysicalParticleContainer.H
index bd8cfb47e..d55764682 100644
--- a/Source/Particles/PhysicalParticleContainer.H
+++ b/Source/Particles/PhysicalParticleContainer.H
@@ -77,6 +77,9 @@ public:
                         const amrex::MultiFab& Bx,
                         const amrex::MultiFab& By,
                         const amrex::MultiFab& Bz) override;
+                        
+    void copy_attribs(WarpXParIter& pti,const amrex::Real* xp,
+                        const amrex::Real* yp, const amrex::Real* zp);
 
     virtual void PostRestart () final {}
 
diff --git a/Source/Particles/PhysicalParticleContainer.cpp b/Source/Particles/PhysicalParticleContainer.cpp
index 43b46ec49..d47a7b220 100644
--- a/Source/Particles/PhysicalParticleContainer.cpp
+++ b/Source/Particles/PhysicalParticleContainer.cpp
@@ -12,7 +12,7 @@ using namespace amrex;
 
 long PhysicalParticleContainer::
 NumParticlesToAdd(const Box& overlap_box, const RealBox& overlap_realbox,
-		  const RealBox& tile_realbox, const RealBox& particle_real_box)
+                  const RealBox& tile_realbox, const RealBox& particle_real_box)
 {
     const int lev = 0;
     const Geometry& geom = Geom(lev);
@@ -24,43 +24,43 @@ NumParticlesToAdd(const Box& overlap_box, const RealBox& overlap_realbox,
     for (IntVect iv = overlap_box.smallEnd(); iv <= overlap_box.bigEnd(); overlap_box.next(iv))
     {
         int fac;
-	if (do_continuous_injection) {
+        if (do_continuous_injection) {
 #if ( AMREX_SPACEDIM == 3 )
-	    Real x = overlap_corner[0] + (iv[0] + 0.5)*dx[0];
-	    Real y = overlap_corner[1] + (iv[1] + 0.5)*dx[1];
-	    Real z = overlap_corner[2] + (iv[2] + 0.5)*dx[2];
+            Real x = overlap_corner[0] + (iv[0] + 0.5)*dx[0];
+            Real y = overlap_corner[1] + (iv[1] + 0.5)*dx[1];
+            Real z = overlap_corner[2] + (iv[2] + 0.5)*dx[2];
 #elif ( AMREX_SPACEDIM == 2 )
-	    Real x = overlap_corner[0] + (iv[0] + 0.5)*dx[0];
-	    Real y = 0;
-	    Real z = overlap_corner[1] + (iv[1] + 0.5)*dx[1];
+            Real x = overlap_corner[0] + (iv[0] + 0.5)*dx[0];
+            Real y = 0;
+            Real z = overlap_corner[1] + (iv[1] + 0.5)*dx[1];
 #endif
-	    fac = GetRefineFac(x, y, z);
-	} else {
-	    fac = 1.0;
-	}
+            fac = GetRefineFac(x, y, z);
+        } else {
+            fac = 1.0;
+        }
 	
-	int ref_num_ppc = num_ppc * AMREX_D_TERM(fac, *fac, *fac);
-	for (int i_part=0; i_part<ref_num_ppc;i_part++) {
-	    std::array<Real, 3> r;
-	    plasma_injector->getPositionUnitBox(r, i_part, fac);
+        int ref_num_ppc = num_ppc * AMREX_D_TERM(fac, *fac, *fac);
+        for (int i_part=0; i_part<ref_num_ppc;i_part++) {
+            std::array<Real, 3> r;
+            plasma_injector->getPositionUnitBox(r, i_part, fac);
 #if ( AMREX_SPACEDIM == 3 )
-	    Real x = overlap_corner[0] + (iv[0] + r[0])*dx[0];
-	    Real y = overlap_corner[1] + (iv[1] + r[1])*dx[1];
-	    Real z = overlap_corner[2] + (iv[2] + r[2])*dx[2];
+            Real x = overlap_corner[0] + (iv[0] + r[0])*dx[0];
+            Real y = overlap_corner[1] + (iv[1] + r[1])*dx[1];
+            Real z = overlap_corner[2] + (iv[2] + r[2])*dx[2];
 #elif ( AMREX_SPACEDIM == 2 )
-	    Real x = overlap_corner[0] + (iv[0] + r[0])*dx[0];
-	    Real y = 0;
-	    Real z = overlap_corner[1] + (iv[1] + r[1])*dx[1];
+            Real x = overlap_corner[0] + (iv[0] + r[0])*dx[0];
+            Real y = 0;
+            Real z = overlap_corner[1] + (iv[1] + r[1])*dx[1];
 #endif
-	    // If the new particle is not inside the tile box,
-	    // go to the next generated particle.
+            // If the new particle is not inside the tile box,
+            // go to the next generated particle.
 #if ( AMREX_SPACEDIM == 3 )
-	    if(!tile_realbox.contains( RealVect{x, y, z} )) continue;
+            if(!tile_realbox.contains( RealVect{x, y, z} )) continue;
 #elif ( AMREX_SPACEDIM == 2 )
-	    if(!tile_realbox.contains( RealVect{x, z} )) continue;
+            if(!tile_realbox.contains( RealVect{x, z} )) continue;
 #endif
-	    ++np;
-	}
+            ++np;
+        }
     }
     
     return np;
@@ -170,8 +170,8 @@ void PhysicalParticleContainer::MapParticletoBoostedFrame(Real& x, Real& y, Real
 
     // Move the particles to where they will be at t = 0 in the boosted frame
     if (boost_adjust_transverse_positions) {
-      x = xpr - tpr*vxpr;
-      y = ypr - tpr*vypr;
+        x = xpr - tpr*vxpr;
+        y = ypr - tpr*vypr;
     }
 
     z = zpr - tpr*vzpr;
@@ -323,9 +323,9 @@ void
 PhysicalParticleContainer::AddPlasma (int lev, RealBox part_realbox)
 {
 #ifdef AMREX_USE_GPU
-  AddPlasmaGPU(lev, part_realbox);
+    AddPlasmaGPU(lev, part_realbox);
 #else
-  AddPlasmaCPU(lev, part_realbox);
+    AddPlasmaCPU(lev, part_realbox);
 #endif
 }
 
@@ -416,7 +416,7 @@ PhysicalParticleContainer::AddPlasmaCPU (int lev, RealBox part_realbox)
                 // Count the number of cells in this direction in overlap_realbox
                 overlap_box.setSmall( dir, 0 );
                 overlap_box.setBig( dir,
-				    int( round((overlap_realbox.hi(dir)-overlap_realbox.lo(dir))/dx[dir] )) - 1);
+                                    int( round((overlap_realbox.hi(dir)-overlap_realbox.lo(dir))/dx[dir] )) - 1);
             }
             if (no_overlap == 1) {
                 continue; // Go to the next tile
@@ -483,54 +483,54 @@ PhysicalParticleContainer::AddPlasmaCPU (int lev, RealBox part_realbox)
                     Real dens;
                     std::array<Real, 3> u;
                     if (WarpX::gamma_boost == 1.){
-                      // Lab-frame simulation
-                      // If the particle is not within the species's
-                      // xmin, xmax, ymin, ymax, zmin, zmax, go to
-                      // the next generated particle.
-                      if (!plasma_injector->insideBounds(xb, yb, z)) continue;
-                      plasma_injector->getMomentum(u, x, y, z);
-                      dens = plasma_injector->getDensity(x, y, z);
+                        // Lab-frame simulation
+                        // If the particle is not within the species's
+                        // xmin, xmax, ymin, ymax, zmin, zmax, go to
+                        // the next generated particle.
+                        if (!plasma_injector->insideBounds(xb, yb, z)) continue;
+                        plasma_injector->getMomentum(u, x, y, z);
+                        dens = plasma_injector->getDensity(x, y, z);
                     } else {
-                      // Boosted-frame simulation
-                      Real c = PhysConst::c;
-                      Real gamma_boost = WarpX::gamma_boost;
-                      Real beta_boost = WarpX::beta_boost;
-                      // Since the user provides the density distribution
-                      // at t_lab=0 and in the lab-frame coordinates,
-                      // we need to find the lab-frame position of this
-                      // particle at t_lab=0, from its boosted-frame coordinates
-                      // Assuming ballistic motion, this is given by:
-                      // z0_lab = gamma*( z_boost*(1-beta*betaz_lab) - ct_boost*(betaz_lab-beta) )
-                      // where betaz_lab is the speed of the particle in the lab frame
-                      //
-                      // In order for this equation to be solvable, betaz_lab
-                      // is explicitly assumed to have no dependency on z0_lab
-                      plasma_injector->getMomentum(u, x, y, 0.); // No z0_lab dependency
-                      // At this point u is the lab-frame momentum
-                      // => Apply the above formula for z0_lab
-                      Real gamma_lab = std::sqrt( 1 + (u[0]*u[0] + u[1]*u[1] + u[2]*u[2])/(c*c) );
-                      Real betaz_lab = u[2]/gamma_lab/c;
-                      Real t = WarpX::GetInstance().gett_new(lev);
-                      Real z0_lab = gamma_boost * ( z*(1-beta_boost*betaz_lab) - c*t*(betaz_lab-beta_boost) );
-                      // If the particle is not within the lab-frame zmin, zmax, etc.
-                      // go to the next generated particle.
-                      if (!plasma_injector->insideBounds(xb, yb, z0_lab)) continue;
-                      // call `getDensity` with lab-frame parameters
-                      dens = plasma_injector->getDensity(x, y, z0_lab);
-                      // At this point u and dens are the lab-frame quantities
-                      // => Perform Lorentz transform
-                      dens = gamma_boost * dens * ( 1 - beta_boost*betaz_lab );
-                      u[2] = gamma_boost * ( u[2] -beta_boost*c*gamma_lab );
+                        // Boosted-frame simulation
+                        Real c = PhysConst::c;
+                        Real gamma_boost = WarpX::gamma_boost;
+                        Real beta_boost = WarpX::beta_boost;
+                        // Since the user provides the density distribution
+                        // at t_lab=0 and in the lab-frame coordinates,
+                        // we need to find the lab-frame position of this
+                        // particle at t_lab=0, from its boosted-frame coordinates
+                        // Assuming ballistic motion, this is given by:
+                        // z0_lab = gamma*( z_boost*(1-beta*betaz_lab) - ct_boost*(betaz_lab-beta) )
+                        // where betaz_lab is the speed of the particle in the lab frame
+                        //
+                        // In order for this equation to be solvable, betaz_lab
+                        // is explicitly assumed to have no dependency on z0_lab
+                        plasma_injector->getMomentum(u, x, y, 0.); // No z0_lab dependency
+                        // At this point u is the lab-frame momentum
+                        // => Apply the above formula for z0_lab
+                        Real gamma_lab = std::sqrt( 1 + (u[0]*u[0] + u[1]*u[1] + u[2]*u[2])/(c*c) );
+                        Real betaz_lab = u[2]/gamma_lab/c;
+                        Real t = WarpX::GetInstance().gett_new(lev);
+                        Real z0_lab = gamma_boost * ( z*(1-beta_boost*betaz_lab) - c*t*(betaz_lab-beta_boost) );
+                        // If the particle is not within the lab-frame zmin, zmax, etc.
+                        // go to the next generated particle.
+                        if (!plasma_injector->insideBounds(xb, yb, z0_lab)) continue;
+                        // call `getDensity` with lab-frame parameters
+                        dens = plasma_injector->getDensity(x, y, z0_lab);
+                        // At this point u and dens are the lab-frame quantities
+                        // => Perform Lorentz transform
+                        dens = gamma_boost * dens * ( 1 - beta_boost*betaz_lab );
+                        u[2] = gamma_boost * ( u[2] -beta_boost*c*gamma_lab );
                     }
                     Real weight = dens * scale_fac / (AMREX_D_TERM(fac, *fac, *fac));
 #ifdef WARPX_RZ
                     if (plasma_injector->radially_weighted) {
-                      weight *= 2*MathConst::pi*xb;
+                        weight *= 2*MathConst::pi*xb;
                     } else {
-                      // This is not correct since it might shift the particle
-                      // out of the local grid
-                      x = std::sqrt(xb*rmax);
-                      weight *= dx[0];
+                        // This is not correct since it might shift the particle
+                        // out of the local grid
+                        x = std::sqrt(xb*rmax);
+                        weight *= dx[0];
                     }
 #endif
                     attribs[PIdx::w ] = weight;
@@ -550,18 +550,18 @@ PhysicalParticleContainer::AddPlasmaCPU (int lev, RealBox part_realbox)
                         particle_tile.push_back_real(particle_comps["uzold"], u[2]);
                     }
 
-		    AddOneParticle(lev, grid_id, tile_id, x, y, z, attribs);
+                    AddOneParticle(lev, grid_id, tile_id, x, y, z, attribs);
                 }
             }
 
             if (cost) {
-	        wt = (amrex::second() - wt) / tile_box.d_numPts();
+                wt = (amrex::second() - wt) / tile_box.d_numPts();
                 Array4<Real> const& costarr = cost->array(mfi);
                 amrex::ParallelFor(tile_box,
-                [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept
-                {
-                    costarr(i,j,k) += wt;
-                });
+                                   [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept
+                                   {
+                                       costarr(i,j,k) += wt;
+                                   });
             }
         }
     }
@@ -655,7 +655,7 @@ PhysicalParticleContainer::AddPlasmaGPU (int lev, RealBox part_realbox)
                 // Count the number of cells in this direction in overlap_realbox
                 overlap_box.setSmall( dir, 0 );
                 overlap_box.setBig( dir,
-				    int( round((overlap_realbox.hi(dir)-overlap_realbox.lo(dir))/dx[dir] )) - 1);
+                                    int( round((overlap_realbox.hi(dir)-overlap_realbox.lo(dir))/dx[dir] )) - 1);
             }
             if (no_overlap == 1) {
                 continue; // Go to the next tile
@@ -664,8 +664,8 @@ PhysicalParticleContainer::AddPlasmaGPU (int lev, RealBox part_realbox)
             const int grid_id = mfi.index();
             const int tile_id = mfi.LocalTileIndex();
 
-	    Cuda::HostVector<ParticleType> host_particles;
-	    std::array<Cuda::HostVector<Real>, PIdx::nattribs> host_attribs;
+            Cuda::HostVector<ParticleType> host_particles;
+            std::array<Cuda::HostVector<Real>, PIdx::nattribs> host_attribs;
 	    
             // Loop through the cells of overlap_box and inject
             // the corresponding particles
@@ -725,54 +725,54 @@ PhysicalParticleContainer::AddPlasmaGPU (int lev, RealBox part_realbox)
                     Real dens;
                     std::array<Real, 3> u;
                     if (WarpX::gamma_boost == 1.){
-                      // Lab-frame simulation
-                      // If the particle is not within the species's
-                      // xmin, xmax, ymin, ymax, zmin, zmax, go to
-                      // the next generated particle.
-                      if (!plasma_injector->insideBounds(xb, yb, z)) continue;
-                      plasma_injector->getMomentum(u, x, y, z);
-                      dens = plasma_injector->getDensity(x, y, z);
+                        // Lab-frame simulation
+                        // If the particle is not within the species's
+                        // xmin, xmax, ymin, ymax, zmin, zmax, go to
+                        // the next generated particle.
+                        if (!plasma_injector->insideBounds(xb, yb, z)) continue;
+                        plasma_injector->getMomentum(u, x, y, z);
+                        dens = plasma_injector->getDensity(x, y, z);
                     } else {
-                      // Boosted-frame simulation
-                      Real c = PhysConst::c;
-                      Real gamma_boost = WarpX::gamma_boost;
-                      Real beta_boost = WarpX::beta_boost;
-                      // Since the user provides the density distribution
-                      // at t_lab=0 and in the lab-frame coordinates,
-                      // we need to find the lab-frame position of this
-                      // particle at t_lab=0, from its boosted-frame coordinates
-                      // Assuming ballistic motion, this is given by:
-                      // z0_lab = gamma*( z_boost*(1-beta*betaz_lab) - ct_boost*(betaz_lab-beta) )
-                      // where betaz_lab is the speed of the particle in the lab frame
-                      //
-                      // In order for this equation to be solvable, betaz_lab
-                      // is explicitly assumed to have no dependency on z0_lab
-                      plasma_injector->getMomentum(u, x, y, 0.); // No z0_lab dependency
-                      // At this point u is the lab-frame momentum
-                      // => Apply the above formula for z0_lab
-                      Real gamma_lab = std::sqrt( 1 + (u[0]*u[0] + u[1]*u[1] + u[2]*u[2])/(c*c) );
-                      Real betaz_lab = u[2]/gamma_lab/c;
-                      Real t = WarpX::GetInstance().gett_new(lev);
-                      Real z0_lab = gamma_boost * ( z*(1-beta_boost*betaz_lab) - c*t*(betaz_lab-beta_boost) );
-                      // If the particle is not within the lab-frame zmin, zmax, etc.
-                      // go to the next generated particle.
-                      if (!plasma_injector->insideBounds(xb, yb, z0_lab)) continue;
-                      // call `getDensity` with lab-frame parameters
-                      dens = plasma_injector->getDensity(x, y, z0_lab);
-                      // At this point u and dens are the lab-frame quantities
-                      // => Perform Lorentz transform
-                      dens = gamma_boost * dens * ( 1 - beta_boost*betaz_lab );
-                      u[2] = gamma_boost * ( u[2] -beta_boost*c*gamma_lab );
+                        // Boosted-frame simulation
+                        Real c = PhysConst::c;
+                        Real gamma_boost = WarpX::gamma_boost;
+                        Real beta_boost = WarpX::beta_boost;
+                        // Since the user provides the density distribution
+                        // at t_lab=0 and in the lab-frame coordinates,
+                        // we need to find the lab-frame position of this
+                        // particle at t_lab=0, from its boosted-frame coordinates
+                        // Assuming ballistic motion, this is given by:
+                        // z0_lab = gamma*( z_boost*(1-beta*betaz_lab) - ct_boost*(betaz_lab-beta) )
+                        // where betaz_lab is the speed of the particle in the lab frame
+                        //
+                        // In order for this equation to be solvable, betaz_lab
+                        // is explicitly assumed to have no dependency on z0_lab
+                        plasma_injector->getMomentum(u, x, y, 0.); // No z0_lab dependency
+                        // At this point u is the lab-frame momentum
+                        // => Apply the above formula for z0_lab
+                        Real gamma_lab = std::sqrt( 1 + (u[0]*u[0] + u[1]*u[1] + u[2]*u[2])/(c*c) );
+                        Real betaz_lab = u[2]/gamma_lab/c;
+                        Real t = WarpX::GetInstance().gett_new(lev);
+                        Real z0_lab = gamma_boost * ( z*(1-beta_boost*betaz_lab) - c*t*(betaz_lab-beta_boost) );
+                        // If the particle is not within the lab-frame zmin, zmax, etc.
+                        // go to the next generated particle.
+                        if (!plasma_injector->insideBounds(xb, yb, z0_lab)) continue;
+                        // call `getDensity` with lab-frame parameters
+                        dens = plasma_injector->getDensity(x, y, z0_lab);
+                        // At this point u and dens are the lab-frame quantities
+                        // => Perform Lorentz transform
+                        dens = gamma_boost * dens * ( 1 - beta_boost*betaz_lab );
+                        u[2] = gamma_boost * ( u[2] -beta_boost*c*gamma_lab );
                     }
                     Real weight = dens * scale_fac / (AMREX_D_TERM(fac, *fac, *fac));
 #ifdef WARPX_RZ
                     if (plasma_injector->radially_weighted) {
-                      weight *= 2*MathConst::pi*xb;
+                        weight *= 2*MathConst::pi*xb;
                     } else {
-                      // This is not correct since it might shift the particle
-                      // out of the local grid
-                      x = std::sqrt(xb*rmax);
-                      weight *= dx[0];
+                        // This is not correct since it might shift the particle
+                        // out of the local grid
+                        x = std::sqrt(xb*rmax);
+                        weight *= dx[0];
                     }
 #endif
                     attribs[PIdx::w ] = weight;
@@ -793,50 +793,50 @@ PhysicalParticleContainer::AddPlasmaGPU (int lev, RealBox part_realbox)
                         particle_tile.push_back_real(particle_comps["uzold"], u[2]);
                     }
 
-		    ParticleType p;
-		    p.id()  = ParticleType::NextID();
-		    p.cpu() = ParallelDescriptor::MyProc();
+                    ParticleType p;
+                    p.id()  = ParticleType::NextID();
+                    p.cpu() = ParallelDescriptor::MyProc();
 #if (AMREX_SPACEDIM == 3)
-		    p.pos(0) = x;
-		    p.pos(1) = y;
-		    p.pos(2) = z;
+                    p.pos(0) = x;
+                    p.pos(1) = y;
+                    p.pos(2) = z;
 #elif (AMREX_SPACEDIM == 2)
 #ifdef WARPX_RZ
                     attribs[PIdx::theta] = theta;
 #endif
-		    p.pos(0) = xb;
-		    p.pos(1) = z;
+                    p.pos(0) = xb;
+                    p.pos(1) = z;
 #endif
 
-		    host_particles.push_back(p);
-		    for (int kk = 0; kk < PIdx::nattribs; ++kk)
-		      host_attribs[kk].push_back(attribs[kk]);
+                    host_particles.push_back(p);
+                    for (int kk = 0; kk < PIdx::nattribs; ++kk)
+                        host_attribs[kk].push_back(attribs[kk]);
                 }
             }
 
-	    auto& particle_tile = GetParticles(lev)[std::make_pair(grid_id,tile_id)];
+            auto& particle_tile = GetParticles(lev)[std::make_pair(grid_id,tile_id)];
             auto old_size = particle_tile.GetArrayOfStructs().size();
             auto new_size = old_size + host_particles.size();
-	    particle_tile.resize(new_size);
+            particle_tile.resize(new_size);
 
             Cuda::thrust_copy(host_particles.begin(),
                               host_particles.end(),
                               particle_tile.GetArrayOfStructs().begin() + old_size);
 
-	    for (int kk = 0; kk < PIdx::nattribs; ++kk) {
+            for (int kk = 0; kk < PIdx::nattribs; ++kk) {
                 Cuda::thrust_copy(host_attribs[kk].begin(),
                                   host_attribs[kk].end(),
                                   particle_tile.GetStructOfArrays().GetRealData(kk).begin() + old_size);
-	    }
+            }
 	    			 
             if (cost) {
-	        wt = (amrex::second() - wt) / tile_box.d_numPts();
+                wt = (amrex::second() - wt) / tile_box.d_numPts();
                 Array4<Real> const& costarr = cost->array(mfi);
                 amrex::ParallelFor(tile_box,
-                [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept
-                {
-                    costarr(i,j,k) += wt;
-                });
+                                   [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept
+                                   {
+                                       costarr(i,j,k) += wt;
+                                   });
             }
         }		
     }
@@ -963,13 +963,13 @@ FieldGatherES (const amrex::Vector<std::array<std::unique_ptr<amrex::MultiFab>,
                 WRPX_INTERPOLATE_CIC(particles.dataPtr(), nstride, np,
                                      Exp.dataPtr(), Eyp.dataPtr(),
 #if AMREX_SPACEDIM == 3
-                Ezp.dataPtr(),
+                                     Ezp.dataPtr(),
 #endif
-                                exfab.dataPtr(), eyfab.dataPtr(),
+                                     exfab.dataPtr(), eyfab.dataPtr(),
 #if AMREX_SPACEDIM == 3
-                                ezfab.dataPtr(),
+                                     ezfab.dataPtr(),
 #endif
-                                box.loVect(), box.hiVect(), plo, dx, &ng);
+                                     box.loVect(), box.hiVect(), plo, dx, &ng);
             } else {
 
                 const FArrayBox& exfab_coarse = coarse_Ex[pti];
@@ -1004,7 +1004,7 @@ FieldGatherES (const amrex::Vector<std::array<std::unique_ptr<amrex::MultiFab>,
 
 void
 PhysicalParticleContainer::EvolveES (const Vector<std::array<std::unique_ptr<MultiFab>, 3> >& E,
-                                           Vector<std::unique_ptr<MultiFab> >& rho,
+                                     Vector<std::unique_ptr<MultiFab> >& rho,
                                      Real t, Real dt)
 {
     BL_PROFILE("PPC::EvolveES()");
@@ -1014,7 +1014,7 @@ PhysicalParticleContainer::EvolveES (const Vector<std::array<std::unique_ptr<Mul
         BL_ASSERT(OnSameGrids(lev, *rho[lev]));
         const auto& gm = m_gdb->Geom(lev);
         const RealBox& prob_domain = gm.ProbDomain();
-	for (WarpXParIter pti(*this, lev); pti.isValid(); ++pti) {
+        for (WarpXParIter pti(*this, lev); pti.isValid(); ++pti) {
             // Particle structs
             auto& particles = pti.GetArrayOfStructs();
             int nstride = particles.dataShape().first;
@@ -1071,11 +1071,11 @@ PhysicalParticleContainer::FieldGather (int lev,
     {
         Cuda::ManagedDeviceVector<Real> xp, yp, zp;
 
-	for (WarpXParIter pti(*this, lev); pti.isValid(); ++pti)
-	{
+        for (WarpXParIter pti(*this, lev); pti.isValid(); ++pti)
+        {
             Real wt = amrex::second();
 
-	    const Box& box = pti.validbox();
+            const Box& box = pti.validbox();
 
             auto& attribs = pti.GetAttribs();
 
@@ -1088,63 +1088,63 @@ PhysicalParticleContainer::FieldGather (int lev,
 
             const long np = pti.numParticles();
 
-	    // Data on the grid
-	    const FArrayBox& exfab = Ex[pti];
-	    const FArrayBox& eyfab = Ey[pti];
-	    const FArrayBox& ezfab = Ez[pti];
-	    const FArrayBox& bxfab = Bx[pti];
-	    const FArrayBox& byfab = By[pti];
-	    const FArrayBox& bzfab = Bz[pti];
-
-	    Exp.assign(np,0.0);
-	    Eyp.assign(np,0.0);
-	    Ezp.assign(np,0.0);
-	    Bxp.assign(np,0.0);
-	    Byp.assign(np,0.0);
-	    Bzp.assign(np,0.0);
-
-	    //
-	    // copy data from particle container to temp arrays
-	    //
+            // Data on the grid
+            const FArrayBox& exfab = Ex[pti];
+            const FArrayBox& eyfab = Ey[pti];
+            const FArrayBox& ezfab = Ez[pti];
+            const FArrayBox& bxfab = Bx[pti];
+            const FArrayBox& byfab = By[pti];
+            const FArrayBox& bzfab = Bz[pti];
+
+            Exp.assign(np,0.0);
+            Eyp.assign(np,0.0);
+            Ezp.assign(np,0.0);
+            Bxp.assign(np,0.0);
+            Byp.assign(np,0.0);
+            Bzp.assign(np,0.0);
+
+            //
+            // copy data from particle container to temp arrays
+            //
             pti.GetPosition(xp, yp, zp);
 
             const std::array<Real,3>& xyzmin = WarpX::LowerCorner(box, lev);
             const int* ixyzmin = box.loVect();
 
-	    //
-	    // Field Gather
-	    //
-	    const int ll4symtry          = false;
+            //
+            // Field Gather
+            //
+            const int ll4symtry          = false;
             long lvect_fieldgathe = 64;
-	    warpx_geteb_energy_conserving(
-	       &np,
-               xp.dataPtr(),
-               yp.dataPtr(),
-               zp.dataPtr(),
-	       Exp.dataPtr(),Eyp.dataPtr(),Ezp.dataPtr(),
-	       Bxp.dataPtr(),Byp.dataPtr(),Bzp.dataPtr(),
-               ixyzmin,
-               &xyzmin[0], &xyzmin[1], &xyzmin[2],
-               &dx[0], &dx[1], &dx[2],
-	       &WarpX::nox, &WarpX::noy, &WarpX::noz,
-               BL_TO_FORTRAN_ANYD(exfab),
-	       BL_TO_FORTRAN_ANYD(eyfab),
-	       BL_TO_FORTRAN_ANYD(ezfab),
-               BL_TO_FORTRAN_ANYD(bxfab),
-	       BL_TO_FORTRAN_ANYD(byfab),
-	       BL_TO_FORTRAN_ANYD(bzfab),
-	       &ll4symtry, &WarpX::l_lower_order_in_v, &WarpX::do_nodal,
-	       &lvect_fieldgathe, &WarpX::field_gathering_algo);
+            warpx_geteb_energy_conserving(
+                &np,
+                xp.dataPtr(),
+                yp.dataPtr(),
+                zp.dataPtr(),
+                Exp.dataPtr(),Eyp.dataPtr(),Ezp.dataPtr(),
+                Bxp.dataPtr(),Byp.dataPtr(),Bzp.dataPtr(),
+                ixyzmin,
+                &xyzmin[0], &xyzmin[1], &xyzmin[2],
+                &dx[0], &dx[1], &dx[2],
+                &WarpX::nox, &WarpX::noy, &WarpX::noz,
+                BL_TO_FORTRAN_ANYD(exfab),
+                BL_TO_FORTRAN_ANYD(eyfab),
+                BL_TO_FORTRAN_ANYD(ezfab),
+                BL_TO_FORTRAN_ANYD(bxfab),
+                BL_TO_FORTRAN_ANYD(byfab),
+                BL_TO_FORTRAN_ANYD(bzfab),
+                &ll4symtry, &WarpX::l_lower_order_in_v, &WarpX::do_nodal,
+                &lvect_fieldgathe, &WarpX::field_gathering_algo);
 
             if (cost) {
                 const Box& tbx = pti.tilebox();
                 wt = (amrex::second() - wt) / tbx.d_numPts();
                 Array4<Real> const& costarr = cost->array(pti);
                 amrex::ParallelFor(tbx,
-                [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept
-                {
-                    costarr(i,j,k) += wt;
-                });
+                                   [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept
+                                   {
+                                       costarr(i,j,k) += wt;
+                                   });
             }
         }
     }
@@ -1152,9 +1152,9 @@ PhysicalParticleContainer::FieldGather (int lev,
 
 void
 PhysicalParticleContainer::Evolve (int lev,
-				   const MultiFab& Ex, const MultiFab& Ey, const MultiFab& Ez,
-				   const MultiFab& Bx, const MultiFab& By, const MultiFab& Bz,
-				   MultiFab& jx, MultiFab& jy, MultiFab& jz,
+                                   const MultiFab& Ex, const MultiFab& Ey, const MultiFab& Ez,
+                                   const MultiFab& Bx, const MultiFab& By, const MultiFab& Bz,
+                                   MultiFab& jx, MultiFab& jy, MultiFab& jz,
                                    MultiFab* cjx, MultiFab* cjy, MultiFab* cjz,
                                    MultiFab* rho, MultiFab* crho,
                                    const MultiFab* cEx, const MultiFab* cEy, const MultiFab* cEz,
@@ -1200,8 +1200,8 @@ PhysicalParticleContainer::Evolve (int lev,
         RealVector tmp;
         ParticleVector particle_tmp;
 
-	for (WarpXParIter pti(*this, lev); pti.isValid(); ++pti)
-	{
+        for (WarpXParIter pti(*this, lev); pti.isValid(); ++pti)
+        {
             Real wt = amrex::second();
 
             const Box& box = pti.validbox();
@@ -1282,14 +1282,14 @@ PhysicalParticleContainer::Evolve (int lev,
 #endif
             }
 
-	    Exp.assign(np,0.0);
-	    Eyp.assign(np,0.0);
-	    Ezp.assign(np,0.0);
-	    Bxp.assign(np,WarpX::B_external[0]);
-	    Byp.assign(np,WarpX::B_external[1]);
-	    Bzp.assign(np,WarpX::B_external[2]);
+            Exp.assign(np,0.0);
+            Eyp.assign(np,0.0);
+            Ezp.assign(np,0.0);
+            Bxp.assign(np,WarpX::B_external[0]);
+            Byp.assign(np,WarpX::B_external[1]);
+            Bzp.assign(np,WarpX::B_external[2]);
 
-	    m_giv[thread_num].resize(np);
+            m_giv[thread_num].resize(np);
 
             long nfine_current = np;
             long nfine_gather = np;
@@ -1384,12 +1384,12 @@ PhysicalParticleContainer::Evolve (int lev,
 
             const long np_current = (cjx) ? nfine_current : np;
             
-	    //
-	    // copy data from particle container to temp arrays
-	    //
-	    BL_PROFILE_VAR_START(blp_copy);
+            //
+            // copy data from particle container to temp arrays
+            //
+            BL_PROFILE_VAR_START(blp_copy);
             pti.GetPosition(m_xp[thread_num], m_yp[thread_num], m_zp[thread_num]);
-	    BL_PROFILE_VAR_STOP(blp_copy);
+            BL_PROFILE_VAR_STOP(blp_copy);
 
             if (rho) DepositCharge(pti, wp, rho, crho, 0, np_current, np, thread_num, lev);
             
@@ -1529,17 +1529,30 @@ PhysicalParticleContainer::Evolve (int lev,
                 //
                 // Current Deposition
                 //
-                // Deposit inside domains
-                DepositCurrent(pti, wp, uxp, uyp, uzp, &jx, &jy, &jz,
-                               0, np_current, thread_num,
-                               lev, lev, dt);
-                if (has_buffer){
-                    // Deposit in buffers
-                    DepositCurrent(pti, wp, uxp, uyp, uzp, cjx, cjy, cjz,
-                                   np_current, np-np_current, thread_num,
-                                   lev, lev-1, dt);
+                if (WarpX::use_picsar_deposition) {
+                    // Deposit inside domains
+                    DepositCurrentFortran(pti, wp, uxp, uyp, uzp, &jx, &jy, &jz,
+                                          0, np_current, thread_num,
+                                          lev, lev, dt);
+                    if (has_buffer){
+                        // Deposit in buffers
+                        DepositCurrentFortran(pti, wp, uxp, uyp, uzp, cjx, cjy, cjz,
+                                              np_current, np-np_current, thread_num,
+                                              lev, lev-1, dt);
+                    }
+                } else {
+                    // Deposit inside domains
+                    DepositCurrent(pti, wp, uxp, uyp, uzp, &jx, &jy, &jz,
+                                   0, np_current, thread_num,
+                                   lev, lev, dt);
+                    if (has_buffer){
+                        // Deposit in buffers
+                        DepositCurrent(pti, wp, uxp, uyp, uzp, cjx, cjy, cjz,
+                                       np_current, np-np_current, thread_num,
+                                       lev, lev-1, dt);
+                    }
                 }
-  
+
                 //
                 // copy particle data back
                 //
@@ -1555,10 +1568,10 @@ PhysicalParticleContainer::Evolve (int lev,
                 wt = (amrex::second() - wt) / tbx.d_numPts();
                 Array4<Real> const& costarr = cost->array(pti);
                 amrex::ParallelFor(tbx,
-                [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept
-                {
-                    costarr(i,j,k) += wt;
-                });
+                                   [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept
+                                   {
+                                       costarr(i,j,k) += wt;
+                                   });
             }
         }
     }
@@ -1590,9 +1603,9 @@ PhysicalParticleContainer::SplitParticles(int lev)
     {
         pti.GetPosition(xp, yp, zp);
         const std::array<Real,3>& dx = WarpX::CellSize(lev);
-	// particle Array Of Structs data
+        // particle Array Of Structs data
         auto& particles = pti.GetArrayOfStructs();
-	// particle Struct Of Arrays data
+        // particle Struct Of Arrays data
         auto& attribs = pti.GetAttribs();
         auto& wp  = attribs[PIdx::w ];
         auto& uxp = attribs[PIdx::ux];
@@ -1602,13 +1615,13 @@ PhysicalParticleContainer::SplitParticles(int lev)
         for(int i=0; i<np; i++){
             auto& p = particles[i];
             if (p.id() == DoSplitParticleID){
-		// If particle is tagged, split it and put the 
-		// split particles in local arrays psplit_x etc.
+                // If particle is tagged, split it and put the 
+                // split particles in local arrays psplit_x etc.
                 np_split_to_add += np_split;
 #if (AMREX_SPACEDIM==2)
                 if (split_type==0){
-		    // Split particle in two along each axis
-		    // 4 particles in 2d
+                    // Split particle in two along each axis
+                    // 4 particles in 2d
                     for (int ishift = -1; ishift < 2; ishift +=2 ){
                         for (int kshift = -1; kshift < 2; kshift +=2 ){
                             // Add one particle with offset in x and z
@@ -1622,8 +1635,8 @@ PhysicalParticleContainer::SplitParticles(int lev)
                         }
                     }
                 } else {
-		    // Split particle in two along each diagonal
-		    // 4 particles in 2d
+                    // Split particle in two along each diagonal
+                    // 4 particles in 2d
                     for (int ishift = -1; ishift < 2; ishift +=2 ){
                         // Add one particle with offset in x
                         psplit_x.push_back( xp[i] + ishift*dx[0]/2 );
@@ -1644,26 +1657,26 @@ PhysicalParticleContainer::SplitParticles(int lev)
                     }
                 }
 #elif (AMREX_SPACEDIM==3)
-		if (split_type==0){
-		    // Split particle in two along each axis
-		    // 6 particles in 2d
-		    for (int ishift = -1; ishift < 2; ishift +=2 ){
-			for (int jshift = -1; jshift < 2; jshift +=2 ){
-			    for (int kshift = -1; kshift < 2; kshift +=2 ){
-				// Add one particle with offset in x, y and z
-				psplit_x.push_back( xp[i] + ishift*dx[0]/2 );
-				psplit_y.push_back( yp[i] + jshift*dx[1]/2 );
-				psplit_z.push_back( zp[i] + jshift*dx[2]/2 );
-				psplit_ux.push_back( uxp[i] );
-				psplit_uy.push_back( uyp[i] );
-				psplit_uz.push_back( uzp[i] );
-				psplit_w.push_back( wp[i]/np_split );
-			    }
-			}
-		    }
-		} else {
-		    // Split particle in two along each diagonal
-		    // 8 particles in 3d
+                if (split_type==0){
+                    // Split particle in two along each axis
+                    // 6 particles in 2d
+                    for (int ishift = -1; ishift < 2; ishift +=2 ){
+                        for (int jshift = -1; jshift < 2; jshift +=2 ){
+                            for (int kshift = -1; kshift < 2; kshift +=2 ){
+                                // Add one particle with offset in x, y and z
+                                psplit_x.push_back( xp[i] + ishift*dx[0]/2 );
+                                psplit_y.push_back( yp[i] + jshift*dx[1]/2 );
+                                psplit_z.push_back( zp[i] + jshift*dx[2]/2 );
+                                psplit_ux.push_back( uxp[i] );
+                                psplit_uy.push_back( uyp[i] );
+                                psplit_uz.push_back( uzp[i] );
+                                psplit_w.push_back( wp[i]/np_split );
+                            }
+                        }
+                    }
+                } else {
+                    // Split particle in two along each diagonal
+                    // 8 particles in 3d
                     for (int ishift = -1; ishift < 2; ishift +=2 ){
                         // Add one particle with offset in x
                         psplit_x.push_back( xp[i] + ishift*dx[0]/2 );
@@ -1690,9 +1703,9 @@ PhysicalParticleContainer::SplitParticles(int lev)
                         psplit_uz.push_back( uzp[i] );
                         psplit_w.push_back( wp[i]/np_split );
                     }
-		}
+                }
 #endif
-		// invalidate the particle
+                // invalidate the particle
                 p.m_idata.id = -p.m_idata.id;
             }
         }
@@ -1704,16 +1717,16 @@ PhysicalParticleContainer::SplitParticles(int lev)
 	// AddNParticles calls Redistribute, so that particles
 	// in pctmp_split are in the proper grids and tiles
 	pctmp_split.AddNParticles(lev, 
-                                  np_split_to_add,
-                                  psplit_x.dataPtr(),
-                                  psplit_y.dataPtr(),
-                                  psplit_z.dataPtr(),
-                                  psplit_ux.dataPtr(),
-                                  psplit_uy.dataPtr(),
-                                  psplit_uz.dataPtr(),
-                                  1,
-                                  psplit_w.dataPtr(),
-                                  1, NoSplitParticleID);
+                              np_split_to_add,
+                              psplit_x.dataPtr(),
+                              psplit_y.dataPtr(),
+                              psplit_z.dataPtr(),
+                              psplit_ux.dataPtr(),
+                              psplit_uy.dataPtr(),
+                              psplit_uz.dataPtr(),
+                              1,
+                              psplit_w.dataPtr(),
+                              1, NoSplitParticleID);
 	// Copy particles from tmp to current particle container
     addParticles(pctmp_split,1);
 	// Clear tmp container
@@ -1722,14 +1735,20 @@ PhysicalParticleContainer::SplitParticles(int lev)
 
 void
 PhysicalParticleContainer::PushPX(WarpXParIter& pti,
-	                          Cuda::ManagedDeviceVector<Real>& xp,
+                                  Cuda::ManagedDeviceVector<Real>& xp,
                                   Cuda::ManagedDeviceVector<Real>& yp,
                                   Cuda::ManagedDeviceVector<Real>& zp,
                                   Cuda::ManagedDeviceVector<Real>& giv,
                                   Real dt)
 {
 
-    // This wraps the call to warpx_particle_pusher so that inheritors can modify the call.
+    if (WarpX::do_boosted_frame_diagnostic && do_boosted_frame_diags)
+    {
+        copy_attribs(pti, xp.dataPtr(), yp.dataPtr(), zp.dataPtr());
+    }
+
+    // The following attributes should be included in CPP version of warpx_particle_pusher
+	// This wraps the call to warpx_particle_pusher so that inheritors can modify the call.
     auto& attribs = pti.GetAttribs();
     auto& uxp = attribs[PIdx::ux];
     auto& uyp = attribs[PIdx::uy];
@@ -1741,22 +1760,7 @@ PhysicalParticleContainer::PushPX(WarpXParIter& pti,
     auto& Byp = attribs[PIdx::By];
     auto& Bzp = attribs[PIdx::Bz];
     const long np  = pti.numParticles();
-
-    if (WarpX::do_boosted_frame_diagnostic && do_boosted_frame_diags)
-    {
-        auto& xpold    = pti.GetAttribs(particle_comps["xold"]);
-        auto& ypold    = pti.GetAttribs(particle_comps["yold"]);
-        auto& zpold    = pti.GetAttribs(particle_comps["zold"]);
-        auto& uxpold   = pti.GetAttribs(particle_comps["uxold"]);
-        auto& uypold   = pti.GetAttribs(particle_comps["uyold"]);
-        auto& uzpold   = pti.GetAttribs(particle_comps["uzold"]);
-
-        warpx_copy_attribs(&np, xp.dataPtr(), yp.dataPtr(), zp.dataPtr(),
-                           uxp.dataPtr(), uyp.dataPtr(), uzp.dataPtr(),
-                           xpold.dataPtr(), ypold.dataPtr(), zpold.dataPtr(),
-                           uxpold.dataPtr(), uypold.dataPtr(), uzpold.dataPtr());
-    }
-
+    
     warpx_particle_pusher(&np,
                           xp.dataPtr(),
                           yp.dataPtr(),
@@ -1791,8 +1795,8 @@ PhysicalParticleContainer::PushP (int lev, Real dt,
         int thread_num = 0;
 #endif      
         for (WarpXParIter pti(*this, lev); pti.isValid(); ++pti)
-	{
-	    const Box& box = pti.validbox();
+        {
+            const Box& box = pti.validbox();
 
             auto& attribs = pti.GetAttribs();
 
@@ -1808,26 +1812,26 @@ PhysicalParticleContainer::PushP (int lev, Real dt,
 
             const long np = pti.numParticles();
 
-	    // Data on the grid
-	    const FArrayBox& exfab = Ex[pti];
-	    const FArrayBox& eyfab = Ey[pti];
-	    const FArrayBox& ezfab = Ez[pti];
-	    const FArrayBox& bxfab = Bx[pti];
-	    const FArrayBox& byfab = By[pti];
-	    const FArrayBox& bzfab = Bz[pti];
-
-	    Exp.assign(np,0.0);
-	    Eyp.assign(np,0.0);
-	    Ezp.assign(np,0.0);
-	    Bxp.assign(np,WarpX::B_external[0]);
-	    Byp.assign(np,WarpX::B_external[1]);
-	    Bzp.assign(np,WarpX::B_external[2]);
-
-	    m_giv[thread_num].resize(np);
-
-	    //
-	    // copy data from particle container to temp arrays
-	    //
+            // Data on the grid
+            const FArrayBox& exfab = Ex[pti];
+            const FArrayBox& eyfab = Ey[pti];
+            const FArrayBox& ezfab = Ez[pti];
+            const FArrayBox& bxfab = Bx[pti];
+            const FArrayBox& byfab = By[pti];
+            const FArrayBox& bzfab = Bz[pti];
+
+            Exp.assign(np,0.0);
+            Eyp.assign(np,0.0);
+            Ezp.assign(np,0.0);
+            Bxp.assign(np,WarpX::B_external[0]);
+            Byp.assign(np,WarpX::B_external[1]);
+            Bzp.assign(np,WarpX::B_external[2]);
+
+            m_giv[thread_num].resize(np);
+
+            //
+            // copy data from particle container to temp arrays
+            //
             pti.GetPosition(m_xp[thread_num], m_yp[thread_num], m_zp[thread_num]);
 
             const std::array<Real,3>& xyzmin_grid = WarpX::LowerCorner(box, lev);
@@ -1870,6 +1874,38 @@ PhysicalParticleContainer::PushP (int lev, Real dt,
     }
 }
 
+void PhysicalParticleContainer::copy_attribs(WarpXParIter& pti,const Real* xp,
+                                             const Real* yp, const Real* zp)
+{
+
+    auto& attribs = pti.GetAttribs();
+    
+    Real* AMREX_RESTRICT uxp = attribs[PIdx::ux].dataPtr();
+    Real* AMREX_RESTRICT uyp = attribs[PIdx::uy].dataPtr();
+    Real* AMREX_RESTRICT uzp = attribs[PIdx::uz].dataPtr();
+    
+    Real* AMREX_RESTRICT xpold = pti.GetAttribs(particle_comps["xold"]).dataPtr();
+    Real* AMREX_RESTRICT ypold = pti.GetAttribs(particle_comps["yold"]).dataPtr();
+    Real* AMREX_RESTRICT zpold = pti.GetAttribs(particle_comps["zold"]).dataPtr();
+    Real* AMREX_RESTRICT uxpold = pti.GetAttribs(particle_comps["uxold"]).dataPtr();
+    Real* AMREX_RESTRICT uypold = pti.GetAttribs(particle_comps["uyold"]).dataPtr();
+    Real* AMREX_RESTRICT uzpold = pti.GetAttribs(particle_comps["uzold"]).dataPtr();
+    
+    const long np = pti.numParticles();
+    
+    ParallelFor( np,
+                 [=] AMREX_GPU_DEVICE (long i) {
+                     xpold[i]=xp[i];
+                     ypold[i]=yp[i];
+                     zpold[i]=zp[i];
+            
+                     uxpold[i]=uxp[i];
+                     uypold[i]=uyp[i];
+                     uzpold[i]=uzp[i];
+                 }
+        );
+}
+
 void PhysicalParticleContainer::GetParticleSlice(const int direction, const Real z_old,
                                                  const Real z_new, const Real t_boost,
                                                  const Real t_lab, const Real dt,
diff --git a/Source/Particles/RigidInjectedParticleContainer.cpp b/Source/Particles/RigidInjectedParticleContainer.cpp
index 2a3e8dd0d..9bd4cb4fc 100644
--- a/Source/Particles/RigidInjectedParticleContainer.cpp
+++ b/Source/Particles/RigidInjectedParticleContainer.cpp
@@ -211,6 +211,11 @@ RigidInjectedParticleContainer::PushPX(WarpXParIter& pti,
                                        Real dt)
 {
 
+    if (WarpX::do_boosted_frame_diagnostic && do_boosted_frame_diags)
+    {
+        copy_attribs(pti, xp.dataPtr(), yp.dataPtr(), zp.dataPtr());
+    }
+    
     // This wraps the call to warpx_particle_pusher so that inheritors can modify the call.
     auto& attribs = pti.GetAttribs();
     auto& uxp = attribs[PIdx::ux];
@@ -224,21 +229,6 @@ RigidInjectedParticleContainer::PushPX(WarpXParIter& pti,
     auto& Bzp = attribs[PIdx::Bz];
     const long np  = pti.numParticles();
 
-    if (WarpX::do_boosted_frame_diagnostic && do_boosted_frame_diags)
-    {
-        auto& xpold    = pti.GetAttribs(particle_comps["xold"]);
-        auto& ypold    = pti.GetAttribs(particle_comps["yold"]);
-        auto& zpold    = pti.GetAttribs(particle_comps["zold"]);
-        auto& uxpold   = pti.GetAttribs(particle_comps["uxold"]);
-        auto& uypold   = pti.GetAttribs(particle_comps["uyold"]);
-        auto& uzpold   = pti.GetAttribs(particle_comps["uzold"]);
-
-        warpx_copy_attribs(&np, xp.dataPtr(), yp.dataPtr(), zp.dataPtr(),
-                           uxp.dataPtr(), uyp.dataPtr(), uzp.dataPtr(),
-                           xpold.dataPtr(), ypold.dataPtr(), zpold.dataPtr(),
-                           uxpold.dataPtr(), uypold.dataPtr(), uzpold.dataPtr());
-    }
-
     // Save the position and momenta, making copies
     Cuda::ManagedDeviceVector<Real> xp_save, yp_save, zp_save;
     RealVector uxp_save, uyp_save, uzp_save;
diff --git a/Source/Particles/WarpXParticleContainer.H b/Source/Particles/WarpXParticleContainer.H
index 0e800bf1d..662b2e1b8 100644
--- a/Source/Particles/WarpXParticleContainer.H
+++ b/Source/Particles/WarpXParticleContainer.H
@@ -189,6 +189,21 @@ public:
                                 int depos_lev,
                                 amrex::Real dt);
 
+    virtual void DepositCurrentFortran(WarpXParIter& pti,
+                                       RealVector& wp,
+                                       RealVector& uxp,
+                                       RealVector& uyp,
+                                       RealVector& uzp,
+                                       amrex::MultiFab* jx,
+                                       amrex::MultiFab* jy,
+                                       amrex::MultiFab* jz,
+                                       const long offset,
+                                       const long np_to_depose,
+                                       int thread_num,
+                                       int lev,
+                                       int depos_lev,
+                                       amrex::Real dt);
+
     // If particles start outside of the domain, ContinuousInjection 
     // makes sure that they are initialized when they enter the domain, and 
     // NOT before. Virtual function, overriden by derived classes. 
@@ -253,8 +268,8 @@ public:
         AddIntComp(comm);
     }
 
-    int DoBoostedFrameDiags () const { return do_boosted_frame_diags; }
-    
+    int DoBoostedFrameDiags () const { return do_boosted_frame_diags; }    
+
 protected:
 
     std::map<std::string, int> particle_comps;
diff --git a/Source/Particles/WarpXParticleContainer.cpp b/Source/Particles/WarpXParticleContainer.cpp
index 317f46fd4..7316dcc95 100644
--- a/Source/Particles/WarpXParticleContainer.cpp
+++ b/Source/Particles/WarpXParticleContainer.cpp
@@ -10,6 +10,7 @@
 // Import low-level single-particle kernels
 #include <GetAndSetPosition.H>
 #include <UpdatePosition.H>
+#include <CurrentDeposition.H>
 
 using namespace amrex;
 
@@ -279,7 +280,7 @@ WarpXParticleContainer::AddNParticles (int lev,
     Redistribute();
 }
 
-/* \brief Current Deposition for thread thread_num
+/* \brief Current Deposition for thread thread_num using PICSAR
  * \param pti         : Particle iterator
  * \param wp          : Array of particle weights
  * \param uxp uyp uzp : Array of particle
@@ -292,16 +293,15 @@ WarpXParticleContainer::AddNParticles (int lev,
  * \param lev         : Level of box that contains particles
  * \param depos_lev   : Level on which particles deposit (if buffers are used)
  * \param dt          : Time step for particle level
- * \param ngJ         : Number of ghosts cells (of lev)
  */
 void
-WarpXParticleContainer::DepositCurrent(WarpXParIter& pti,
-                                       RealVector& wp, RealVector& uxp,
-                                       RealVector& uyp, RealVector& uzp,
-                                       MultiFab* jx, MultiFab* jy, MultiFab* jz,
-                                       const long offset, const long np_to_depose,
-                                       int thread_num, int lev, int depos_lev,
-                                       Real dt)
+WarpXParticleContainer::DepositCurrentFortran(WarpXParIter& pti,
+                                              RealVector& wp, RealVector& uxp,
+                                              RealVector& uyp, RealVector& uzp,
+                                              MultiFab* jx, MultiFab* jy, MultiFab* jz,
+                                              const long offset, const long np_to_depose,
+                                              int thread_num, int lev, int depos_lev,
+                                              Real dt)
 {
     AMREX_ALWAYS_ASSERT_WITH_MESSAGE((depos_lev==(lev-1)) ||
                                      (depos_lev==(lev  )),
@@ -414,6 +414,130 @@ WarpXParticleContainer::DepositCurrent(WarpXParIter& pti,
 #endif
 }
 
+/* \brief Current Deposition for thread thread_num
+ * \param pti         : Particle iterator
+ * \param wp          : Array of particle weights
+ * \param uxp uyp uzp : Array of particle
+ * \param jx jy jz    : Full array of current density
+ * \param offset      : Index of first particle for which current is deposited
+ * \param np_to_depose: Number of particles for which current is deposited.
+                        Particles [offset,offset+np_tp_depose] deposit current
+ * \param thread_num  : Thread number (if tiling)
+ * \param lev         : Level of box that contains particles
+ * \param depos_lev   : Level on which particles deposit (if buffers are used)
+ * \param dt          : Time step for particle level
+ */
+void
+WarpXParticleContainer::DepositCurrent(WarpXParIter& pti,
+                                       RealVector& wp, RealVector& uxp,
+                                       RealVector& uyp, RealVector& uzp,
+                                       MultiFab* jx, MultiFab* jy, MultiFab* jz,
+                                       const long offset, const long np_to_depose,
+                                       int thread_num, int lev, int depos_lev,
+                                       Real dt)
+{    
+    AMREX_ALWAYS_ASSERT_WITH_MESSAGE((depos_lev==(lev-1)) ||
+                                     (depos_lev==(lev  )),
+                                     "Deposition buffers only work for lev-1");
+    // If no particles, do not do anything
+    if (np_to_depose == 0) return;
+
+    const long ngJ = jx->nGrow();
+    const std::array<Real,3>& dx = WarpX::CellSize(std::max(depos_lev,0));
+    int j_is_nodal = jx->is_nodal() and jy->is_nodal() and jz->is_nodal();
+    Real q = this->charge;
+    const Real stagger_shift = j_is_nodal ? 0.0 : 0.5;
+
+    BL_PROFILE_VAR_NS("PPC::Evolve::Accumulate", blp_accumulate);
+
+    // Get tile box where current is deposited.
+    // The tile box is different when depositing in the buffers (depos_lev<lev)
+    // or when depositing inside the level (depos_lev=lev)
+    Box tilebox;
+    if (lev == depos_lev) {
+        tilebox = pti.tilebox();
+    } else {
+        const IntVect& ref_ratio = WarpX::RefRatio(depos_lev);
+        tilebox = amrex::coarsen(pti.tilebox(),ref_ratio);
+    }
+    
+    // Staggered tile boxes (different in each direction)
+    Box tbx = convert(tilebox, WarpX::jx_nodal_flag);
+    Box tby = convert(tilebox, WarpX::jy_nodal_flag);
+    Box tbz = convert(tilebox, WarpX::jz_nodal_flag);
+    tilebox.grow(ngJ);
+
+#ifdef AMREX_USE_GPU
+    // No tiling on GPU: jx_ptr points to the full
+    // jx array (same for jy_ptr and jz_ptr).
+    Array4<Real> const& jx_arr = jx->array(pti);
+    Array4<Real> const& jy_arr = jy->array(pti);
+    Array4<Real> const& jz_arr = jz->array(pti);
+#else
+    // Tiling is on: jx_ptr points to local_jx[thread_num]
+    // (same for jy_ptr and jz_ptr)
+    tbx.grow(ngJ);
+    tby.grow(ngJ);
+    tbz.grow(ngJ);
+
+    local_jx[thread_num].resize(tbx);
+    local_jy[thread_num].resize(tby);
+    local_jz[thread_num].resize(tbz);
+
+    // local_jx[thread_num] is set to zero
+    local_jx[thread_num].setVal(0.0);
+    local_jy[thread_num].setVal(0.0);
+    local_jz[thread_num].setVal(0.0);
+
+    Array4<Real> const& jx_arr = local_jx[thread_num].array();
+    Array4<Real> const& jy_arr = local_jy[thread_num].array();
+    Array4<Real> const& jz_arr = local_jz[thread_num].array();
+#endif
+    // GPU, no tiling: deposit directly in jx
+    // CPU, tiling: deposit into local_jx
+    // (same for jx and jz)
+
+    Real* AMREX_RESTRICT xp = m_xp[thread_num].dataPtr() + offset;
+    Real* AMREX_RESTRICT zp = m_zp[thread_num].dataPtr() + offset;
+    Real* AMREX_RESTRICT yp = m_yp[thread_num].dataPtr() + offset;
+
+    // Lower corner of tile box physical domain
+    const std::array<Real, 3>& xyzmin = WarpX::LowerCorner(tilebox, depos_lev);;
+    // xyzmin is built on pti.tilebox(), so it does 
+    // not include staggering, so the stagger_shift has to be done by hand.
+    // Alternatively, we could define xyzminx from tbx (and the same for 3 
+    // directions and for jx, jy, jz). This way, sx0 would not be needed.
+    // Better for memory? worth trying?    
+    const Dim3 lo = lbound(tilebox);
+
+    if        (WarpX::nox == 1){
+        doDepositionShapeN<1>(xp, yp, zp, wp.dataPtr(), uxp.dataPtr(), 
+                              uyp.dataPtr(), uzp.dataPtr(), jx_arr, jy_arr, 
+                              jz_arr, offset, np_to_depose, dt, dx,
+                              xyzmin, lo, stagger_shift, q);
+    } else if (WarpX::nox == 2){
+        doDepositionShapeN<2>(xp, yp, zp, wp.dataPtr(), uxp.dataPtr(), 
+                              uyp.dataPtr(), uzp.dataPtr(), jx_arr, jy_arr, 
+                              jz_arr, offset, np_to_depose, dt, dx,
+                              xyzmin, lo, stagger_shift, q);
+    } else if (WarpX::nox == 3){
+        doDepositionShapeN<3>(xp, yp, zp, wp.dataPtr(), uxp.dataPtr(), 
+                              uyp.dataPtr(), uzp.dataPtr(), jx_arr, jy_arr, 
+                              jz_arr, offset, np_to_depose, dt, dx,
+                              xyzmin, lo, stagger_shift, q);
+    }
+
+#ifndef AMREX_USE_GPU
+    BL_PROFILE_VAR_START(blp_accumulate);
+    // CPU, tiling: atomicAdd local_jx into jx
+    // (same for jx and jz)
+    (*jx)[pti].atomicAdd(local_jx[thread_num], tbx, tbx, 0, 0, 1);
+    (*jy)[pti].atomicAdd(local_jy[thread_num], tby, tby, 0, 0, 1);
+    (*jz)[pti].atomicAdd(local_jz[thread_num], tbz, tbz, 0, 0, 1);
+    BL_PROFILE_VAR_STOP(blp_accumulate);
+#endif
+}
+
 void
 WarpXParticleContainer::DepositCharge ( WarpXParIter& pti, RealVector& wp,
                                         MultiFab* rhomf, MultiFab* crhomf, int icomp,
@@ -932,3 +1056,5 @@ WarpXParticleContainer::particlePostLocate(ParticleType& p,
     if (pld.m_lev == lev-1){
     }
 }
+
+
diff --git a/Source/Python/WarpXWrappers.cpp b/Source/Python/WarpXWrappers.cpp
index 3c1a930b3..3ed4830f5 100644
--- a/Source/Python/WarpXWrappers.cpp
+++ b/Source/Python/WarpXWrappers.cpp
@@ -10,22 +10,26 @@
 
 namespace 
 {
-    double** getMultiFabPointers(const amrex::MultiFab& mf, int *num_boxes, int *ngrow, int **shapes)
+    double** getMultiFabPointers(const amrex::MultiFab& mf, int *num_boxes, int *ncomps, int *ngrow, int **shapes)
     {
+        *ncomps = mf.nComp();
         *ngrow = mf.nGrow();
         *num_boxes = mf.local_size();
-        *shapes = (int*) malloc(AMREX_SPACEDIM * (*num_boxes) * sizeof(int));
+        int shapesize = AMREX_SPACEDIM;
+        if (mf.nComp() > 1) shapesize += 1;
+        *shapes = (int*) malloc(shapesize * (*num_boxes) * sizeof(int));
         double** data = (double**) malloc((*num_boxes) * sizeof(double*));
         
-        int i = 0;
 #ifdef _OPENMP
 #pragma omp parallel
 #endif
-        for ( amrex::MFIter mfi(mf, false); mfi.isValid(); ++mfi, ++i ) {
+        for ( amrex::MFIter mfi(mf, false); mfi.isValid(); ++mfi ) {
+            int i = mfi.LocalIndex();
             data[i] = (double*) mf[mfi].dataPtr();
             for (int j = 0; j < AMREX_SPACEDIM; ++j) {
-                (*shapes)[AMREX_SPACEDIM*i+j] = mf[mfi].box().length(j); 
+                (*shapes)[shapesize*i+j] = mf[mfi].box().length(j); 
             }
+            if (mf.nComp() > 1) (*shapes)[shapesize*i+2] = mf.nComp();
         }
         return data;
     }
@@ -197,9 +201,9 @@ extern "C"
     }
 
     double** warpx_getEfield(int lev, int direction,
-                             int *return_size, int *ngrow, int **shapes) {
+                             int *return_size, int *ncomps, int *ngrow, int **shapes) {
         auto & mf = WarpX::GetInstance().getEfield(lev, direction);
-        return getMultiFabPointers(mf, return_size, ngrow, shapes);
+        return getMultiFabPointers(mf, return_size, ncomps, ngrow, shapes);
     }
 
     int* warpx_getEfieldLoVects(int lev, int direction,
@@ -209,9 +213,9 @@ extern "C"
     }
 
     double** warpx_getEfieldCP(int lev, int direction,
-                               int *return_size, int *ngrow, int **shapes) {
+                               int *return_size, int *ncomps, int *ngrow, int **shapes) {
         auto & mf = WarpX::GetInstance().getEfield_cp(lev, direction);
-        return getMultiFabPointers(mf, return_size, ngrow, shapes);
+        return getMultiFabPointers(mf, return_size, ncomps, ngrow, shapes);
     }
 
     int* warpx_getEfieldCPLoVects(int lev, int direction,
@@ -221,9 +225,9 @@ extern "C"
     }
 
     double** warpx_getEfieldFP(int lev, int direction,
-                               int *return_size, int *ngrow, int **shapes) {
+                               int *return_size, int *ncomps, int *ngrow, int **shapes) {
         auto & mf = WarpX::GetInstance().getEfield_fp(lev, direction);
-        return getMultiFabPointers(mf, return_size, ngrow, shapes);
+        return getMultiFabPointers(mf, return_size, ncomps, ngrow, shapes);
     }
 
     int* warpx_getEfieldFPLoVects(int lev, int direction,
@@ -233,9 +237,9 @@ extern "C"
     }
 
     double** warpx_getBfield(int lev, int direction,
-                             int *return_size, int *ngrow, int **shapes) {
+                             int *return_size, int *ncomps, int *ngrow, int **shapes) {
         auto & mf = WarpX::GetInstance().getBfield(lev, direction);
-        return getMultiFabPointers(mf, return_size, ngrow, shapes);
+        return getMultiFabPointers(mf, return_size, ncomps, ngrow, shapes);
     }
 
     int* warpx_getBfieldLoVects(int lev, int direction,
@@ -245,9 +249,9 @@ extern "C"
     }
 
     double** warpx_getBfieldCP(int lev, int direction,
-                               int *return_size, int *ngrow, int **shapes) {
+                               int *return_size, int *ncomps, int *ngrow, int **shapes) {
         auto & mf = WarpX::GetInstance().getBfield_cp(lev, direction);
-        return getMultiFabPointers(mf, return_size, ngrow, shapes);
+        return getMultiFabPointers(mf, return_size, ncomps, ngrow, shapes);
     }
 
     int* warpx_getBfieldCPLoVects(int lev, int direction,
@@ -257,9 +261,9 @@ extern "C"
     }
 
     double** warpx_getBfieldFP(int lev, int direction,
-                               int *return_size, int *ngrow, int **shapes) {
+                               int *return_size, int *ncomps, int *ngrow, int **shapes) {
         auto & mf = WarpX::GetInstance().getBfield_fp(lev, direction);
-        return getMultiFabPointers(mf, return_size, ngrow, shapes);
+        return getMultiFabPointers(mf, return_size, ncomps, ngrow, shapes);
     }
 
     int* warpx_getBfieldFPLoVects(int lev, int direction,
@@ -269,9 +273,9 @@ extern "C"
     }
 
     double** warpx_getCurrentDensity(int lev, int direction,
-                                     int *return_size, int *ngrow, int **shapes) {
+                                     int *return_size, int *ncomps, int *ngrow, int **shapes) {
         auto & mf = WarpX::GetInstance().getcurrent(lev, direction);
-        return getMultiFabPointers(mf, return_size, ngrow, shapes);
+        return getMultiFabPointers(mf, return_size, ncomps, ngrow, shapes);
     }
 
     int* warpx_getCurrentDensityLoVects(int lev, int direction,
@@ -281,9 +285,9 @@ extern "C"
     }
 
     double** warpx_getCurrentDensityCP(int lev, int direction,
-                                       int *return_size, int *ngrow, int **shapes) {
+                                       int *return_size, int *ncomps, int *ngrow, int **shapes) {
         auto & mf = WarpX::GetInstance().getcurrent_cp(lev, direction);
-        return getMultiFabPointers(mf, return_size, ngrow, shapes);
+        return getMultiFabPointers(mf, return_size, ncomps, ngrow, shapes);
     }
 
     int* warpx_getCurrentDensityCPLoVects(int lev, int direction,
@@ -293,9 +297,9 @@ extern "C"
     }
 
     double** warpx_getCurrentDensityFP(int lev, int direction,
-                                       int *return_size, int *ngrow, int **shapes) {
+                                       int *return_size, int *ncomps, int *ngrow, int **shapes) {
         auto & mf = WarpX::GetInstance().getcurrent_fp(lev, direction);
-        return getMultiFabPointers(mf, return_size, ngrow, shapes);
+        return getMultiFabPointers(mf, return_size, ncomps, ngrow, shapes);
     }
 
     int* warpx_getCurrentDensityFPLoVects(int lev, int direction,
diff --git a/Source/Python/WarpXWrappers.h b/Source/Python/WarpXWrappers.h
index 94fbb0d30..44e0ed4e1 100644
--- a/Source/Python/WarpXWrappers.h
+++ b/Source/Python/WarpXWrappers.h
@@ -62,19 +62,19 @@ extern "C" {
     long warpx_getNumParticles(int speciesnumber);
     
     double** warpx_getEfield(int lev, int direction, 
-                             int *return_size, int* ngrow, int **shapes);
+                             int *return_size, int* ncomps, int* ngrow, int **shapes);
     
     int* warpx_getEfieldLoVects(int lev, int direction, 
                                 int *return_size, int* ngrow);
     
     double** warpx_getBfield(int lev, int direction, 
-                             int *return_size, int* ngrow, int **shapes);
+                             int *return_size, int* ncomps, int* ngrow, int **shapes);
     
     int* warpx_getBfieldLoVects(int lev, int direction, 
                                 int *return_size, int* ngrow);
     
     double** warpx_getCurrentDensity(int lev, int direction, 
-                                     int *return_size, int* ngrow, int **shapes);
+                                     int *return_size, int* ncomps, int* ngrow, int **shapes);
     
     int* warpx_getCurrentDensityLoVects(int lev, int direction, 
                                         int *return_size, int* ngrow);
diff --git a/Source/WarpX.H b/Source/WarpX.H
index 4ad3d119f..ca7596589 100644
--- a/Source/WarpX.H
+++ b/Source/WarpX.H
@@ -81,6 +81,7 @@ public:
     static amrex::Vector<amrex::Real> B_external;
 
     // Algorithms
+    static long use_picsar_deposition;
     static long current_deposition_algo;
     static long charge_deposition_algo;
     static long field_gathering_algo;
@@ -656,7 +657,8 @@ private:
 
     void EvolvePSATD (int numsteps);
     void PushPSATD (amrex::Real dt);
-    void PushPSATD (int lev, amrex::Real dt);
+    void PushPSATD_localFFT (int lev, amrex::Real dt);
+    void PushPSATD_hybridFFT (int lev, amrex::Real dt);
 
 #endif
 
diff --git a/Source/WarpX.cpp b/Source/WarpX.cpp
index 877882037..08948227c 100644
--- a/Source/WarpX.cpp
+++ b/Source/WarpX.cpp
@@ -37,6 +37,7 @@ Vector<int> WarpX::boost_direction = {0,0,0};
 int WarpX::do_compute_max_step_from_zmax = 0;
 Real WarpX::zmax_plasma_to_compute_max_step = 0.;
 
+long WarpX::use_picsar_deposition = 1;
 long WarpX::current_deposition_algo;
 long WarpX::charge_deposition_algo;
 long WarpX::field_gathering_algo;
@@ -484,7 +485,17 @@ WarpX::ReadParameters ()
 
     {
         ParmParse pp("algo");
+        // If not in RZ mode, read use_picsar_deposition
+        // In RZ mode, use_picsar_deposition is on, as the C++ version 
+        // of the deposition does not support RZ
+#ifndef WARPX_RZ
+        pp.query("use_picsar_deposition", use_picsar_deposition);
+#endif
         current_deposition_algo = GetAlgorithmInteger(pp, "current_deposition");
+        if (!use_picsar_deposition){
+            AMREX_ALWAYS_ASSERT_WITH_MESSAGE( current_deposition_algo >= 2, 
+                "if not use_picsar_deposition, cannot use Esirkepov deposition.");
+        }
         charge_deposition_algo = GetAlgorithmInteger(pp, "charge_deposition");
         field_gathering_algo = GetAlgorithmInteger(pp, "field_gathering");
         particle_pusher_algo = GetAlgorithmInteger(pp, "particle_pusher");
@@ -556,8 +567,10 @@ WarpX::MakeNewLevelFromScratch (int lev, Real time, const BoxArray& new_grids,
     InitLevelData(lev, time);
 
 #ifdef WARPX_USE_PSATD
-    AllocLevelDataFFT(lev);
-    InitLevelDataFFT(lev, time);
+    if (fft_hybrid_mpi_decomposition){
+        AllocLevelDataFFT(lev);
+        InitLevelDataFFT(lev, time);
+    }
 #endif
 }
 
@@ -693,6 +706,37 @@ WarpX::AllocLevelData (int lev, const BoxArray& ba, const DistributionMapping& d
     // CKC solver requires one additional guard cell
     if (maxwell_fdtd_solver_id == 1) ngF = std::max( ngF, 1 );
 
+#ifdef WARPX_USE_PSATD
+    if (fft_hybrid_mpi_decomposition == false){
+        // All boxes should have the same number of guard cells
+        // (to avoid temporary parallel copies)
+        // Thus take the max of the required number of guards for each field
+        // Also: the number of guard cell should be enough to contain
+        // the stencil of the FFT solver. Here, this number (`ngFFT`)
+        // is determined *empirically* to be the order of the solver
+        // for nodal, and half the order of the solver for staggered.
+        IntVect ngFFT;
+        if (do_nodal) {
+            ngFFT = IntVect(AMREX_D_DECL(nox_fft, noy_fft, noz_fft));
+        } else {
+            ngFFT = IntVect(AMREX_D_DECL(nox_fft/2, noy_fft/2, noz_fft/2));
+        }
+        for (int i_dim=0; i_dim<AMREX_SPACEDIM; i_dim++ ){
+            int ng_required = ngFFT[i_dim];
+            // Get the max
+            ng_required = std::max( ng_required, ngE[i_dim] );
+            ng_required = std::max( ng_required, ngJ[i_dim] );
+            ng_required = std::max( ng_required, ngRho[i_dim] );
+            ng_required = std::max( ng_required, ngF );
+            // Set the guard cells to this max
+            ngE[i_dim] = ng_required;
+            ngJ[i_dim] = ng_required;
+            ngRho[i_dim] = ng_required;
+            ngF = ng_required;
+        }
+    }
+#endif
+
     AllocLevelMFs(lev, ba, dm, ngE, ngJ, ngRho, ngF);
 }
 
@@ -743,6 +787,21 @@ WarpX::AllocLevelMFs (int lev, const BoxArray& ba, const DistributionMapping& dm
         rho_fp[lev].reset(new MultiFab(amrex::convert(ba,IntVect::TheUnitVector()),dm,2,ngRho));
         rho_fp_owner_masks[lev] = std::move(rho_fp[lev]->OwnerMask(period));
     }
+    if (fft_hybrid_mpi_decomposition == false){
+        // Allocate and initialize the spectral solver
+        std::array<Real,3> dx = CellSize(lev);
+#if (AMREX_SPACEDIM == 3)
+        RealVect dx_vect(dx[0], dx[1], dx[2]);
+#elif (AMREX_SPACEDIM == 2)
+        RealVect dx_vect(dx[0], dx[2]);
+#endif
+        // Get the cell-centered box, with guard cells
+        BoxArray realspace_ba = ba;  // Copy box
+        realspace_ba.enclosedCells().grow(ngE); // cell-centered + guard cells
+        // Define spectral solver
+        spectral_solver_fp[lev].reset( new SpectralSolver( realspace_ba, dm,
+            nox_fft, noy_fft, noz_fft, do_nodal, dx_vect, dt[lev] ) );
+    }
 #endif
 
     //