From 9f99d4f819c7760d174c8888134496bdee5b03f0 Mon Sep 17 00:00:00 2001
From: Revathi Jambunathan <revanathan@login5.summit.olcf.ornl.gov>
Date: Mon, 6 May 2019 20:54:02 -0400
Subject: printoutstatement to debug the swap in rho_old and rho_new in the gpu
 vs cpu

---
 Source/Particles/WarpXParticleContainer.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'Source/Particles/WarpXParticleContainer.cpp')
diff --git a/Source/Particles/WarpXParticleContainer.cpp b/Source/Particles/WarpXParticleContainer.cpp
index 2edd3c636..695faaa62 100644
--- a/Source/Particles/WarpXParticleContainer.cpp
+++ b/Source/Particles/WarpXParticleContainer.cpp
@@ -608,7 +608,7 @@ WarpXParticleContainer::DepositCharge ( WarpXParIter& pti, RealVector& wp,
       const std::array<Real, 3>& xyzmin = xyzmin_tile;
 
 #ifdef AMREX_USE_GPU
-      data_ptr = (*rhomf)[pti].dataPtr();
+      data_ptr = (*rhomf)[pti].dataPtr(icomp);
       auto rholen = (*rhomf)[pti].length();
 #else
       tile_box.grow(ngRho);
-- 
cgit v1.2.3


From eb76cadbc5e76111679f6189e6728e462af41f66 Mon Sep 17 00:00:00 2001
From: Revathi Jambunathan <revanathan@login5.summit.olcf.ornl.gov>
Date: Wed, 8 May 2019 16:21:18 -0400
Subject: temporary print files for debugging

---
 GNUmakefile                                        |  2 +-
 Source/Evolve/WarpXEvolveEM.cpp                    | 58 +++---------------
 .../SpectralSolver/SpectralFieldData.cpp           | 52 ++++++----------
 Source/FieldSolver/WarpXFFT.cpp                    | 70 +++++++---------------
 Source/Initialization/WarpXInitData.cpp            | 20 ++++---
 Source/Parallelization/WarpXComm.cpp               |  7 ++-
 Source/Particles/WarpXParticleContainer.cpp        |  2 +-
 Source/WarpX.cpp                                   |  2 +
 Source/main.cpp                                    |  3 +
 9 files changed, 72 insertions(+), 144 deletions(-)

(limited to 'Source/Particles/WarpXParticleContainer.cpp')

diff --git a/GNUmakefile b/GNUmakefile
index c662281b3..e3ed55d80 100644
--- a/GNUmakefile
+++ b/GNUmakefile
@@ -29,7 +29,7 @@ USE_ASCENT_INSITU = FALSE
 
 WarpxBinDir = Bin
 
-USE_PSATD = TRUE
+USE_PSATD = FALSE
 USE_RZ = FALSE
 
 DO_ELECTROSTATIC = FALSE
diff --git a/Source/Evolve/WarpXEvolveEM.cpp b/Source/Evolve/WarpXEvolveEM.cpp
index 025ff993a..1c5dc0104 100644
--- a/Source/Evolve/WarpXEvolveEM.cpp
+++ b/Source/Evolve/WarpXEvolveEM.cpp
@@ -69,27 +69,20 @@ WarpX::EvolveEM (int numsteps)
         // At the beginning, we have B^{n} and E^{n}.
         // Particles have p^{n} and x^{n}.
         // is_synchronized is true.
+        amrex::Print() << " in evolve before fill boundary \n";
         if (is_synchronized) {
             FillBoundaryE();
             FillBoundaryB();
             UpdateAuxilaryData();
             // on first step, push p by -0.5*dt
+            amrex::Print() << " in evolve before pushP \n";
             for (int lev = 0; lev <= finest_level; ++lev) {
                 mypc->PushP(lev, -0.5*dt[lev],
                             *Efield_aux[lev][0],*Efield_aux[lev][1],*Efield_aux[lev][2],
                             *Bfield_aux[lev][0],*Bfield_aux[lev][1],*Bfield_aux[lev][2]);
             }
             is_synchronized = false;
-
-            for (MFIter mfi(*rho_fp[0]); mfi.isValid(); ++mfi)
-            {
-               MultiFab &mf = *rho_fp[0];
-               Box realspace_bx = mf[mfi].box(); // Copy the box
-               Array4<const Real> mf_arr = mf[mfi].array();
-               amrex::Print() << " at if synchronized rho " << mf_arr(0,0,0,0) ;
-               amrex::Print() << " new rho " << mf_arr(0,0,0,1) << "\n";
-            }
-
+            amrex::Print() << " in evolve after pushP \n";
 
         } else {
            // Beyond one step, we have E^{n} and B^{n}.
@@ -98,15 +91,8 @@ WarpX::EvolveEM (int numsteps)
             FillBoundaryB();
             UpdateAuxilaryData();
 
-            for (MFIter mfi(*rho_fp[0]); mfi.isValid(); ++mfi)
-            {
-               MultiFab &mf = *rho_fp[0];
-               Box realspace_bx = mf[mfi].box(); // Copy the box
-               Array4<const Real> mf_arr = mf[mfi].array();
-               amrex::Print() << " at if synchronized rho " << mf_arr(0,0,0,0) ;
-               amrex::Print() << " new rho " << mf_arr(0,0,0,1) << "\n";
-            }
         }
+        amrex::Print() << " in evolve after fill boundary \n";
 
         if (do_subcycling == 0 || finest_level == 0) {
             OneStep_nosub(cur_time);
@@ -116,6 +102,7 @@ WarpX::EvolveEM (int numsteps)
             amrex::Print() << "Error: do_subcycling = " << do_subcycling << std::endl;
             amrex::Abort("Unsupported do_subcycling type");
         }
+        amrex::Print() << " in evolve after onestep no sub  \n";
 
 #ifdef WARPX_USE_PY
         if (warpx_py_beforeEsolve) warpx_py_beforeEsolve();
@@ -279,52 +266,23 @@ WarpX::OneStep_nosub (Real cur_time)
     if (warpx_py_beforedeposition) warpx_py_beforedeposition();
 #endif
 
-    for (MFIter mfi(*rho_fp[0]); mfi.isValid(); ++mfi)
-    {
-       MultiFab &mf = *rho_fp[0];
-       Box realspace_bx = mf[mfi].box(); // Copy the box
-       Array4<const Real> mf_arr = mf[mfi].array();
-       amrex::Print() << " at before pushf particles and depose rho " << mf_arr(0,0,0,0) ;
-       amrex::Print() << " new rho " << mf_arr(0,0,0,1) << "\n";
-    }
     PushParticlesandDepose(cur_time);
-    for (MFIter mfi(*rho_fp[0]); mfi.isValid(); ++mfi)
-    {
-       MultiFab &mf = *rho_fp[0];
-       Box realspace_bx = mf[mfi].box(); // Copy the box
-       Array4<const Real> mf_arr = mf[mfi].array();
-       amrex::Print() << " at after pushf particles and depose rho " << mf_arr(0,0,0,0) ;
-       amrex::Print() << " new rho " << mf_arr(0,0,0,1) << "\n";
-    }
+
 #ifdef WARPX_USE_PY
     if (warpx_py_afterdeposition) warpx_py_afterdeposition();
 #endif
 
     SyncCurrent();
-    for (MFIter mfi(*rho_fp[0]); mfi.isValid(); ++mfi)
-    {
-       MultiFab &mf = *rho_fp[0];
-       Box realspace_bx = mf[mfi].box(); // Copy the box
-       Array4<const Real> mf_arr = mf[mfi].array();
-       amrex::Print() << " before sync rho " << mf_arr(0,0,0,0) ;
-       amrex::Print() << " new rho " << mf_arr(0,0,0,1) << "\n";
-    }
 
     SyncRho(rho_fp, rho_cp);
 
-    for (MFIter mfi(*rho_fp[0]); mfi.isValid(); ++mfi)
-    {
-       MultiFab &mf = *rho_fp[0];
-       Box realspace_bx = mf[mfi].box(); // Copy the box
-       Array4<const Real> mf_arr = mf[mfi].array();
-       amrex::Print() << " after sync rho " << mf_arr(0,0,0,0) ;
-       amrex::Print() << " new rho " << mf_arr(0,0,0,1) << "\n";
-    }
     // Push E and B from {n} to {n+1}
     // (And update guard cells immediately afterwards)
 #ifdef WARPX_USE_PSATD
     PushPSATD(dt[0]);
+    amrex::Print() << " before fill bndry E \n";
     FillBoundaryE();
+    amrex::Print() << " before fill bndry B \n";
     FillBoundaryB();
 #else
     EvolveF(0.5*dt[0], DtType::FirstHalf);
diff --git a/Source/FieldSolver/SpectralSolver/SpectralFieldData.cpp b/Source/FieldSolver/SpectralSolver/SpectralFieldData.cpp
index 7c2061f8d..5998bdd2b 100644
--- a/Source/FieldSolver/SpectralSolver/SpectralFieldData.cpp
+++ b/Source/FieldSolver/SpectralSolver/SpectralFieldData.cpp
@@ -55,15 +55,13 @@ SpectralFieldData::SpectralFieldData( const BoxArray& realspace_ba,
 #ifdef AMREX_USE_GPU
         // Add cuFFT-specific code
         // Creating 3D plan for real to complex -- double precision
+        cudaDeviceSynchronize();
         cufftResult result;
         result = cufftPlan3d( &forward_plan[mfi], fft_size[2], 
                               fft_size[1],fft_size[0], CUFFT_D2Z);
         if ( result != CUFFT_SUCCESS ) {
            amrex::Print() << " cufftplan3d forward failed! \n";
         }
-        if ( result == CUFFT_SUCCESS ) {
-           amrex::Print() << " created cufft forward plan\n";
-        }
         // Add 2D cuFFT-spacific code for D2Z
         // Note that D2Z is inherently forward plan 
 
@@ -73,9 +71,7 @@ SpectralFieldData::SpectralFieldData( const BoxArray& realspace_ba,
         if ( result != CUFFT_SUCCESS ) {
            amrex::Print() << " cufftplan3d backward failed! \n";
         }
-        if ( result == CUFFT_SUCCESS ) {
-           amrex::Print() << " created cufft backward plan\n";
-        }
+        cudaDeviceSynchronize();
 
 #else
         // Create FFTW plans
@@ -156,15 +152,9 @@ SpectralFieldData::ForwardTransform( const MultiFab& mf,
             [=] AMREX_GPU_DEVICE(int i, int j, int k) noexcept {
                 tmp_arr(i,j,k) = mf_arr(i,j,k,i_comp);                
             });
-#ifdef AMREX_USE_GPU
-            cudaDeviceSynchronize();
-#endif
-            amrex::Print() << " in forward trans icomp " << i_comp << " "  << tmp_arr(0,0,0) << " mf arr " ;
-            amrex::Print() << " " << mf_arr(0,0,0,0);
-            amrex::Print() << " " << mf_arr(15,15,15,0);
-            amrex::Print() << " " << mf_arr(0,0,0,1);
-            amrex::Print() << " " << mf_arr(15,15,15,1);
-            amrex::Print() << "\n";
+//#ifdef AMREX_USE_GPU
+//            cudaDeviceSynchronize();
+//#endif
         }
 
         // Perform Fourier transform from `tmpRealField` to `tmpSpectralField`
@@ -173,8 +163,9 @@ SpectralFieldData::ForwardTransform( const MultiFab& mf,
         // GPU stream as the above copy
         cudaDeviceSynchronize();
         cufftResult result;
-        //cudaStream_t stream = amrex::Cuda::Device::cudaStream(); 
-        //cufftSetStream ( forward_plan[mfi], stream);
+        cudaStream_t stream = amrex::Cuda::Device::cudaStream(); 
+        amrex::Print() << " stream is " << stream << "\n";
+        cufftSetStream ( forward_plan[mfi], stream);
         result = cufftExecD2Z( forward_plan[mfi], 
                                tmpRealField[mfi].dataPtr(), 
                                reinterpret_cast<cuDoubleComplex*>(
@@ -182,13 +173,9 @@ SpectralFieldData::ForwardTransform( const MultiFab& mf,
         if ( result != CUFFT_SUCCESS ) {
            amrex::Print() << " cufftplan3d execute failed ! \n";
         }
-        if ( result == CUFFT_SUCCESS ) {
-           amrex::Print() << " created cufft forward transform\n";
-        }
         cudaDeviceSynchronize();
 #else
         fftw_execute( forward_plan[mfi] );
-        amrex::Print() << " forward fft on cpu\n";
 #endif
 
         // Copy the spectral-space field `tmpSpectralField` to the appropriate
@@ -219,8 +206,6 @@ SpectralFieldData::ForwardTransform( const MultiFab& mf,
                 // Copy field into the right index
                 fields_arr(i,j,k,field_index) = spectral_field_value;
             });
-//            amrex::Print() << " in forward trans after D2Z" << fields_arr(0,0,0,0) ;
-            amrex::Print() << "\n";
         }
     }
 }
@@ -279,10 +264,11 @@ SpectralFieldData::BackwardTransform( MultiFab& mf,
 #ifdef AMREX_USE_GPU
         // Add cuFFT-specific code ; make sure that this is done on the same
         // GPU stream as the above copy
-        cudaDeviceSynchronize(); 
+        cudaDeviceSynchronize();
         cufftResult result;
-        //cudaStream_t stream = amrex::Cuda::Device::cudaStream(); 
-        //cufftSetStream ( backward_plan[mfi], stream);
+        cudaStream_t stream = amrex::Cuda::Device::cudaStream(); 
+        amrex::Print() << " stream is " << stream << "\n";
+        cufftSetStream ( backward_plan[mfi], stream);
         result = cufftExecZ2D( backward_plan[mfi], 
                                reinterpret_cast<cuDoubleComplex*>(
                                tmpSpectralField[mfi].dataPtr()),
@@ -293,10 +279,9 @@ SpectralFieldData::BackwardTransform( MultiFab& mf,
         if ( result == CUFFT_SUCCESS ) {
            amrex::Print() << " created cufft inverse transform\n";
         }
-        cudaDeviceSynchronize(); 
+        cudaDeviceSynchronize();
 #else
         fftw_execute( backward_plan[mfi] );
-        amrex::Print() << " cpu inverse done\n";
 #endif
 
         // Copy the temporary field `tmpRealField` to the real-space field `mf`
@@ -313,15 +298,14 @@ SpectralFieldData::BackwardTransform( MultiFab& mf,
                 // Copy and normalize field
                 mf_arr(i,j,k,i_comp) = inv_N*tmp_arr(i,j,k);
             });
+//#ifdef AMREX_USE_GPU
+//            cudaDeviceSynchronize();
+//#endif
+
 #ifdef AMREX_USE_GPU
             cudaDeviceSynchronize();
 #endif
-            amrex::Print() << " after backward plan in real space 0,0,0 " << mf_arr(0,0,0,0) << " tmp " << tmp_arr(0,0,0) << "\n";
-            amrex::Print() << " after backward plan in real space 15, 15, 15 " << mf_arr(15,15,15,0) << " tmp " << tmp_arr(0,0,0) << "\n";
-            amrex::Print() << "\n";
-#ifdef AMREX_USE_GPU
-            cudaDeviceSynchronize();
-#endif
+            amrex::Print() << " divided by 1/N \n";
         }
     }
 }
diff --git a/Source/FieldSolver/WarpXFFT.cpp b/Source/FieldSolver/WarpXFFT.cpp
index ffeb237c0..c4e0461f9 100644
--- a/Source/FieldSolver/WarpXFFT.cpp
+++ b/Source/FieldSolver/WarpXFFT.cpp
@@ -405,20 +405,6 @@ WarpX::PushPSATD (int lev, amrex::Real /* dt */)
     rho_fp_fft[lev]->ParallelCopy(*rho_fp[lev], 0, 0, 2, 0, 0, period_fp);
     BL_PROFILE_VAR_STOP(blp_copy);
 
-
-    for (MFIter mfi(*rho_fp_fft[lev]); mfi.isValid(); ++mfi)
-    {
-       MultiFab &mf = *rho_fp_fft[lev];
-       Box realspace_bx = mf[mfi].box(); // Copy the box
-       Array4<const Real> mf_arr = mf[mfi].array();
-       amrex::Print() << " at begin Push PSATD rho " << mf_arr(0,0,0,0) ;
-       amrex::Print() << " new rho " << mf_arr(0,0,0,1) << "\n";
-       MultiFab &mf_orig = *rho_fp[lev];
-       Array4<const Real> mforig_arr = mf_orig[mfi].array();
-       amrex::Print() << " at begin Push PSATD rho " << mforig_arr(0,0,0,0) ;
-       amrex::Print() << " new rho " << mforig_arr(0,0,0,1) << "\n";
-    }
-
     BL_PROFILE_VAR_START(blp_push_eb);
     if (fft_hybrid_mpi_decomposition){
         if (Efield_fp_fft[lev][0]->local_size() == 1)
@@ -465,44 +451,29 @@ WarpX::PushPSATD (int lev, amrex::Real /* dt */)
         // Not using the hybrid decomposition
         auto& solver = *spectral_solver_fp[lev];
 
-        //// Perform forward Fourier transform
-        //amrex::Print() << " FTT of Ex \n";
-        //solver.ForwardTransform(*Efield_fp_fft[lev][0], SpectralFieldIndex::Ex);
-        //amrex::Print() << " FTT of Ey \n";
-        //solver.ForwardTransform(*Efield_fp_fft[lev][1], SpectralFieldIndex::Ey);
-        //amrex::Print() << " FTT of Ez \n";
-        //solver.ForwardTransform(*Efield_fp_fft[lev][2], SpectralFieldIndex::Ez);
-        //amrex::Print() << " FTT of Bx \n";
-        //solver.ForwardTransform(*Bfield_fp_fft[lev][0], SpectralFieldIndex::Bx);
-        //amrex::Print() << " FTT of By \n";
-        //solver.ForwardTransform(*Bfield_fp_fft[lev][1], SpectralFieldIndex::By);
-        //amrex::Print() << " FTT of Bz \n";
-        //solver.ForwardTransform(*Bfield_fp_fft[lev][2], SpectralFieldIndex::Bz);
-        //amrex::Print() << " FTT of Jx \n";
-        //solver.ForwardTransform(*current_fp_fft[lev][0], SpectralFieldIndex::Jx);
-        //amrex::Print() << " FTT of Jy \n";
-        //solver.ForwardTransform(*current_fp_fft[lev][1], SpectralFieldIndex::Jy);
-        //amrex::Print() << " FTT of Jz \n";
-        //solver.ForwardTransform(*current_fp_fft[lev][2], SpectralFieldIndex::Jz);
+        // Perform forward Fourier transform
+        solver.ForwardTransform(*Efield_fp_fft[lev][0], SpectralFieldIndex::Ex);
+        solver.ForwardTransform(*Efield_fp_fft[lev][1], SpectralFieldIndex::Ey);
+        solver.ForwardTransform(*Efield_fp_fft[lev][2], SpectralFieldIndex::Ez);
+        solver.ForwardTransform(*Bfield_fp_fft[lev][0], SpectralFieldIndex::Bx);
+        solver.ForwardTransform(*Bfield_fp_fft[lev][1], SpectralFieldIndex::By);
+        solver.ForwardTransform(*Bfield_fp_fft[lev][2], SpectralFieldIndex::Bz);
+        solver.ForwardTransform(*current_fp_fft[lev][0], SpectralFieldIndex::Jx);
+        solver.ForwardTransform(*current_fp_fft[lev][1], SpectralFieldIndex::Jy);
+        solver.ForwardTransform(*current_fp_fft[lev][2], SpectralFieldIndex::Jz);
         solver.ForwardTransform(*rho_fp_fft[lev], SpectralFieldIndex::rho_old, 0);
         solver.ForwardTransform(*rho_fp_fft[lev], SpectralFieldIndex::rho_new, 1);
 
-        ////// Advance fields in spectral space
-        //solver.pushSpectralFields();
-
-        ////// Perform backward Fourier Transform
-        //amrex::Print() << " BT of Ex \n";
-        //solver.BackwardTransform(*Efield_fp_fft[lev][0], SpectralFieldIndex::Ex);
-        //amrex::Print() << " BT of Ey \n";
-        //solver.BackwardTransform(*Efield_fp_fft[lev][1], SpectralFieldIndex::Ey);
-        //amrex::Print() << " BT of Ez \n";
-        //solver.BackwardTransform(*Efield_fp_fft[lev][2], SpectralFieldIndex::Ez);
-        //amrex::Print() << " BT of Bx \n";
-        //solver.BackwardTransform(*Bfield_fp_fft[lev][0], SpectralFieldIndex::Bx);
-        //amrex::Print() << " BT of By \n";
-        //solver.BackwardTransform(*Bfield_fp_fft[lev][1], SpectralFieldIndex::By);
-        //amrex::Print() << " BT of Bz \n";
-        //solver.BackwardTransform(*Bfield_fp_fft[lev][2], SpectralFieldIndex::Bz);
+        // Advance fields in spectral space
+        solver.pushSpectralFields();
+
+        // Perform backward Fourier Transform
+        solver.BackwardTransform(*Efield_fp_fft[lev][0], SpectralFieldIndex::Ex);
+        solver.BackwardTransform(*Efield_fp_fft[lev][1], SpectralFieldIndex::Ey);
+        solver.BackwardTransform(*Efield_fp_fft[lev][2], SpectralFieldIndex::Ez);
+        solver.BackwardTransform(*Bfield_fp_fft[lev][0], SpectralFieldIndex::Bx);
+        solver.BackwardTransform(*Bfield_fp_fft[lev][1], SpectralFieldIndex::By);
+        solver.BackwardTransform(*Bfield_fp_fft[lev][2], SpectralFieldIndex::Bz);
     }
     BL_PROFILE_VAR_STOP(blp_push_eb);
 
@@ -519,4 +490,5 @@ WarpX::PushPSATD (int lev, amrex::Real /* dt */)
     {
         amrex::Abort("WarpX::PushPSATD: TODO");
     }
+    amrex::Print() << " coped data from fft to valid \n";
 }
diff --git a/Source/Initialization/WarpXInitData.cpp b/Source/Initialization/WarpXInitData.cpp
index 589ee168c..b9f27f07e 100644
--- a/Source/Initialization/WarpXInitData.cpp
+++ b/Source/Initialization/WarpXInitData.cpp
@@ -318,16 +318,28 @@ WarpX::InitLevelData (int lev, Real time)
 void
 WarpX::InitLevelDataFFT (int lev, Real time)
 {
+ 
+    amrex::Print() << " print out pointer  " << &Efield_fp_fft[lev][0] << "\n";
     Efield_fp_fft[lev][0]->setVal(0.0);
+    amrex::Print() << " ex set \n";
     Efield_fp_fft[lev][1]->setVal(0.0);
+    amrex::Print() << " ey set \n";
     Efield_fp_fft[lev][2]->setVal(0.0);
+    amrex::Print() << " ez set \n";
     Bfield_fp_fft[lev][0]->setVal(0.0);
+    amrex::Print() << " bx set \n";
     Bfield_fp_fft[lev][1]->setVal(0.0);
+    amrex::Print() << " by set \n";
     Bfield_fp_fft[lev][2]->setVal(0.0);
+    amrex::Print() << " bz set \n";
     current_fp_fft[lev][0]->setVal(0.0);
+    amrex::Print() << " jx set \n";
     current_fp_fft[lev][1]->setVal(0.0);
+    amrex::Print() << " jy set \n";
     current_fp_fft[lev][2]->setVal(0.0);
+    amrex::Print() << " jz set \n";
     rho_fp_fft[lev]->setVal(0.0);
+    amrex::Print() << " rhofp set \n";
 
     if (lev > 0)
     {
@@ -343,14 +355,6 @@ WarpX::InitLevelDataFFT (int lev, Real time)
         rho_cp_fft[lev]->setVal(0.0);
     }
 
-    for (MFIter mfi(*rho_fp_fft[lev]); mfi.isValid(); ++mfi)
-    {
-       MultiFab &mf = *rho_fp_fft[lev];
-       Box realspace_bx = mf[mfi].box(); // Copy the box
-       Array4<const Real> mf_arr = mf[mfi].array();
-       amrex::Print() << " at initialization rho " << mf_arr(0,0,0,0) ;
-       amrex::Print() << " new rho " << mf_arr(0,0,0,1) << "\n";
-    }
 }
 
 #endif
diff --git a/Source/Parallelization/WarpXComm.cpp b/Source/Parallelization/WarpXComm.cpp
index 28971eb0c..23d97133b 100644
--- a/Source/Parallelization/WarpXComm.cpp
+++ b/Source/Parallelization/WarpXComm.cpp
@@ -221,7 +221,9 @@ WarpX::FillBoundaryE ()
 {
     for (int lev = 0; lev <= finest_level; ++lev)
     {
+        amrex::Print() << " lev " << lev << " in fill bndry E \n";
         FillBoundaryE(lev);
+        amrex::Print() << " lev " << lev << " after fill bndry E \n";
     }
 }
 
@@ -245,7 +247,7 @@ void
 WarpX::FillBoundaryE (int lev, PatchType patch_type)
 {
     if (patch_type == PatchType::fine)
-    {
+    {        
         if (do_pml && pml[lev]->ok())
         {
     	    pml[lev]->ExchangeE(patch_type,
@@ -270,9 +272,12 @@ WarpX::FillBoundaryE (int lev, PatchType patch_type)
         pml[lev]->FillBoundaryE(patch_type);
         }
 
+        amrex::Print() << " before defining multifab \n";
         const auto& cperiod = Geom(lev-1).periodicity();
         Vector<MultiFab*> mf{Efield_cp[lev][0].get(),Efield_cp[lev][1].get(),Efield_cp[lev][2].get()};
+        amrex::Print() << " before amrex fill bndry \n";
         amrex::FillBoundary(mf, cperiod);
+        amrex::Print() << " after amrex fill bndry \n";
     }
 }
 
diff --git a/Source/Particles/WarpXParticleContainer.cpp b/Source/Particles/WarpXParticleContainer.cpp
index 695faaa62..94b58ca07 100644
--- a/Source/Particles/WarpXParticleContainer.cpp
+++ b/Source/Particles/WarpXParticleContainer.cpp
@@ -660,7 +660,7 @@ WarpXParticleContainer::DepositCharge ( WarpXParIter& pti, RealVector& wp,
       const std::array<Real,3>& cxyzmin_tile = WarpX::LowerCorner(ctilebox, lev-1);
 
 #ifdef AMREX_USE_GPU
-      data_ptr = (*crhomf)[pti].dataPtr();
+      data_ptr = (*crhomf)[pti].dataPtr(icomp);
       auto rholen = (*crhomf)[pti].length();
 #else
       tile_box = amrex::convert(ctilebox, IntVect::TheUnitVector());
diff --git a/Source/WarpX.cpp b/Source/WarpX.cpp
index 532858556..5814d6829 100644
--- a/Source/WarpX.cpp
+++ b/Source/WarpX.cpp
@@ -558,7 +558,9 @@ WarpX::MakeNewLevelFromScratch (int lev, Real time, const BoxArray& new_grids,
     InitLevelData(lev, time);
 
 #ifdef WARPX_USE_PSATD
+    amrex::Print() << " alloc level data fft \n";
     AllocLevelDataFFT(lev);
+    amrex::Print() << " init level data fft \n";
     InitLevelDataFFT(lev, time);
 #endif
 }
diff --git a/Source/main.cpp b/Source/main.cpp
index d89e89e47..2e1a5e9cf 100644
--- a/Source/main.cpp
+++ b/Source/main.cpp
@@ -30,10 +30,13 @@ int main(int argc, char* argv[])
     const Real strt_total = amrex::second();
 
     {
+        amrex::Print() << " about to construct warpx \n";
 	WarpX warpx;
 	
+        amrex::Print() << " call warpx init data \n";
 	warpx.InitData();
 
+        amrex::Print() << " call warpx evolve  \n";
 	warpx.Evolve();
 	
 	Real end_total = amrex::second() - strt_total;
-- 
cgit v1.2.3


From 002cdf8fd7fb445055956f8de7e6b7bf678ab3ce Mon Sep 17 00:00:00 2001
From: Revathi Jambunathan <revanathan@login2.summit.olcf.ornl.gov>
Date: Tue, 14 May 2019 18:55:53 -0400
Subject: changes made after merging with lastest dev version

---
 Docs/source/building/building.rst                  |  2 +-
 Docs/source/running_cpp/parameters.rst             | 56 ++++++++++--
 Examples/Tests/Langmuir/inputs.multi.rt            |  3 +-
 Python/pywarpx/picmi.py                            |  6 +-
 Source/Diagnostics/BoostedFrameDiagnostic.H        |  9 ++
 Source/Diagnostics/BoostedFrameDiagnostic.cpp      | 72 ++++++++++++----
 Source/Diagnostics/FieldIO.cpp                     | 29 ++++---
 Source/Diagnostics/ParticleIO.cpp                  | 12 ++-
 Source/Diagnostics/WarpXIO.cpp                     |  4 +-
 Source/Evolve/WarpXEvolveEM.cpp                    | 48 +++++++++++
 Source/FortranInterface/WarpX_f.H                  |  6 --
 Source/FortranInterface/WarpX_picsar.F90           | 32 -------
 Source/Laser/LaserParticleContainer.cpp            |  5 +-
 Source/Particles/MultiParticleContainer.H          | 12 ++-
 Source/Particles/MultiParticleContainer.cpp        | 49 ++++++++---
 Source/Particles/PhysicalParticleContainer.cpp     | 45 ++++++++--
 .../Particles/RigidInjectedParticleContainer.cpp   |  2 +-
 Source/Particles/WarpXParticleContainer.H          | 18 +++-
 Source/Particles/WarpXParticleContainer.cpp        | 99 ++++++++++------------
 Source/Utils/WarpXUtil.cpp                         |  3 +-
 Source/WarpX.H                                     | 10 ++-
 Source/WarpX.cpp                                   | 49 +++--------
 22 files changed, 373 insertions(+), 198 deletions(-)

(limited to 'Source/Particles/WarpXParticleContainer.cpp')

diff --git a/Docs/source/building/building.rst b/Docs/source/building/building.rst
index 01787545b..a9ea4e755 100644
--- a/Docs/source/building/building.rst
+++ b/Docs/source/building/building.rst
@@ -19,7 +19,7 @@ single directory (e.g. ``warpx_directory``):
     cd warpx_directory
     git clone --branch dev https://github.com/ECP-WarpX/WarpX.git
     git clone --branch master https://bitbucket.org/berkeleylab/picsar.git
-    git clone --branch developement https://github.com/AMReX-Codes/amrex.git
+    git clone --branch development https://github.com/AMReX-Codes/amrex.git
 
 Basic compilation
 -----------------
diff --git a/Docs/source/running_cpp/parameters.rst b/Docs/source/running_cpp/parameters.rst
index 076e39ce6..b22cf745c 100644
--- a/Docs/source/running_cpp/parameters.rst
+++ b/Docs/source/running_cpp/parameters.rst
@@ -28,6 +28,15 @@ Overall simulation parameters
     The direction of the Lorentz-transform for boosted-frame simulations
     (The direction ``y`` cannot be used in 2D simulations.)
 
+* ``warpx.zmax_plasma_to_compute_max_step`` (`float`) optional
+    Can be useful when running in a boosted frame. If specified, automatically 
+    calculates the number of iterations required in the boosted frame for the 
+    lower `z` end of the simulation domain to reach 
+    ``warpx.zmax_plasma_to_compute_max_step`` (typically the plasma end, 
+    given in the lab frame). The value of ``max_step`` is overwritten, and 
+    printed to standard output. Currently only works if the Lorentz boost and 
+    the moving window are along the z direction.
+
 * ``warpx.verbose`` (`0` or `1`)
     Controls how much information is printed to the terminal, when running WarpX.
 
@@ -283,6 +292,25 @@ Particle initialization
     axes (4 particles in 2D, 6 particles in 3D). When `1`, particles are split
     along the diagonals (4 particles in 2D, 8 particles in 3D).
 
+* ``<species>.plot_species`` (`0` or `1` optional; default `1`)
+    Whether to plot particle quantities for this species.
+
+* ``<species>.plot_vars`` (list of `strings` separated by spaces, optional) 
+    List of particle quantities to write to `plotfiles`. By defaults, all 
+    quantities are written to file. Choices are 
+    * ``w`` for the particle weight,
+    * ``ux`` ``uy`` ``uz`` for the particle momentum, 
+    * ``Ex`` ``Ey`` ``Ez`` for the electric field on particles,
+    * ``Bx`` ``By`` ``Bz`` for the magnetic field on particles.
+    The particle positions are always included. Use 
+    ``<species>.plot_vars = none`` to plot no particle data, except 
+    particle position.
+
+* ``<species>.do_boosted_frame_diags`` (`0` or `1` optional, default `1`)
+    Only used when ``warpx.do_boosted_frame_diagnostic=1``. When running in a 
+    boosted frame, whether or not to plot back-transformed diagnostics for 
+    this species.
+
 * ``warpx.serialize_ics`` (`0 or 1`)
     Whether or not to use OpenMP threading for particle initialization.
 
@@ -547,6 +575,20 @@ Numerics and algorithms
     same points in space) or a staggered grid (i.e. Yee grid ; different
     fields are defined at different points in space)
 
+* ``warpx.do_subcycling`` (`0` or `1`; default: 0)
+    Whether or not to use sub-cycling. Different refinement levels have a 
+    different cell size, which results in different Courant–Friedrichs–Lewy 
+    (CFL) limits for the time step. By default, when using mesh refinement, 
+    the same time step is used for all levels. This time step is 
+    taken as the CFL limit of the finest level. Hence, for coarser 
+    levels, the timestep is only a fraction of the CFL limit for this 
+    level, which may lead to numerical artifacts. With sub-cycling, each level 
+    evolves with its own time step, set to its own CFL limit. In practice, it 
+    means that when level 0 performs one iteration, level 1 performs two 
+    iterations. Currently, this option is only supported when 
+    ``amr.max_level = 1``. More information can be found at 
+    https://ieeexplore.ieee.org/document/8659392.
+
 * ``psatd.nox``, ``psatd.noy``, ``pstad.noz`` (`integer`) optional (default `16` for all)
     The order of accuracy of the spatial derivatives, when using the code compiled with a PSATD solver.
 
@@ -638,14 +680,18 @@ Diagnostics and output
     (This is done by averaging the field.) ``plot_coarsening_ratio`` should
     be an integer divisor of ``blocking_factor``.
 
-* ``warpx.particle_plot_vars`` (`strings`, separated by spaces ; default: all)
-    Control which particle variables get written to the plot file. Choices are:
-    `w`, `ux`, `uy`, `uz`, `Ex`, `Ey`, `Ez`, `Bx`, `By`, and `Bz`.
-    The particle positions and ids are always included.
-
 * ``amr.plot_file`` (`string`)
     Root for output file names. Supports sub-directories. Default `diags/plotfiles/plt`
 
+* ``warpx.plot_J_field`` (`0` or `1` optional; default `1`)
+    Whether to plot the current density.
+
+* ``warpx.plot_E_field`` (`0` or `1` optional; default `1`)
+    Whether to plot the electric field.
+
+* ``warpx.plot_B_field`` (`0` or `1` optional; default `1`)
+    Whether to plot the magnetic field.
+
 Checkpoints and restart
 -----------------------
 WarpX supports checkpoints/restart via AMReX.
diff --git a/Examples/Tests/Langmuir/inputs.multi.rt b/Examples/Tests/Langmuir/inputs.multi.rt
index 334d4ae94..c9969d39a 100644
--- a/Examples/Tests/Langmuir/inputs.multi.rt
+++ b/Examples/Tests/Langmuir/inputs.multi.rt
@@ -13,7 +13,6 @@ amr.max_level = 0
 
 amr.plot_int = 20   # How often to write plotfiles.  "<= 0" means no plotfiles.
 warpx.plot_rho = 1
-warpx.particle_plot_vars = w ux Bz Ey
 
 # Geometry
 geometry.coord_sys   = 0                  # 0: Cartesian
@@ -61,6 +60,7 @@ electrons.ymin = -20.e-6
 electrons.ymax = 20.e-6
 electrons.zmin = -20.e-6
 electrons.zmax = 20.e-6
+electrons.plot_vars = w ux Bz Ey
 
 electrons.profile = constant
 electrons.density = 2.e24   # number of electrons per m^3
@@ -79,6 +79,7 @@ positrons.ymin = -20.e-6
 positrons.ymax = 20.e-6
 positrons.zmin = -20.e-6
 positrons.zmax = 20.e-6
+positrons.plot_vars = w ux Bz Ey
 
 positrons.profile = constant
 positrons.density = 2.e24   # number of positrons per m^3
diff --git a/Python/pywarpx/picmi.py b/Python/pywarpx/picmi.py
index a439205a7..f242f8589 100644
--- a/Python/pywarpx/picmi.py
+++ b/Python/pywarpx/picmi.py
@@ -113,9 +113,6 @@ class GaussianBunchDistribution(picmistandard.PICMI_GaussianBunchDistribution):
             species.uy = self.centroid_velocity[1]
             species.uz = self.centroid_velocity[2]
 
-        if self.fill_in:
-            species.do_continuous_injection = 1
-
 
 class UniformDistribution(picmistandard.PICMI_UniformDistribution):
     def initialize_inputs(self, species_number, layout, species):
@@ -156,6 +153,9 @@ class UniformDistribution(picmistandard.PICMI_UniformDistribution):
             species.uy = self.directed_velocity[1]
             species.uz = self.directed_velocity[2]
 
+        if self.fill_in:
+            species.do_continuous_injection = 1
+
 
 class AnalyticDistribution(picmistandard.PICMI_AnalyticDistribution):
     def initialize_inputs(self, species_number, layout, species):
diff --git a/Source/Diagnostics/BoostedFrameDiagnostic.H b/Source/Diagnostics/BoostedFrameDiagnostic.H
index e35d307a6..5b447689a 100644
--- a/Source/Diagnostics/BoostedFrameDiagnostic.H
+++ b/Source/Diagnostics/BoostedFrameDiagnostic.H
@@ -74,10 +74,19 @@ class BoostedFrameDiagnostic {
     unsigned Nz_lab_;   
     int boost_direction_;
 
+    // For back-transformed diagnostics of grid fields, data_buffer_[i]
+    // stores a buffer of the fields in the lab frame (in a MultiFab, i.e.
+    // with all box data etc.). When the buffer if full, dump to file.
     amrex::Vector<std::unique_ptr<amrex::MultiFab> > data_buffer_;
+    // particles_buffer_ is currently blind to refinement level.
+    // particles_buffer_[i][j] is a WarpXParticleContainer::DiagnosticParticleData where
+    // - i is the back-transformed snapshot number 
+    // - j is the species number
     amrex::Vector<amrex::Vector<WarpXParticleContainer::DiagnosticParticleData> > particles_buffer_;
     int num_buffer_ = 256;
     int max_box_size_ = 256;
+    // buff_counter_[i] is the number of z slices in data_buffer_[i] 
+    // for snapshot number i.
     amrex::Vector<int> buff_counter_;
 
     amrex::Vector<LabSnapShot> snapshots_;
diff --git a/Source/Diagnostics/BoostedFrameDiagnostic.cpp b/Source/Diagnostics/BoostedFrameDiagnostic.cpp
index 13972075d..11789579c 100644
--- a/Source/Diagnostics/BoostedFrameDiagnostic.cpp
+++ b/Source/Diagnostics/BoostedFrameDiagnostic.cpp
@@ -504,6 +504,7 @@ void BoostedFrameDiagnostic::Flush(const Geometry& geom)
     auto & mypc = WarpX::GetInstance().GetPartContainer();
     const std::vector<std::string> species_names = mypc.GetSpeciesNames();
     
+    // Loop over BFD snapshots
     for (int i = 0; i < N_snapshots_; ++i) {
 
         int i_lab = (snapshots_[i].current_z_lab - snapshots_[i].zmin_lab) / dz_lab_;
@@ -539,14 +540,20 @@ void BoostedFrameDiagnostic::Flush(const Geometry& geom)
             }
             
             if (WarpX::do_boosted_frame_particles) {
-                for (int j = 0; j < mypc.nSpecies(); ++j) {
+                // Loop over species to be dumped to BFD
+                for (int j = 0; j < mypc.nSpeciesBoostedFrameDiags(); ++j) {
+                    // Get species name
+                    std::string species_name = 
+                        species_names[mypc.mapSpeciesBoostedFrameDiags(j)];
 #ifdef WARPX_USE_HDF5
+                    // Dump species data
                     writeParticleDataHDF5(particles_buffer_[i][j],
                                           snapshots_[i].file_name,
-                                          species_names[j]);
+                                          species_name);
 #else
                     std::stringstream part_ss;
-                    part_ss << snapshots_[i].file_name + "/" + species_names[j] + "/";
+                    part_ss << snapshots_[i].file_name + "/" + species_name + "/";
+                    // Dump species data
                     writeParticleData(particles_buffer_[i][j], part_ss.str(), i_lab);
 #endif
                 }
@@ -575,21 +582,30 @@ writeLabFrameData(const MultiFab* cell_centered_data,
     const Real zhi_boost = domain_z_boost.hi(boost_direction_);
 
     const std::vector<std::string> species_names = mypc.GetSpeciesNames();
-    
+
+    // Loop over snapshots
     for (int i = 0; i < N_snapshots_; ++i) {
+    
+        // Get updated z position of snapshot
         const Real old_z_boost = snapshots_[i].current_z_boost;
         snapshots_[i].updateCurrentZPositions(t_boost,
                                               inv_gamma_boost_,
                                               inv_beta_boost_);
         
+        // If snapshot out of the domain, nothing to do
         if ( (snapshots_[i].current_z_boost < zlo_boost) or
              (snapshots_[i].current_z_boost > zhi_boost) or
              (snapshots_[i].current_z_lab < snapshots_[i].zmin_lab) or
              (snapshots_[i].current_z_lab > snapshots_[i].zmax_lab) ) continue;
 
+        // Get z index of data_buffer_ (i.e. in the lab frame) where 
+        // simulation domain (t', [zmin',zmax']), back-transformed to lab 
+        // frame, intersects with snapshot.
         int i_lab = (snapshots_[i].current_z_lab - snapshots_[i].zmin_lab) / dz_lab_;
 
+        // If buffer of snapshot i is empty...
         if (buff_counter_[i] == 0) {
+            // ... reset fields buffer data_buffer_[i]
             if (WarpX::do_boosted_frame_fields) {
                 const int ncomp = cell_centered_data->nComp();
                 Box buff_box = geom.Domain();
@@ -600,13 +616,16 @@ writeLabFrameData(const MultiFab* cell_centered_data,
                 DistributionMapping buff_dm(buff_ba);
                 data_buffer_[i].reset( new MultiFab(buff_ba, buff_dm, ncomp, 0) );
             }
-            if (WarpX::do_boosted_frame_particles) particles_buffer_[i].resize(mypc.nSpecies());
+            // ... reset particle buffer particles_buffer_[i]
+            if (WarpX::do_boosted_frame_particles) 
+                particles_buffer_[i].resize(mypc.nSpeciesBoostedFrameDiags());
         }
 
         if (WarpX::do_boosted_frame_fields) {
             const int ncomp = cell_centered_data->nComp();
             const int start_comp = 0;
             const bool interpolate = true;
+            // Get slice in the boosted frame
             std::unique_ptr<MultiFab> slice = amrex::get_slice_data(boost_direction_,
                                                                     snapshots_[i].current_z_boost,
                                                                     *cell_centered_data, geom,
@@ -620,19 +639,23 @@ writeLabFrameData(const MultiFab* cell_centered_data,
                                          &gamma_boost_, &beta_boost_);
             }
             
+            // Create a 2D box for the slice in the boosted frame
             Real dx = geom.CellSize(boost_direction_);
             int i_boost = (snapshots_[i].current_z_boost - geom.ProbLo(boost_direction_))/dx;
             Box slice_box = geom.Domain();
             slice_box.setSmall(boost_direction_, i_boost);
             slice_box.setBig(boost_direction_, i_boost);
+            // Make it a BoxArray slice_ba
             BoxArray slice_ba(slice_box);
             slice_ba.maxSize(max_box_size_);
+            // Create MultiFab tmp on slice_ba with data from slice
             MultiFab tmp(slice_ba, data_buffer_[i]->DistributionMap(), ncomp, 0);
             tmp.copy(*slice, 0, 0, ncomp);
             
 #ifdef _OPENMP
 #pragma omp parallel
 #endif
+            // Copy data from MultiFab tmp to MultiDab data_buffer[i]
             for (MFIter mfi(tmp, true); mfi.isValid(); ++mfi) {
                 const Box& tile_box  = mfi.tilebox();
                 WRPX_COPY_SLICE(BL_TO_FORTRAN_BOX(tile_box),
@@ -641,16 +664,18 @@ writeLabFrameData(const MultiFab* cell_centered_data,
                                 &ncomp, &i_boost, &i_lab);
             }
         }
-        
+
         if (WarpX::do_boosted_frame_particles) {
             mypc.GetLabFrameData(snapshots_[i].file_name, i_lab, boost_direction_,
                                  old_z_boost, snapshots_[i].current_z_boost,
                                  t_boost, snapshots_[i].t_lab, dt, particles_buffer_[i]);
+
         }
 
 
         ++buff_counter_[i];
         
+        // If buffer full, write to disk.
         if (buff_counter_[i] == num_buffer_) {
 
             if (WarpX::do_boosted_frame_fields) {
@@ -666,14 +691,21 @@ writeLabFrameData(const MultiFab* cell_centered_data,
             }
             
             if (WarpX::do_boosted_frame_particles) {
-                for (int j = 0; j < mypc.nSpecies(); ++j) {
+                // Loop over species to be dumped to BFD
+                for (int j = 0; j < mypc.nSpeciesBoostedFrameDiags(); ++j) {        
+                    // Get species name
+                    const std::string species_name = species_names[mypc.mapSpeciesBoostedFrameDiags(j)];
 #ifdef WARPX_USE_HDF5
+                    // Write data to disk (HDF5)
                     writeParticleDataHDF5(particles_buffer_[i][j],
                                           snapshots_[i].file_name,
-                                          species_names[j]);
+                                          species_name);
 #else
                     std::stringstream part_ss;
-                    part_ss << snapshots_[i].file_name + "/" + species_names[j] + "/";
+
+                    part_ss << snapshots_[i].file_name + "/" + species_name + "/";
+
+                    // Write data to disk (custom)
                     writeParticleData(particles_buffer_[i][j], part_ss.str(), i_lab);
 #endif
                 }            
@@ -855,16 +887,19 @@ LabSnapShot(Real t_lab_in, Real t_boost, Real zmin_lab_in,
 
     ParallelDescriptor::Barrier();
     
-    if (WarpX::do_boosted_frame_particles)
-    {
+    if (WarpX::do_boosted_frame_particles){
         auto & mypc = WarpX::GetInstance().GetPartContainer();
         const std::vector<std::string> species_names = mypc.GetSpeciesNames();
-        for (int j = 0; j < mypc.nSpecies(); ++j)
+        // Loop over species to be dumped to BFD
+        for (int j = 0; j < mypc.nSpeciesBoostedFrameDiags(); ++j)
         {
-            output_create_species_group(file_name, species_names[j]);
+            // Loop over species to be dumped to BFD
+            std::string species_name = 
+                species_names[mypc.mapSpeciesBoostedFrameDiags(j)];
+            output_create_species_group(file_name, species_name);
             for (int k = 0; k < static_cast<int>(particle_field_names.size()); ++k)
             {
-                std::string field_path = species_names[j] + "/" + particle_field_names[k];
+                std::string field_path = species_name + "/" + particle_field_names[k];
                 output_create_particle_field(file_name, field_path);
             }
         }
@@ -884,11 +919,14 @@ LabSnapShot(Real t_lab_in, Real t_boost, Real zmin_lab_in,
 
         auto & mypc = WarpX::GetInstance().GetPartContainer();
         const std::vector<std::string> species_names = mypc.GetSpeciesNames();        
-        int nspecies = mypc.nSpecies();
         
         const std::string particles_prefix = "particle";
-        for(int i = 0; i < nspecies; ++i) {
-            const std::string fullpath = file_name + "/" + species_names[i];
+        // Loop over species to be dumped to BFD
+        for(int i = 0; i < mypc.nSpeciesBoostedFrameDiags(); ++i) {
+            // Get species name
+            std::string species_name = 
+                species_names[mypc.mapSpeciesBoostedFrameDiags(i)];
+            const std::string fullpath = file_name + "/" + species_name;
             if (!UtilCreateDirectory(fullpath, 0755))
                 CreateDirectoryFailed(fullpath);
         }
diff --git a/Source/Diagnostics/FieldIO.cpp b/Source/Diagnostics/FieldIO.cpp
index 209d8e9b4..ced1f8ea3 100644
--- a/Source/Diagnostics/FieldIO.cpp
+++ b/Source/Diagnostics/FieldIO.cpp
@@ -299,7 +299,10 @@ WarpX::AverageAndPackFields ( Vector<std::string>& varnames,
     amrex::Vector<MultiFab>& mf_avg, const int ngrow) const
 {
     // Count how many different fields should be written (ncomp)
-    const int ncomp = 3*3
+    const int ncomp = 0
+        + static_cast<int>(plot_E_field)*3
+        + static_cast<int>(plot_B_field)*3
+        + static_cast<int>(plot_J_field)*3
         + static_cast<int>(plot_part_per_cell)
         + static_cast<int>(plot_part_per_grid)
         + static_cast<int>(plot_part_per_proc)
@@ -321,15 +324,21 @@ WarpX::AverageAndPackFields ( Vector<std::string>& varnames,
         // Go through the different fields, pack them into mf_avg[lev],
         // add the corresponding names to `varnames` and increment dcomp
         int dcomp = 0;
-        AverageAndPackVectorField(mf_avg[lev], current_fp[lev], dcomp, ngrow);
-        if(lev==0) for(auto name:{"jx","jy","jz"}) varnames.push_back(name);
-        dcomp += 3;
-        AverageAndPackVectorField(mf_avg[lev], Efield_aux[lev], dcomp, ngrow);
-        if(lev==0) for(auto name:{"Ex","Ey","Ez"}) varnames.push_back(name);
-        dcomp += 3;
-        AverageAndPackVectorField(mf_avg[lev], Bfield_aux[lev], dcomp, ngrow);
-        if(lev==0) for(auto name:{"Bx","By","Bz"}) varnames.push_back(name);
-        dcomp += 3;
+        if (plot_J_field) {
+            AverageAndPackVectorField(mf_avg[lev], current_fp[lev], dcomp, ngrow);
+            if(lev==0) for(auto name:{"jx","jy","jz"}) varnames.push_back(name);
+            dcomp += 3;
+        }
+        if (plot_E_field) {
+            AverageAndPackVectorField(mf_avg[lev], Efield_aux[lev], dcomp, ngrow);
+            if(lev==0) for(auto name:{"Ex","Ey","Ez"}) varnames.push_back(name);
+            dcomp += 3;
+        }
+        if (plot_B_field) {
+            AverageAndPackVectorField(mf_avg[lev], Bfield_aux[lev], dcomp, ngrow);
+            if(lev==0) for(auto name:{"Bx","By","Bz"}) varnames.push_back(name);
+            dcomp += 3;
+        }
 
         if (plot_part_per_cell)
         {
diff --git a/Source/Diagnostics/ParticleIO.cpp b/Source/Diagnostics/ParticleIO.cpp
index f15c084a0..258069a37 100644
--- a/Source/Diagnostics/ParticleIO.cpp
+++ b/Source/Diagnostics/ParticleIO.cpp
@@ -28,16 +28,20 @@ MultiParticleContainer::Checkpoint (const std::string& dir) const
 
 void
 MultiParticleContainer::WritePlotFile (const std::string& dir,
-                                       const Vector<int>& real_flags,
                                        const Vector<std::string>& real_names) const
 {
     Vector<std::string> int_names;    
     Vector<int> int_flags;
     
     for (unsigned i = 0, n = species_names.size(); i < n; ++i) {
-	allcontainers[i]->WritePlotFile(dir, species_names[i],
-                                        real_flags, int_flags,
-                                        real_names, int_names);
+        auto& pc = allcontainers[i];
+        if (pc->plot_species) {
+            // real_names contains a list of all particle attributes.
+            // pc->plot_flags is 1 or 0, whether quantity is dumped or not.
+            pc->WritePlotFile(dir, species_names[i],
+                              pc->plot_flags, int_flags,
+                              real_names, int_names);
+        }
     }
 }
 
diff --git a/Source/Diagnostics/WarpXIO.cpp b/Source/Diagnostics/WarpXIO.cpp
index 186a8d45e..1762a9b79 100644
--- a/Source/Diagnostics/WarpXIO.cpp
+++ b/Source/Diagnostics/WarpXIO.cpp
@@ -422,7 +422,7 @@ WarpX::GetCellCenteredData() {
         AverageAndPackVectorField( *cc[lev], Efield_aux[lev], dcomp, ng );
         dcomp += 3;
         // then the magnetic field
-        AverageAndPackVectorField( *cc[lev], Efield_aux[lev], dcomp, ng );
+        AverageAndPackVectorField( *cc[lev], Bfield_aux[lev], dcomp, ng );
         dcomp += 3;
         // then the current density
         AverageAndPackVectorField( *cc[lev], current_fp[lev], dcomp, ng );
@@ -642,7 +642,7 @@ WarpX::WritePlotFile () const
         particle_varnames.push_back("uzold");
     }
 
-    mypc->WritePlotFile(plotfilename, particle_plot_flags, particle_varnames);
+    mypc->WritePlotFile(plotfilename, particle_varnames);
 
     WriteJobInfo(plotfilename);
 
diff --git a/Source/Evolve/WarpXEvolveEM.cpp b/Source/Evolve/WarpXEvolveEM.cpp
index 6541dd4be..1c7729a83 100644
--- a/Source/Evolve/WarpXEvolveEM.cpp
+++ b/Source/Evolve/WarpXEvolveEM.cpp
@@ -25,6 +25,10 @@ WarpX::EvolveEM (int numsteps)
     static int last_check_file_step = 0;
     static int last_insitu_step = 0;
 
+    if (do_compute_max_step_from_zmax){
+        computeMaxStepBoostAccelerator(geom[0]);
+    }
+
     int numsteps_max;
     if (numsteps < 0) {  // Note that the default argument is numsteps = -1
         numsteps_max = max_step;
@@ -506,6 +510,50 @@ WarpX::ComputeDt ()
     }
 }
 
+/* \brief computes max_step for wakefield simulation in boosted frame.
+ * \param geom: Geometry object that contains simulation domain.
+ * 
+ * max_step is set so that the simulation stop when the lower corner of the 
+ * simulation box passes input parameter zmax_plasma_to_compute_max_step.
+ */
+void
+WarpX::computeMaxStepBoostAccelerator(amrex::Geometry geom){
+    // Sanity checks: can use zmax_plasma_to_compute_max_step only if 
+    // the moving window and the boost are all in z direction.
+    AMREX_ALWAYS_ASSERT_WITH_MESSAGE(
+        WarpX::moving_window_dir == AMREX_SPACEDIM-1,
+        "Can use zmax_plasma_to_compute_max_step only if " +
+        "moving window along z. TODO: all directions.");
+    AMREX_ALWAYS_ASSERT_WITH_MESSAGE(
+        maxLevel() == 0,
+        "Can use zmax_plasma_to_compute_max_step only if " +
+        "max level = 0.");
+    AMREX_ALWAYS_ASSERT_WITH_MESSAGE(
+        (WarpX::boost_direction[0]-0)*(WarpX::boost_direction[0]-0) +
+        (WarpX::boost_direction[1]-0)*(WarpX::boost_direction[1]-0) +
+        (WarpX::boost_direction[2]-1)*(WarpX::boost_direction[2]-1) < 1.e-12,
+        "Can use zmax_plasma_to_compute_max_step only if " +
+        "warpx.boost_direction = z. TODO: all directions.");
+
+    // Lower end of the simulation domain. All quantities are given in boosted 
+    // frame except zmax_plasma_to_compute_max_step.
+    const Real zmin_domain_boost = geom.ProbLo(AMREX_SPACEDIM-1);
+    // End of the plasma: Transform input argument
+    // zmax_plasma_to_compute_max_step to boosted frame.
+    const Real len_plasma_boost = zmax_plasma_to_compute_max_step/gamma_boost;
+    // Plasma velocity
+    const Real v_plasma_boost = -beta_boost * PhysConst::c;
+    // Get time at which the lower end of the simulation domain passes the 
+    // upper end of the plasma (in the z direction).
+    const Real interaction_time_boost = (len_plasma_boost-zmin_domain_boost)/
+        (moving_window_v-v_plasma_boost);
+    // Divide by dt, and update value of max_step.
+    const int computed_max_step = interaction_time_boost/dt[0];
+    max_step = computed_max_step;
+    Print()<<"max_step computed in computeMaxStepBoostAccelerator: "
+           <<computed_max_step<<std::endl;
+}
+
 /* \brief Apply perfect mirror condition inside the box (not at a boundary).
  * In practice, set all fields to 0 on a section of the simulation domain
  * (as for a perfect conductor with a given thickness). 
diff --git a/Source/FortranInterface/WarpX_f.H b/Source/FortranInterface/WarpX_f.H
index 3d92b7651..4457e34d8 100644
--- a/Source/FortranInterface/WarpX_f.H
+++ b/Source/FortranInterface/WarpX_f.H
@@ -176,12 +176,6 @@ extern "C"
 			const amrex::Real* charge, const amrex::Real* mass, const amrex::Real* dt,
 			const long* particle_pusher_algo);
 
-	// Particle pusher (position)
-
-	void warpx_particle_pusher_positions(const long* np,
-                                             amrex::Real* xp, amrex::Real* yp, amrex::Real* zp,
-                                             amrex::Real* uxp, amrex::Real* uyp, amrex::Real* uzp, amrex::Real* gaminv,
-                                             const amrex::Real* dt);
 
     // Laser pusher
 
diff --git a/Source/FortranInterface/WarpX_picsar.F90 b/Source/FortranInterface/WarpX_picsar.F90
index 6d6a2e411..a4cc99926 100644
--- a/Source/FortranInterface/WarpX_picsar.F90
+++ b/Source/FortranInterface/WarpX_picsar.F90
@@ -519,38 +519,6 @@ subroutine warpx_charge_deposition(rho,np,xp,yp,zp,w,q,xmin,ymin,zmin,dx,dy,dz,n
 
   end subroutine warpx_particle_pusher_momenta
 
-  ! _________________________________________________________________
-  !>
-  !> @brief
-  !> Main subroutine for the particle pusher of positions
-  !>
-  !> @param[in] np number of super-particles
-  !> @param[in] xp,yp,zp particle position arrays
-  !> @param[in] uxp,uyp,uzp normalized momentum in each direction
-  !> @param[in] gaminv particle Lorentz factors
-  !> @param[in] dt time step
-  !> @param[in] particle_pusher_algo Particle pusher algorithm
-  subroutine warpx_particle_pusher_positions(np,xp,yp,zp,uxp,uyp,uzp, &
-                                  gaminv,dt) &
-       bind(C, name="warpx_particle_pusher_positions")
-
-    INTEGER(c_long), INTENT(IN)   :: np
-    REAL(amrex_real),INTENT(INOUT)    :: gaminv(np)
-    REAL(amrex_real),INTENT(INOUT)    :: xp(np),yp(np),zp(np)
-    REAL(amrex_real),INTENT(INOUT)    :: uxp(np),uyp(np),uzp(np)
-    REAL(amrex_real),INTENT(IN)       :: dt
-
-    CALL pxr_set_gamma(np,uxp,uyp,uzp,gaminv)
-
-    !!!! --- push particle species positions a time step
-#if (AMREX_SPACEDIM == 3) || (defined WARPX_RZ)
-    CALL pxr_pushxyz(np,xp,yp,zp,uxp,uyp,uzp,gaminv,dt)
-#elif (AMREX_SPACEDIM == 2)
-    CALL pxr_pushxz(np,xp,zp,uxp,uzp,gaminv,dt)
-#endif
-
-  end subroutine warpx_particle_pusher_positions
-
   ! _________________________________________________________________
   !>
   !> @brief
diff --git a/Source/Laser/LaserParticleContainer.cpp b/Source/Laser/LaserParticleContainer.cpp
index 8a2590e5e..bda5dcd86 100644
--- a/Source/Laser/LaserParticleContainer.cpp
+++ b/Source/Laser/LaserParticleContainer.cpp
@@ -25,8 +25,9 @@ LaserParticleContainer::LaserParticleContainer (AmrCore* amr_core, int ispecies,
 {
     charge = 1.0;
     mass = std::numeric_limits<Real>::max();
-
-	ParmParse pp(laser_name);
+    do_boosted_frame_diags = 0;
+        
+    ParmParse pp(laser_name);
 
 	// Parse the type of laser profile and set the corresponding flag `profile`
 	std::string laser_type_s;
diff --git a/Source/Particles/MultiParticleContainer.H b/Source/Particles/MultiParticleContainer.H
index 0c5e49c04..513f30b99 100644
--- a/Source/Particles/MultiParticleContainer.H
+++ b/Source/Particles/MultiParticleContainer.H
@@ -130,7 +130,6 @@ public:
     void Checkpoint (const std::string& dir) const;
 
     void WritePlotFile( const std::string& dir,
-                        const amrex::Vector<int>& real_flags, 
                         const amrex::Vector<std::string>& real_names) const;
     
     void Restart (const std::string& dir);
@@ -156,6 +155,10 @@ public:
 
     int nSpecies() const {return nspecies;}
 
+    int nSpeciesBoostedFrameDiags() const {return nspecies_boosted_frame_diags;}
+    int mapSpeciesBoostedFrameDiags(int i) const {return map_species_boosted_frame_diags[i];}
+    int doBoostedFrameDiags() const {return do_boosted_frame_diags;}
+
     int nSpeciesDepositOnMainGrid () const {
         int r = 0;
         for (int i : deposit_on_main_grid) {
@@ -215,6 +218,13 @@ private:
 
     void ReadParameters ();
 
+    // Number of species dumped in BoostedFrameDiagnostics
+    int nspecies_boosted_frame_diags = 0;
+    // map_species_boosted_frame_diags[i] is the species ID in 
+    // MultiParticleContainer for 0<i<nspecies_boosted_frame_diags
+    std::vector<int> map_species_boosted_frame_diags;
+    int do_boosted_frame_diags = 0;
+
     // runtime parameters
     int nlasers = 0;
     int nspecies = 1;   // physical particles only. nspecies+nlasers == allcontainers.size().
diff --git a/Source/Particles/MultiParticleContainer.cpp b/Source/Particles/MultiParticleContainer.cpp
index 440906348..6d618c096 100644
--- a/Source/Particles/MultiParticleContainer.cpp
+++ b/Source/Particles/MultiParticleContainer.cpp
@@ -31,16 +31,31 @@ MultiParticleContainer::MultiParticleContainer (AmrCore* amr_core)
 
     pc_tmp.reset(new PhysicalParticleContainer(amr_core));
 
-    if (WarpX::do_boosted_frame_diagnostic && WarpX::do_boosted_frame_particles)
+    // Compute the number of species for which lab-frame data is dumped
+    // nspecies_lab_frame_diags, and map their ID to MultiParticleContainer
+    // particle IDs in map_species_lab_diags.
+    map_species_boosted_frame_diags.resize(nspecies);
+    nspecies_boosted_frame_diags = 0;
+    for (int i=0; i<nspecies; i++){
+        auto& pc = allcontainers[i];
+        if (pc->do_boosted_frame_diags){
+            map_species_boosted_frame_diags[nspecies_boosted_frame_diags] = i;
+            do_boosted_frame_diags = 1;
+            nspecies_boosted_frame_diags += 1;
+        }
+    }
+
+    if (WarpX::do_boosted_frame_diagnostic && do_boosted_frame_diags)
     {
-        for (int i = 0; i < nspecies + nlasers; ++i)
+        for (int i = 0; i < nspecies_boosted_frame_diags; ++i)
         {
-            allcontainers[i]->AddRealComp("xold");
-            allcontainers[i]->AddRealComp("yold");
-            allcontainers[i]->AddRealComp("zold");
-            allcontainers[i]->AddRealComp("uxold");
-            allcontainers[i]->AddRealComp("uyold");
-            allcontainers[i]->AddRealComp("uzold");
+            int is = map_species_boosted_frame_diags[i];
+            allcontainers[is]->AddRealComp("xold");
+            allcontainers[is]->AddRealComp("yold");
+            allcontainers[is]->AddRealComp("zold");
+            allcontainers[is]->AddRealComp("uxold");
+            allcontainers[is]->AddRealComp("uyold");
+            allcontainers[is]->AddRealComp("uzold");
         }
         pc_tmp->AddRealComp("xold");
         pc_tmp->AddRealComp("yold");
@@ -376,13 +391,25 @@ MultiParticleContainer
 
     BL_PROFILE("MultiParticleContainer::GetLabFrameData");
     
-    for (int i = 0; i < nspecies; ++i){
-        WarpXParticleContainer* pc = allcontainers[i].get();
+    // Loop over particle species
+    for (int i = 0; i < nspecies_boosted_frame_diags; ++i){
+        int isp = map_species_boosted_frame_diags[i];
+        WarpXParticleContainer* pc = allcontainers[isp].get();
         WarpXParticleContainer::DiagnosticParticles diagnostic_particles;
         pc->GetParticleSlice(direction, z_old, z_new, t_boost, t_lab, dt, diagnostic_particles);
-
+        // Here, diagnostic_particles[lev][index] is a WarpXParticleContainer::DiagnosticParticleData
+        // where "lev" is the AMR level and "index" is a [grid index][tile index] pair.
+        
+        // Loop over AMR levels
         for (int lev = 0; lev <= pc->finestLevel(); ++lev){
+            // Loop over [grid index][tile index] pairs 
+            // and Fills parts[species number i] with particle data from all grids and 
+            // tiles in diagnostic_particles. parts contains particles from all 
+            // AMR levels indistinctly.
             for (auto it = diagnostic_particles[lev].begin(); it != diagnostic_particles[lev].end(); ++it){
+                // it->first is the [grid index][tile index] key
+                // it->second is the corresponding 
+                // WarpXParticleContainer::DiagnosticParticleData value
                 parts[i].GetRealData(DiagIdx::w).insert(  parts[i].GetRealData(DiagIdx::w  ).end(),
                                                           it->second.GetRealData(DiagIdx::w  ).begin(),
                                                           it->second.GetRealData(DiagIdx::w  ).end());
diff --git a/Source/Particles/PhysicalParticleContainer.cpp b/Source/Particles/PhysicalParticleContainer.cpp
index a1d4e1319..8ef4c13a9 100644
--- a/Source/Particles/PhysicalParticleContainer.cpp
+++ b/Source/Particles/PhysicalParticleContainer.cpp
@@ -82,6 +82,40 @@ PhysicalParticleContainer::PhysicalParticleContainer (AmrCore* amr_core, int isp
     pp.query("do_splitting", do_splitting);
     pp.query("split_type", split_type);
     pp.query("do_continuous_injection", do_continuous_injection);
+    // Whether to plot back-transformed (lab-frame) diagnostics 
+    // for this species.
+    pp.query("do_boosted_frame_diags", do_boosted_frame_diags);
+
+    pp.query("plot_species", plot_species);
+    int do_user_plot_vars;
+    do_user_plot_vars = pp.queryarr("plot_vars", plot_vars);
+    if (not do_user_plot_vars){
+        // By default, all particle variables are dumped to plotfiles,
+        // including {x,y,z,ux,uy,uz}old variables when running in a 
+        // boosted frame
+        if (WarpX::do_boosted_frame_diagnostic && do_boosted_frame_diags){
+            plot_flags.resize(PIdx::nattribs + 6, 1);
+        } else {
+            plot_flags.resize(PIdx::nattribs, 1);
+        }
+    } else {
+        // Set plot_flag to 0 for all attribs
+        if (WarpX::do_boosted_frame_diagnostic && do_boosted_frame_diags){
+            plot_flags.resize(PIdx::nattribs + 6, 0);
+        } else {
+            plot_flags.resize(PIdx::nattribs, 0);
+        }
+        // If not none, set plot_flags values to 1 for elements in plot_vars.
+        if (plot_vars[0] != "none"){
+            for (const auto& var : plot_vars){
+                // Return error if var not in PIdx. 
+                AMREX_ALWAYS_ASSERT_WITH_MESSAGE( 
+                    ParticleStringNames::to_index.count(var), 
+                    "plot_vars argument not in ParticleStringNames");
+                plot_flags[ParticleStringNames::to_index.at(var)] = 1;
+            }
+        }
+    }
 }
 
 PhysicalParticleContainer::PhysicalParticleContainer (AmrCore* amr_core)
@@ -185,7 +219,7 @@ PhysicalParticleContainer::AddGaussianBeam(Real x_m, Real y_m, Real z_m,
             attribs[PIdx::uz] = u[2];
             attribs[PIdx::w ] = weight;
 
-            if (WarpX::do_boosted_frame_diagnostic && WarpX::do_boosted_frame_particles)
+            if (WarpX::do_boosted_frame_diagnostic && do_boosted_frame_diags)
             {
                 auto& particle_tile = DefineAndReturnParticleTile(0, 0, 0);
                 particle_tile.push_back_real(particle_comps["xold"], x);
@@ -469,7 +503,7 @@ PhysicalParticleContainer::AddPlasmaCPU (int lev, RealBox part_realbox)
                     attribs[PIdx::uy] = u[1];
                     attribs[PIdx::uz] = u[2];
                     
-                    if (WarpX::do_boosted_frame_diagnostic && WarpX::do_boosted_frame_particles)
+                    if (WarpX::do_boosted_frame_diagnostic && do_boosted_frame_diags)
                     {
                         auto& particle_tile = DefineAndReturnParticleTile(lev, grid_id, tile_id);
                         particle_tile.push_back_real(particle_comps["xold"], x);
@@ -711,7 +745,7 @@ PhysicalParticleContainer::AddPlasmaGPU (int lev, RealBox part_realbox)
                     attribs[PIdx::uz] = u[2];
 
                     // note - this will be slow on the GPU, need to revisit
-                    if (WarpX::do_boosted_frame_diagnostic && WarpX::do_boosted_frame_particles)
+                    if (WarpX::do_boosted_frame_diagnostic && do_boosted_frame_diags)
                     {
                         auto& particle_tile = DefineAndReturnParticleTile(lev, grid_id, tile_id);
                         particle_tile.push_back_real(particle_comps["xold"], x);
@@ -801,7 +835,6 @@ FieldGatherES (const amrex::Vector<std::array<std::unique_ptr<amrex::MultiFab>,
             const auto& particles = pti.GetArrayOfStructs();
             int nstride = particles.dataShape().first;
             const long np  = pti.numParticles();
-
             auto& attribs = pti.GetAttribs();
             auto& Exp = attribs[PIdx::Ex];
             auto& Eyp = attribs[PIdx::Ey];
@@ -1686,7 +1719,7 @@ PhysicalParticleContainer::PushPX(WarpXParIter& pti,
     auto& Bzp = attribs[PIdx::Bz];
     const long np  = pti.numParticles();
 
-    if (WarpX::do_boosted_frame_diagnostic && WarpX::do_boosted_frame_particles)
+    if (WarpX::do_boosted_frame_diagnostic && do_boosted_frame_diags)
     {
         auto& xpold    = pti.GetAttribs(particle_comps["xold"]);
         auto& ypold    = pti.GetAttribs(particle_comps["yold"]);
@@ -1831,6 +1864,8 @@ void PhysicalParticleContainer::GetParticleSlice(const int direction, const Real
     // Note the the slice should always move in the negative boost direction.
     AMREX_ALWAYS_ASSERT(z_new < z_old);
 
+    AMREX_ALWAYS_ASSERT(do_boosted_frame_diags == 1);
+
     const int nlevs = std::max(0, finestLevel()+1);
 
     // we figure out a box for coarse-grained rejection. If the RealBox corresponding to a
diff --git a/Source/Particles/RigidInjectedParticleContainer.cpp b/Source/Particles/RigidInjectedParticleContainer.cpp
index a5acca281..fd1b2dfb5 100644
--- a/Source/Particles/RigidInjectedParticleContainer.cpp
+++ b/Source/Particles/RigidInjectedParticleContainer.cpp
@@ -225,7 +225,7 @@ RigidInjectedParticleContainer::PushPX(WarpXParIter& pti,
     auto& Bzp = attribs[PIdx::Bz];
     const long np  = pti.numParticles();
 
-    if (WarpX::do_boosted_frame_diagnostic && WarpX::do_boosted_frame_particles)
+    if (WarpX::do_boosted_frame_diagnostic && do_boosted_frame_diags)
     {
         auto& xpold    = pti.GetAttribs(particle_comps["xold"]);
         auto& ypold    = pti.GetAttribs(particle_comps["yold"]);
diff --git a/Source/Particles/WarpXParticleContainer.H b/Source/Particles/WarpXParticleContainer.H
index 6fa02b824..89244237b 100644
--- a/Source/Particles/WarpXParticleContainer.H
+++ b/Source/Particles/WarpXParticleContainer.H
@@ -59,7 +59,6 @@ public:
                       const amrex::Cuda::ManagedDeviceVector<amrex::Real>& y,
                       const amrex::Cuda::ManagedDeviceVector<amrex::Real>& z);
 #endif
-
     const std::array<RealVector, PIdx::nattribs>& GetAttribs () const { 
         return GetStructOfArrays().GetRealData(); 
     }
@@ -85,7 +84,13 @@ class WarpXParticleContainer
 public:
     friend MultiParticleContainer;
 
+    // amrex::StructOfArrays with DiagIdx::nattribs amrex::Real components 
+    // and 0 int components for the particle data.
     using DiagnosticParticleData = amrex::StructOfArrays<DiagIdx::nattribs, 0>;
+    // DiagnosticParticles is a vector, with one element per MR level.
+    // DiagnosticParticles[lev] is typically a key-value pair where the key is 
+    // a pair [grid_index, tile_index], and the value is the corresponding 
+    // DiagnosticParticleData (see above) on this tile.
     using DiagnosticParticles = amrex::Vector<std::map<std::pair<int, int>, DiagnosticParticleData> >;
     
     WarpXParticleContainer (amrex::AmrCore* amr_core, int ispecies);
@@ -265,6 +270,8 @@ protected:
     // support all features allowed by direct injection.
     int do_continuous_injection = 0;
 
+    int do_boosted_frame_diags = 1;
+
     amrex::Vector<amrex::FArrayBox> local_rho;
     amrex::Vector<amrex::FArrayBox> local_jx;
     amrex::Vector<amrex::FArrayBox> local_jy;
@@ -272,6 +279,15 @@ protected:
 
     amrex::Vector<amrex::Cuda::ManagedDeviceVector<amrex::Real> > m_xp, m_yp, m_zp, m_giv;
 
+    // Whether to dump particle quantities. 
+    // If true, particle position is always dumped.
+    int plot_species = 1;
+    // For all particle attribs (execept position), whether or not 
+    // to dump to file.
+    amrex::Vector<int> plot_flags;
+    // list of names of attributes to dump.
+    amrex::Vector<std::string> plot_vars;
+
 private:
     virtual void particlePostLocate(ParticleType& p, const amrex::ParticleLocData& pld,
                                     const int lev) override;
diff --git a/Source/Particles/WarpXParticleContainer.cpp b/Source/Particles/WarpXParticleContainer.cpp
index 89e21df1c..32816ba5f 100644
--- a/Source/Particles/WarpXParticleContainer.cpp
+++ b/Source/Particles/WarpXParticleContainer.cpp
@@ -78,7 +78,7 @@ WarpXParticleContainer::WarpXParticleContainer (AmrCore* amr_core, int ispecies)
     particle_comps["theta"] = PIdx::theta;
 #endif
 
-    if (WarpX::do_boosted_frame_diagnostic && WarpX::do_boosted_frame_particles)
+    if (WarpX::do_boosted_frame_diagnostic && do_boosted_frame_diags)
     {
         particle_comps["xold"]  = PIdx::nattribs;
         particle_comps["yold"]  = PIdx::nattribs+1;
@@ -86,9 +86,9 @@ WarpXParticleContainer::WarpXParticleContainer (AmrCore* amr_core, int ispecies)
         particle_comps["uxold"] = PIdx::nattribs+3;
         particle_comps["uyold"] = PIdx::nattribs+4;
         particle_comps["uzold"] = PIdx::nattribs+5;
-        
+
     }
-    
+
     // Initialize temporary local arrays for charge/current deposition
     int num_threads = 1;
     #ifdef _OPENMP
@@ -174,7 +174,7 @@ WarpXParticleContainer::AddNParticles (int lev,
                                        int n, const Real* x, const Real* y, const Real* z,
 				       const Real* vx, const Real* vy, const Real* vz,
 				       int nattr, const Real* attr, int uniqueparticles, int id)
-{	
+{
     BL_ASSERT(nattr == 1);
     const Real* weight = attr;
 
@@ -230,15 +230,15 @@ WarpXParticleContainer::AddNParticles (int lev,
 #endif
         p.pos(1) = z[i];
 #endif
-        
-        if (WarpX::do_boosted_frame_diagnostic && WarpX::do_boosted_frame_particles)
+
+        if (WarpX::do_boosted_frame_diagnostic && do_boosted_frame_diags)
         {
             auto& particle_tile = DefineAndReturnParticleTile(0, 0, 0);
             particle_tile.push_back_real(particle_comps["xold"], x[i]);
             particle_tile.push_back_real(particle_comps["yold"], y[i]);
-            particle_tile.push_back_real(particle_comps["zold"], z[i]);            
+            particle_tile.push_back_real(particle_comps["zold"], z[i]);
         }
-        
+
         particle_tile.push_back(p);
     }
 
@@ -249,14 +249,14 @@ WarpXParticleContainer::AddNParticles (int lev,
         particle_tile.push_back_real(PIdx::uy,     vy + ibegin,     vy + iend);
         particle_tile.push_back_real(PIdx::uz,     vz + ibegin,     vz + iend);
 
-        if (WarpX::do_boosted_frame_diagnostic && WarpX::do_boosted_frame_particles)
+        if (WarpX::do_boosted_frame_diagnostic && do_boosted_frame_diags)
         {
             auto& particle_tile = DefineAndReturnParticleTile(0, 0, 0);
             particle_tile.push_back_real(particle_comps["uxold"], vx + ibegin, vx + iend);
             particle_tile.push_back_real(particle_comps["uyold"], vy + ibegin, vy + iend);
-            particle_tile.push_back_real(particle_comps["uzold"], vz + ibegin, vz + iend);            
+            particle_tile.push_back_real(particle_comps["uzold"], vz + ibegin, vz + iend);
         }
-        
+
         for (int comp = PIdx::uz+1; comp < PIdx::nattribs; ++comp)
         {
 #ifdef WARPX_RZ
@@ -327,7 +327,7 @@ WarpXParticleContainer::DepositCurrent(WarpXParIter& pti,
       jx_ptr = local_jx[thread_num].dataPtr();
       jy_ptr = local_jy[thread_num].dataPtr();
       jz_ptr = local_jz[thread_num].dataPtr();
-      
+
       local_jx[thread_num].setVal(0.0);
       local_jy[thread_num].setVal(0.0);
       local_jz[thread_num].setVal(0.0);
@@ -426,7 +426,7 @@ WarpXParticleContainer::DepositCurrent(WarpXParIter& pti,
 
       BL_PROFILE_VAR_STOP(blp_pxr_cd);
 
-#ifndef AMREX_USE_GPU            
+#ifndef AMREX_USE_GPU
       BL_PROFILE_VAR_START(blp_accumulate);
 
       jx[pti].atomicAdd(local_jx[thread_num], tbx, tbx, 0, 0, 1);
@@ -477,7 +477,7 @@ WarpXParticleContainer::DepositCurrent(WarpXParIter& pti,
       auto jyntot = local_jy[thread_num].length();
       auto jzntot = local_jz[thread_num].length();
 #endif
-      
+
       long ncrse = np - np_current;
       BL_PROFILE_VAR_START(blp_pxr_cd);
       if (j_is_nodal) {
@@ -650,7 +650,7 @@ WarpXParticleContainer::DepositCharge ( WarpXParIter& pti, RealVector& wp,
 #endif
       BL_PROFILE_VAR_STOP(blp_pxr_chd);
 
-#ifndef AMREX_USE_GPU            
+#ifndef AMREX_USE_GPU
       BL_PROFILE_VAR_START(blp_accumulate);
 
       (*rhomf)[pti].atomicAdd(local_rho[thread_num], tile_box, tile_box, 0, icomp, 1);
@@ -710,7 +710,7 @@ WarpXParticleContainer::DepositCharge ( WarpXParIter& pti, RealVector& wp,
 #endif
       BL_PROFILE_VAR_STOP(blp_pxr_chd);
 
-#ifndef AMREX_USE_GPU            
+#ifndef AMREX_USE_GPU
       BL_PROFILE_VAR_START(blp_accumulate);
 
       (*crhomf)[pti].atomicAdd(local_rho[thread_num], tile_box, tile_box, 0, icomp, 1);
@@ -1025,8 +1025,6 @@ void
 WarpXParticleContainer::PushX (int lev, Real dt)
 {
     BL_PROFILE("WPC::PushX()");
-    BL_PROFILE_VAR_NS("WPC::PushX::Copy", blp_copy);
-    BL_PROFILE_VAR_NS("WPC:PushX::Push", blp_pxr_pp);
 
     if (do_not_push) return;
 
@@ -1036,47 +1034,38 @@ WarpXParticleContainer::PushX (int lev, Real dt)
 #pragma omp parallel
 #endif
     {
-        Cuda::ManagedDeviceVector<Real> xp, yp, zp, giv;
 
         for (WarpXParIter pti(*this, lev); pti.isValid(); ++pti)
         {
             Real wt = amrex::second();
 
-            auto& attribs = pti.GetAttribs();
-
-            auto& uxp = attribs[PIdx::ux];
-            auto& uyp = attribs[PIdx::uy];
-            auto& uzp = attribs[PIdx::uz];
-
-            const long np = pti.numParticles();
-
-            giv.resize(np);
-
-            //
-            // copy data from particle container to temp arrays
-            //
-            BL_PROFILE_VAR_START(blp_copy);
-            pti.GetPosition(xp, yp, zp);
-            BL_PROFILE_VAR_STOP(blp_copy);
-
             //
             // Particle Push
             //
-            BL_PROFILE_VAR_START(blp_pxr_pp);
-            warpx_particle_pusher_positions(&np,
-                                            xp.dataPtr(),
-                                            yp.dataPtr(),
-                                            zp.dataPtr(),
-                                            uxp.dataPtr(), uyp.dataPtr(), uzp.dataPtr(),
-                                            giv.dataPtr(), &dt);
-            BL_PROFILE_VAR_STOP(blp_pxr_pp);
-
-            //
-            // copy particle data back
-            //
-            BL_PROFILE_VAR_START(blp_copy);
-            pti.SetPosition(xp, yp, zp);
-            BL_PROFILE_VAR_STOP(blp_copy);
+            // Extract pointers to particle position and momenta, for this particle tile
+            // - positions are stored as an array of struct, in `ParticleType`
+            ParticleType * AMREX_RESTRICT pstructs = &(pti.GetArrayOfStructs()[0]);
+            // - momenta are stored as a struct of array, in `attribs`
+            auto& attribs = pti.GetAttribs();
+            Real* AMREX_RESTRICT ux = attribs[PIdx::ux].dataPtr();
+            Real* AMREX_RESTRICT uy = attribs[PIdx::uy].dataPtr();
+            Real* AMREX_RESTRICT uz = attribs[PIdx::uz].dataPtr();
+            // Loop over the particles and update the position
+            const long np = pti.numParticles();
+            const Real inv_c2 = 1./(PhysConst::c*PhysConst::c);
+            amrex::ParallelFor( np,
+                [=] AMREX_GPU_DEVICE (long i) {
+                    // Compute inverse Lorentz factor
+                    const Real inv_gamma = 1./std::sqrt(
+                        1. + (ux[i]*ux[i] + uy[i]*uy[i] + uz[i]*uz[i])*inv_c2);
+                    // Update positions over one time step
+                    pstructs[i].pos(0) += ux[i] * inv_gamma * dt;
+#if (AMREX_SPACEDIM == 3) || (defined WARPX_RZ) // RZ pushes particles in 3D
+                    pstructs[i].pos(1) += uy[i] * inv_gamma * dt;
+#endif
+                    pstructs[i].pos(2) += uz[i] * inv_gamma * dt;
+                }
+            );
 
             if (cost) {
                 const Box& tbx = pti.tilebox();
@@ -1092,15 +1081,15 @@ WarpXParticleContainer::PushX (int lev, Real dt)
 }
 
 // This function is called in Redistribute, just after locate
-void 
-WarpXParticleContainer::particlePostLocate(ParticleType& p, 
+void
+WarpXParticleContainer::particlePostLocate(ParticleType& p,
                                            const ParticleLocData& pld,
                                            const int lev)
 {
     // Tag particle if goes to higher level.
     // It will be split later in the loop
-    if (pld.m_lev == lev+1 
-        and p.m_idata.id != NoSplitParticleID 
+    if (pld.m_lev == lev+1
+        and p.m_idata.id != NoSplitParticleID
         and p.m_idata.id >= 0)
     {
         p.m_idata.id = DoSplitParticleID;
diff --git a/Source/Utils/WarpXUtil.cpp b/Source/Utils/WarpXUtil.cpp
index 4ec7ebb51..a5ea6d75a 100644
--- a/Source/Utils/WarpXUtil.cpp
+++ b/Source/Utils/WarpXUtil.cpp
@@ -118,8 +118,7 @@ void NullifyMF(amrex::MultiFab& mf, int lev, amrex::Real zmin, amrex::Real zmax)
         const int lo_ind = bx.loVect()[1];
 #endif
         // Check if box intersect with [zmin, zmax]
-        if ( (zmin>zmin_box && zmin<=zmax_box) || 
-             (zmax>zmin_box && zmax<=zmax_box) ){
+        if ( (zmax>zmin_box && zmin<=zmax_box) ){
             Array4<Real> arr = mf[mfi].array();
             // Set field to 0 between zmin and zmax
             ParallelFor(bx,
diff --git a/Source/WarpX.H b/Source/WarpX.H
index 44e84033f..71fc54af2 100644
--- a/Source/WarpX.H
+++ b/Source/WarpX.H
@@ -108,6 +108,8 @@ public:
     static amrex::Real gamma_boost;
     static amrex::Real beta_boost;
     static amrex::Vector<int> boost_direction;
+    static amrex::Real zmax_plasma_to_compute_max_step;
+    static int do_compute_max_step_from_zmax;
 
     static bool do_dynamic_scheduling;
     static bool refine_plasma;
@@ -152,6 +154,8 @@ public:
     void applyMirrors(amrex::Real time);
 
     void ComputeDt ();
+    // Compute max_step automatically for simulations in a boosted frame.
+    void computeMaxStepBoostAccelerator(amrex::Geometry geom);
     int  MoveWindow (bool move_j);
     void UpdatePlasmaInjectionPosition (amrex::Real dt);
 
@@ -515,6 +519,9 @@ private:
     bool dump_plotfiles = true;
     bool dump_openpmd = false;
 #endif
+    bool plot_E_field       = true;
+    bool plot_B_field       = true;
+    bool plot_J_field       = true;
     bool plot_part_per_cell = true;
     bool plot_part_per_grid = false;
     bool plot_part_per_proc = false;
@@ -543,9 +550,6 @@ private:
 
     bool is_synchronized = true;
 
-    amrex::Vector<std::string> particle_plot_vars;
-    amrex::Vector<int> particle_plot_flags;
-
 #ifdef WARPX_USE_PSATD
     // Store fields in real space on the dual grid (i.e. the grid for the FFT push of the fields)
     // This includes data for the FFT guard cells (between FFT groups)
diff --git a/Source/WarpX.cpp b/Source/WarpX.cpp
index a3a24897a..6b1edaccd 100644
--- a/Source/WarpX.cpp
+++ b/Source/WarpX.cpp
@@ -32,6 +32,8 @@ int WarpX::moving_window_dir = -1;
 Real WarpX::gamma_boost = 1.;
 Real WarpX::beta_boost = 0.;
 Vector<int> WarpX::boost_direction = {0,0,0};
+int WarpX::do_compute_max_step_from_zmax = 0;
+Real WarpX::zmax_plasma_to_compute_max_step = 0.;
 
 long WarpX::current_deposition_algo = 3;
 long WarpX::charge_deposition_algo = 0;
@@ -115,7 +117,7 @@ WarpX::ResetInstance ()
 {
     delete m_instance;
     m_instance = nullptr;
-}
+}	
 
 WarpX::WarpX ()
 {
@@ -155,6 +157,7 @@ WarpX::WarpX ()
             current_injection_position = geom[0].ProbLo(moving_window_dir);
         }
     }
+    do_boosted_frame_particles = mypc->doBoostedFrameDiags();
 
     Efield_aux.resize(nlevs_max);
     Bfield_aux.resize(nlevs_max);
@@ -272,6 +275,12 @@ WarpX::ReadParameters ()
 
     ReadBoostedFrameParameters(gamma_boost, beta_boost, boost_direction);
 
+    // pp.query returns 1 if argument zmax_plasma_to_compute_max_step is 
+    // specified by the user, 0 otherwise.
+    do_compute_max_step_from_zmax = 
+        pp.query("zmax_plasma_to_compute_max_step", 
+                  zmax_plasma_to_compute_max_step);
+
     pp.queryarr("B_external", B_external);
 
 	pp.query("do_moving_window", do_moving_window);
@@ -317,8 +326,6 @@ WarpX::ReadParameters ()
         pp.get("gamma_boost", gamma_boost);
 
         pp.query("do_boosted_frame_fields", do_boosted_frame_fields);
-        pp.query("do_boosted_frame_particles", do_boosted_frame_particles);
-
 
         AMREX_ALWAYS_ASSERT_WITH_MESSAGE(do_moving_window,
                "The moving window should be on if using the boosted frame diagnostic.");
@@ -371,6 +378,9 @@ WarpX::ReadParameters ()
         if (ParallelDescriptor::NProcs() == 1) {
             plot_proc_number = false;
         }
+        pp.query("plot_E_field"      , plot_E_field);
+        pp.query("plot_B_field"      , plot_B_field);
+        pp.query("plot_J_field"      , plot_J_field);
         pp.query("plot_part_per_cell", plot_part_per_cell);
         pp.query("plot_part_per_grid", plot_part_per_grid);
         pp.query("plot_part_per_proc", plot_part_per_proc);
@@ -428,39 +438,6 @@ WarpX::ReadParameters ()
             fine_tag_hi = RealVect{hi};
         }
 
-        // select which particle comps to write
-        {
-            pp.queryarr("particle_plot_vars", particle_plot_vars);
-
-            if (particle_plot_vars.size() == 0)
-            {
-                if (WarpX::do_boosted_frame_diagnostic && WarpX::do_boosted_frame_particles)
-                {
-                    particle_plot_flags.resize(PIdx::nattribs + 6, 1);
-                }
-                else
-                {
-                    particle_plot_flags.resize(PIdx::nattribs, 1);
-                }                
-            }
-            else
-            {
-                if (WarpX::do_boosted_frame_diagnostic && WarpX::do_boosted_frame_particles)
-                {
-                    particle_plot_flags.resize(PIdx::nattribs + 6, 0);
-                }
-                else
-                {
-                    particle_plot_flags.resize(PIdx::nattribs, 0);
-                }                
-
-                for (const auto& var : particle_plot_vars)
-                {
-                    particle_plot_flags[ParticleStringNames::to_index.at(var)] = 1;
-                }
-            }
-        }
-
         pp.query("load_balance_int", load_balance_int);
         pp.query("load_balance_with_sfc", load_balance_with_sfc);
         pp.query("load_balance_knapsack_factor", load_balance_knapsack_factor);
-- 
cgit v1.2.3


From e7bc3c34fc838c4ab6ee845de2e3f6354c3cbdec Mon Sep 17 00:00:00 2001
From: Revathi Jambunathan <revanathan@login2.summit.olcf.ornl.gov>
Date: Thu, 6 Jun 2019 17:23:22 -0400
Subject:  adding 2D cufft plans

---
 .../SpectralSolver/SpectralFieldData.cpp            | 21 +++++++++++++--------
 Source/Parallelization/WarpXComm.cpp                |  5 -----
 Source/Particles/PhysicalParticleContainer.cpp      |  2 --
 Source/Particles/WarpXParticleContainer.cpp         |  2 --
 4 files changed, 13 insertions(+), 17 deletions(-)

(limited to 'Source/Particles/WarpXParticleContainer.cpp')

diff --git a/Source/FieldSolver/SpectralSolver/SpectralFieldData.cpp b/Source/FieldSolver/SpectralSolver/SpectralFieldData.cpp
index cb26b19b7..6eeb266e7 100644
--- a/Source/FieldSolver/SpectralSolver/SpectralFieldData.cpp
+++ b/Source/FieldSolver/SpectralSolver/SpectralFieldData.cpp
@@ -55,7 +55,7 @@ SpectralFieldData::SpectralFieldData( const BoxArray& realspace_ba,
 #ifdef AMREX_USE_GPU
         // Add cuFFT-specific code
         // Creating 3D plan for real to complex -- double precision
-
+        // Assuming CUDA is used for programming GPU 
         cufftResult result;
 #if (AMREX_SPACEDIM == 3)
         result = cufftPlan3d( &forward_plan[mfi], fft_size[2], 
@@ -63,21 +63,26 @@ SpectralFieldData::SpectralFieldData( const BoxArray& realspace_ba,
         if ( result != CUFFT_SUCCESS ) {
            amrex::Print() << " cufftplan3d forward failed! \n";
         }
-#else
-        // Add 2D cuFFT-spacific code for D2Z
-        // Note that D2Z is inherently forward plan 
-#endif
 
-#if (AMREX_SPACEDIM == 3)
         result = cufftPlan3d( &backward_plan[mfi], fft_size[2], 
                               fft_size[1], fft_size[0], CUFFT_Z2D);
         if ( result != CUFFT_SUCCESS ) {
            amrex::Print() << " cufftplan3d backward failed! \n";
         }
 #else
-        // Add 2D cuFFT-specific code for Z2D
-        // Note that Z2D is inherently backward plan
+        // Add 2D cuFFT-spacific code for D2Z
+        // Note that D2Z is inherently forward plan 
+        result = cufftPlan2d( &forward_plan[mfi], fft_size[1], 
+                              fft_size[0], CUFFT_D2Z );
+        if ( result != CUFFT_SUCCESS ) {
+           amrex::Print() << " cufftplan2d forward failed! \n";
+        }
 
+        result = cufftPlan2d( &backward_plan[mfi], fft_size[1], 
+                               fft_size[0], CUFFT_Z2D );
+        if ( result != CUFFT_SUCCESS ) {
+           amrex::Print() << " cufftplan2d backward failed! \n";
+        }
 #endif
 
 #else
diff --git a/Source/Parallelization/WarpXComm.cpp b/Source/Parallelization/WarpXComm.cpp
index 998fc4de1..00dcb85d0 100644
--- a/Source/Parallelization/WarpXComm.cpp
+++ b/Source/Parallelization/WarpXComm.cpp
@@ -222,9 +222,7 @@ WarpX::FillBoundaryE ()
 {
     for (int lev = 0; lev <= finest_level; ++lev)
     {
-        amrex::Print() << " lev " << lev << " in fill bndry E \n";
         FillBoundaryE(lev);
-        amrex::Print() << " lev " << lev << " after fill bndry E \n";
     }
 }
 
@@ -273,12 +271,9 @@ WarpX::FillBoundaryE (int lev, PatchType patch_type)
         pml[lev]->FillBoundaryE(patch_type);
         }
 
-        amrex::Print() << " before defining multifab \n";
         const auto& cperiod = Geom(lev-1).periodicity();
         Vector<MultiFab*> mf{Efield_cp[lev][0].get(),Efield_cp[lev][1].get(),Efield_cp[lev][2].get()};
-        amrex::Print() << " before amrex fill bndry \n";
         amrex::FillBoundary(mf, cperiod);
-        amrex::Print() << " after amrex fill bndry \n";
     }
 }
 
diff --git a/Source/Particles/PhysicalParticleContainer.cpp b/Source/Particles/PhysicalParticleContainer.cpp
index 0826605ec..1f517fccb 100644
--- a/Source/Particles/PhysicalParticleContainer.cpp
+++ b/Source/Particles/PhysicalParticleContainer.cpp
@@ -1388,9 +1388,7 @@ PhysicalParticleContainer::Evolve (int lev,
             pti.GetPosition(m_xp[thread_num], m_yp[thread_num], m_zp[thread_num]);
 	    BL_PROFILE_VAR_STOP(blp_copy);
 
-            amrex::Print() << " before deposit chage \n";
             if (rho) DepositCharge(pti, wp, rho, crho, 0, np_current, np, thread_num, lev);
-            amrex::Print() << " after deposit chage \n";
             
             if (! do_not_push)
             {
diff --git a/Source/Particles/WarpXParticleContainer.cpp b/Source/Particles/WarpXParticleContainer.cpp
index 576c53319..47d57294d 100644
--- a/Source/Particles/WarpXParticleContainer.cpp
+++ b/Source/Particles/WarpXParticleContainer.cpp
@@ -612,7 +612,6 @@ WarpXParticleContainer::DepositCharge ( WarpXParIter& pti, RealVector& wp,
       const std::array<Real, 3>& xyzmin = xyzmin_tile;
 
 #ifdef AMREX_USE_GPU
-      amrex::Print() << " before icomp data ptr " << icomp << "\n";
       data_ptr = (*rhomf)[pti].dataPtr(icomp);
       auto rholen = (*rhomf)[pti].length();
 #else
@@ -635,7 +634,6 @@ WarpXParticleContainer::DepositCharge ( WarpXParIter& pti, RealVector& wp,
       const long nz = rholen[1]-1-2*ngRho;
 #endif
       BL_PROFILE_VAR_START(blp_pxr_chd);
-      amrex::Print() << " before warpxcharge deposition \n";
       warpx_charge_deposition(data_ptr, &np_current,
                               m_xp[thread_num].dataPtr(),
                               m_yp[thread_num].dataPtr(),
-- 
cgit v1.2.3


From d3b95f3264632385b18c99c2d278272ad6b2875e Mon Sep 17 00:00:00 2001
From: Remi Lehe <remi.lehe@normalesup.org>
Date: Thu, 13 Jun 2019 13:05:27 -0700
Subject: Use PICSAR function for nodal deposition

---
 Source/FortranInterface/WarpX_f.H           |   8 +-
 Source/FortranInterface/WarpX_picsar.F90    |   9 +-
 Source/Particles/WarpXParticleContainer.cpp | 195 +++++-----------------------
 3 files changed, 44 insertions(+), 168 deletions(-)

(limited to 'Source/Particles/WarpXParticleContainer.cpp')

diff --git a/Source/FortranInterface/WarpX_f.H b/Source/FortranInterface/WarpX_f.H
index 1053ace89..98dd8685d 100644
--- a/Source/FortranInterface/WarpX_f.H
+++ b/Source/FortranInterface/WarpX_f.H
@@ -80,7 +80,7 @@ extern "C"
                             const amrex_real* uxp, const amrex_real* uyp, const amrex_real* uzp,
                             amrex_real* xpold, amrex_real* ypold, amrex_real* zpold,
                             amrex_real* uxpold, amrex_real* uypold, amrex_real* uzpold);
-    
+
 	// Charge deposition
 	void warpx_charge_deposition(amrex::Real* rho,
             const long* np,	const amrex::Real* xp, const amrex::Real* yp, const amrex::Real* zp,	const amrex::Real* w,
@@ -110,7 +110,7 @@ extern "C"
 			const amrex::Real* dt,
 			const amrex::Real* dx, const amrex::Real* dy, const amrex::Real* dz,
 			const long* nox, const long* noy,const long* noz,
-			const long* lvect, const long* current_depo_algo);
+                        const int* l_nodal, const long* lvect, const long* current_depo_algo);
 
         // Current deposition finalize for RZ
         void warpx_current_deposition_rz_volume_scaling(
@@ -178,7 +178,7 @@ extern "C"
        amrex::Real* u_Xx, amrex::Real* u_Xy, amrex::Real* u_Xz,
        amrex::Real* u_Yx, amrex::Real* u_Yy, amrex::Real* u_Yz,
        amrex::Real* positionx, amrex::Real* positiony, amrex::Real* positionz );
-  
+
   void update_laser_particle( const long* np,
        amrex::Real* xp, amrex::Real* yp, amrex::Real* zp,
        amrex::Real* uxp, amrex::Real* uyp, amrex::Real* uzp,
@@ -187,7 +187,7 @@ extern "C"
        amrex::Real* nvecx, amrex::Real* nvecy, amrex::Real* nvecz,
        amrex::Real* mobility, amrex::Real* dt,  const amrex::Real* c,
        const amrex::Real* beta_boost, const amrex::Real* gamma_boost );
-  
+
     // Maxwell solver
 
         void warpx_push_evec(
diff --git a/Source/FortranInterface/WarpX_picsar.F90 b/Source/FortranInterface/WarpX_picsar.F90
index 12d541b08..68c586632 100644
--- a/Source/FortranInterface/WarpX_picsar.F90
+++ b/Source/FortranInterface/WarpX_picsar.F90
@@ -315,14 +315,14 @@ subroutine warpx_charge_deposition(rho,np,xp,yp,zp,w,q,xmin,ymin,zmin,dx,dy,dz,n
   subroutine warpx_current_deposition( &
     jx,jx_ng,jx_ntot,jy,jy_ng,jy_ntot,jz,jz_ng,jz_ntot, &
     np,xp,yp,zp,uxp,uyp,uzp,gaminv,w,q,xmin,ymin,zmin,dt,dx,dy,dz,nox,noy,noz,&
-    lvect,current_depo_algo) &
+    l_nodal,lvect,current_depo_algo) &
     bind(C, name="warpx_current_deposition")
 
     integer, intent(in) :: jx_ntot(AMREX_SPACEDIM), jy_ntot(AMREX_SPACEDIM), jz_ntot(AMREX_SPACEDIM)
     integer(c_long), intent(in) :: jx_ng, jy_ng, jz_ng
     integer(c_long), intent(IN)                                  :: np
     integer(c_long), intent(IN)                                  :: nox,noy,noz
-
+    integer(c_int), intent(in)                                   :: l_nodal
     real(amrex_real), intent(IN OUT):: jx(*), jy(*), jz(*)
     real(amrex_real), intent(IN)                                     :: q
     real(amrex_real), intent(IN)                                     :: dx,dy,dz
@@ -333,6 +333,7 @@ subroutine warpx_charge_deposition(rho,np,xp,yp,zp,w,q,xmin,ymin,zmin,dx,dy,dz,n
     real(amrex_real), intent(IN),  dimension(np)                     :: gaminv
     integer(c_long), intent(IN)                                   :: lvect
     integer(c_long), intent(IN)                                   :: current_depo_algo
+    logical(pxr_logical)                                          :: pxr_l_nodal
 
     ! Compute the number of valid cells and guard cells
     integer(c_long) :: jx_nvalid(AMREX_SPACEDIM), jy_nvalid(AMREX_SPACEDIM), jz_nvalid(AMREX_SPACEDIM), &
@@ -352,7 +353,7 @@ subroutine warpx_charge_deposition(rho,np,xp,yp,zp,w,q,xmin,ymin,zmin,dx,dy,dz,n
         jz,jz_nguards,jz_nvalid,            &
         np,xp,yp,zp,uxp,uyp,uzp,gaminv,w,q, &
         xmin,ymin,zmin,dt,dx,dy,dz,         &
-        nox,noy,noz,current_depo_algo)
+        nox,noy,noz,pxr_l_nodal,current_depo_algo)
 ! Dimension 2
 #elif (AMREX_SPACEDIM==2)
         CALL WRPX_PXR_CURRENT_DEPOSITION(   &
@@ -361,7 +362,7 @@ subroutine warpx_charge_deposition(rho,np,xp,yp,zp,w,q,xmin,ymin,zmin,dx,dy,dz,n
         jz,jz_nguards,jz_nvalid,            &
         np,xp,yp,zp,uxp,uyp,uzp,gaminv,w,q, &
         xmin,zmin,dt,dx,dz,nox,noz,lvect,   &
-        current_depo_algo)
+        pxr_l_nodal,current_depo_algo)
 #endif
 
   end subroutine warpx_current_deposition
diff --git a/Source/Particles/WarpXParticleContainer.cpp b/Source/Particles/WarpXParticleContainer.cpp
index 47d57294d..43d7999d1 100644
--- a/Source/Particles/WarpXParticleContainer.cpp
+++ b/Source/Particles/WarpXParticleContainer.cpp
@@ -306,7 +306,7 @@ WarpXParticleContainer::DepositCurrent(WarpXParIter& pti,
   // WarpX assumes the same number of guard cells for Jx, Jy, Jz
   long ngJ = jx.nGrow();
 
-  bool j_is_nodal = jx.is_nodal() and jy.is_nodal() and jz.is_nodal();
+  int j_is_nodal = jx.is_nodal() and jy.is_nodal() and jz.is_nodal();
 
   // Deposit charge for particles that are not in the current buffers
   if (np_current > 0)
@@ -342,92 +342,29 @@ WarpXParticleContainer::DepositCurrent(WarpXParIter& pti,
 #endif
 
       BL_PROFILE_VAR_START(blp_pxr_cd);
-      if (j_is_nodal) {
-          const Real* p_wp = wp.dataPtr();
-          const Real* p_gaminv = m_giv[thread_num].dataPtr();
-          const Real* p_uxp = uxp.dataPtr();
-          const Real* p_uyp = uyp.dataPtr();
-          const Real* p_uzp = uzp.dataPtr();
-          AsyncArray<Real> wptmp_aa(np_current);
-          Real* const wptmp = wptmp_aa.data();
-          const Box& tile_box = pti.tilebox();
-#if (AMREX_SPACEDIM == 3)
-          const long nx = tile_box.length(0);
-          const long ny = tile_box.length(1);
-          const long nz = tile_box.length(2);
-#else
-          const long nx = tile_box.length(0);
-          const long ny = 0;
-          const long nz = tile_box.length(1);
-#endif
-          amrex::ParallelFor (np_current, [=] AMREX_GPU_DEVICE (long ip) {
-                  wptmp[ip] = p_wp[ip] * p_gaminv[ip] * p_uxp[ip];
-          });
-          warpx_charge_deposition(jx_ptr, &np_current,
-                                  m_xp[thread_num].dataPtr(),
-                                  m_yp[thread_num].dataPtr(),
-                                  m_zp[thread_num].dataPtr(),
-                                  wptmp,
-                                  &this->charge,
-                                  &xyzmin[0], &xyzmin[1], &xyzmin[2],
-                                  &dx[0], &dx[1], &dx[2], &nx, &ny, &nz,
-                                  &ngJ, &ngJ, &ngJ,
-                                  &WarpX::nox,&WarpX::noy,&WarpX::noz,
-                                  &lvect, &WarpX::current_deposition_algo);
-          amrex::ParallelFor (np_current, [=] AMREX_GPU_DEVICE (long ip) {
-                  wptmp[ip] = p_wp[ip] * p_gaminv[ip] * p_uyp[ip];
-          });
-          warpx_charge_deposition(jy_ptr, &np_current,
-                                  m_xp[thread_num].dataPtr(),
-                                  m_yp[thread_num].dataPtr(),
-                                  m_zp[thread_num].dataPtr(),
-                                  wptmp,
-                                  &this->charge,
-                                  &xyzmin[0], &xyzmin[1], &xyzmin[2],
-                                  &dx[0], &dx[1], &dx[2], &nx, &ny, &nz,
-                                  &ngJ, &ngJ, &ngJ,
-                                  &WarpX::nox,&WarpX::noy,&WarpX::noz,
-                                  &lvect, &WarpX::current_deposition_algo);
-          amrex::ParallelFor (np_current, [=] AMREX_GPU_DEVICE (long ip) {
-                  wptmp[ip] = p_wp[ip] * p_gaminv[ip] * p_uzp[ip];
-          });
-          warpx_charge_deposition(jz_ptr, &np_current,
-                                  m_xp[thread_num].dataPtr(),
-                                  m_yp[thread_num].dataPtr(),
-                                  m_zp[thread_num].dataPtr(),
-                                  wptmp,
-                                  &this->charge,
-                                  &xyzmin[0], &xyzmin[1], &xyzmin[2],
-                                  &dx[0], &dx[1], &dx[2], &nx, &ny, &nz,
-                                  &ngJ, &ngJ, &ngJ,
-                                  &WarpX::nox,&WarpX::noy,&WarpX::noz,
-                                  &lvect, &WarpX::current_deposition_algo);
-      } else {
-          warpx_current_deposition(
-                               jx_ptr, &ngJ, jxntot.getVect(),
-                               jy_ptr, &ngJ, jyntot.getVect(),
-                               jz_ptr, &ngJ, jzntot.getVect(),
-                               &np_current,
-                               m_xp[thread_num].dataPtr(),
-                               m_yp[thread_num].dataPtr(),
-                               m_zp[thread_num].dataPtr(),
-                               uxp.dataPtr(), uyp.dataPtr(), uzp.dataPtr(),
-                               m_giv[thread_num].dataPtr(),
-                               wp.dataPtr(), &this->charge,
-                               &xyzmin[0], &xyzmin[1], &xyzmin[2],
-                               &dt, &dx[0], &dx[1], &dx[2],
-                               &WarpX::nox,&WarpX::noy,&WarpX::noz,
-                               &lvect,&WarpX::current_deposition_algo);
+      warpx_current_deposition(
+           jx_ptr, &ngJ, jxntot.getVect(),
+           jy_ptr, &ngJ, jyntot.getVect(),
+           jz_ptr, &ngJ, jzntot.getVect(),
+           &np_current,
+           m_xp[thread_num].dataPtr(),
+           m_yp[thread_num].dataPtr(),
+           m_zp[thread_num].dataPtr(),
+           uxp.dataPtr(), uyp.dataPtr(), uzp.dataPtr(),
+           m_giv[thread_num].dataPtr(),
+           wp.dataPtr(), &this->charge,
+           &xyzmin[0], &xyzmin[1], &xyzmin[2],
+           &dt, &dx[0], &dx[1], &dx[2],
+           &WarpX::nox,&WarpX::noy,&WarpX::noz, &j_is_nodal,
+           &lvect,&WarpX::current_deposition_algo);
 
 #ifdef WARPX_RZ
-         warpx_current_deposition_rz_volume_scaling(
+       warpx_current_deposition_rz_volume_scaling(
                                   jx_ptr, &ngJ, jxntot.getVect(),
                                   jy_ptr, &ngJ, jyntot.getVect(),
                                   jz_ptr, &ngJ, jzntot.getVect(),
                                   &xyzmin[0], &dx[0]);
 #endif
-      }
-
       BL_PROFILE_VAR_STOP(blp_pxr_cd);
 
 #ifndef AMREX_USE_GPU
@@ -484,92 +421,30 @@ WarpXParticleContainer::DepositCurrent(WarpXParIter& pti,
 
       long ncrse = np - np_current;
       BL_PROFILE_VAR_START(blp_pxr_cd);
-      if (j_is_nodal) {
-          const Real* p_wp = wp.dataPtr() + np_current;
-          const Real* p_gaminv = m_giv[thread_num].dataPtr() + np_current;
-          const Real* p_uxp = uxp.dataPtr() + np_current;
-          const Real* p_uyp = uyp.dataPtr() + np_current;
-          const Real* p_uzp = uzp.dataPtr() + np_current;
-          AsyncArray<Real> wptmp_aa(ncrse);
-          Real* const wptmp = wptmp_aa.data();
-          const Box& tile_box = pti.tilebox();
-#if (AMREX_SPACEDIM == 3)
-          const long nx = tile_box.length(0);
-          const long ny = tile_box.length(1);
-          const long nz = tile_box.length(2);
-#else
-          const long nx = tile_box.length(0);
-          const long ny = 0;
-          const long nz = tile_box.length(1);
-#endif
-          amrex::ParallelFor (ncrse, [=] AMREX_GPU_DEVICE (long ip) {
-                  wptmp[ip] = p_wp[ip] * p_gaminv[ip] * p_uxp[ip];
-          });
-          warpx_charge_deposition(jx_ptr, &ncrse,
-                                  m_xp[thread_num].dataPtr() +np_current,
-                                  m_yp[thread_num].dataPtr() +np_current,
-                                  m_zp[thread_num].dataPtr() +np_current,
-                                  wptmp,
-                                  &this->charge,
-                                  &cxyzmin_tile[0], &cxyzmin_tile[1], &cxyzmin_tile[2],
-                                  &cdx[0], &cdx[1], &cdx[2], &nx, &ny, &nz,
-                                  &ngJ, &ngJ, &ngJ,
-                                  &WarpX::nox,&WarpX::noy,&WarpX::noz,
-                                  &lvect, &WarpX::current_deposition_algo);
-          amrex::ParallelFor (ncrse, [=] AMREX_GPU_DEVICE (long ip) {
-                  wptmp[ip] = p_wp[ip] * p_gaminv[ip] * p_uyp[ip];
-          });
-          warpx_charge_deposition(jy_ptr, &ncrse,
-                                  m_xp[thread_num].dataPtr() +np_current,
-                                  m_yp[thread_num].dataPtr() +np_current,
-                                  m_zp[thread_num].dataPtr() +np_current,
-                                  wptmp,
-                                  &this->charge,
-                                  &cxyzmin_tile[0], &cxyzmin_tile[1], &cxyzmin_tile[2],
-                                  &cdx[0], &cdx[1], &cdx[2], &nx, &ny, &nz,
-                                  &ngJ, &ngJ, &ngJ,
-                                  &WarpX::nox,&WarpX::noy,&WarpX::noz,
-                                  &lvect, &WarpX::current_deposition_algo);
-          amrex::ParallelFor (ncrse, [=] AMREX_GPU_DEVICE (long ip) {
-                  wptmp[ip] = p_wp[ip] * p_gaminv[ip] * p_uzp[ip];
-          });
-          warpx_charge_deposition(jz_ptr, &ncrse,
-                                  m_xp[thread_num].dataPtr() +np_current,
-                                  m_yp[thread_num].dataPtr() +np_current,
-                                  m_zp[thread_num].dataPtr() +np_current,
-                                  wptmp,
-                                  &this->charge,
-                                  &cxyzmin_tile[0], &cxyzmin_tile[1], &cxyzmin_tile[2],
-                                  &cdx[0], &cdx[1], &cdx[2], &nx, &ny, &nz,
-                                  &ngJ, &ngJ, &ngJ,
-                                  &WarpX::nox,&WarpX::noy,&WarpX::noz,
-                                  &lvect, &WarpX::current_deposition_algo);
-      } else {
-          warpx_current_deposition(
-                               jx_ptr, &ngJ, jxntot.getVect(),
-                               jy_ptr, &ngJ, jyntot.getVect(),
-                               jz_ptr, &ngJ, jzntot.getVect(),
-                               &ncrse,
-                               m_xp[thread_num].dataPtr() +np_current,
-                               m_yp[thread_num].dataPtr() +np_current,
-                               m_zp[thread_num].dataPtr() +np_current,
-                               uxp.dataPtr()+np_current,
-                               uyp.dataPtr()+np_current,
-                               uzp.dataPtr()+np_current,
-                               m_giv[thread_num].dataPtr()+np_current,
-                               wp.dataPtr()+np_current, &this->charge,
-                               &cxyzmin_tile[0], &cxyzmin_tile[1], &cxyzmin_tile[2],
-                               &dt, &cdx[0], &cdx[1], &cdx[2],
-                               &WarpX::nox,&WarpX::noy,&WarpX::noz,
-                               &lvect,&WarpX::current_deposition_algo);
+      warpx_current_deposition(
+                           jx_ptr, &ngJ, jxntot.getVect(),
+                           jy_ptr, &ngJ, jyntot.getVect(),
+                           jz_ptr, &ngJ, jzntot.getVect(),
+                           &ncrse,
+                           m_xp[thread_num].dataPtr() +np_current,
+                           m_yp[thread_num].dataPtr() +np_current,
+                           m_zp[thread_num].dataPtr() +np_current,
+                           uxp.dataPtr()+np_current,
+                           uyp.dataPtr()+np_current,
+                           uzp.dataPtr()+np_current,
+                           m_giv[thread_num].dataPtr()+np_current,
+                           wp.dataPtr()+np_current, &this->charge,
+                           &cxyzmin_tile[0], &cxyzmin_tile[1], &cxyzmin_tile[2],
+                           &dt, &cdx[0], &cdx[1], &cdx[2],
+                           &WarpX::nox,&WarpX::noy,&WarpX::noz, &j_is_nodal,
+                           &lvect,&WarpX::current_deposition_algo);
 #ifdef WARPX_RZ
-         warpx_current_deposition_rz_volume_scaling(
+       warpx_current_deposition_rz_volume_scaling(
                                   jx_ptr, &ngJ, jxntot.getVect(),
                                   jy_ptr, &ngJ, jyntot.getVect(),
                                   jz_ptr, &ngJ, jzntot.getVect(),
                                   &xyzmin[0], &dx[0]);
 #endif
-      }
 
       BL_PROFILE_VAR_STOP(blp_pxr_cd);
 
-- 
cgit v1.2.3


From 19fa3eaaf87ce778f72b5559e94584615313a051 Mon Sep 17 00:00:00 2001
From: Weiqun Zhang <weiqunzhang@lbl.gov>
Date: Sun, 16 Jun 2019 11:01:08 -0700
Subject: Turn off FAB_IS_MANAGED

---
 GNUmakefile                                    |  2 ++
 Source/Laser/LaserParticleContainer.cpp        |  7 ++++---
 Source/Particles/PhysicalParticleContainer.cpp | 28 +++++++++++++++-----------
 Source/Particles/WarpXParticleContainer.cpp    |  7 ++++---
 4 files changed, 26 insertions(+), 18 deletions(-)

(limited to 'Source/Particles/WarpXParticleContainer.cpp')

diff --git a/GNUmakefile b/GNUmakefile
index 1acd53be7..5575a558e 100644
--- a/GNUmakefile
+++ b/GNUmakefile
@@ -20,6 +20,8 @@ TINY_PROFILE   = TRUE
 USE_OMP   = TRUE
 USE_GPU   = FALSE
 
+FAB_IS_MANAGED = FALSE
+
 EBASE     = main
 
 USE_PYTHON_MAIN = FALSE
diff --git a/Source/Laser/LaserParticleContainer.cpp b/Source/Laser/LaserParticleContainer.cpp
index 2f964b6f3..de410b31f 100644
--- a/Source/Laser/LaserParticleContainer.cpp
+++ b/Source/Laser/LaserParticleContainer.cpp
@@ -518,10 +518,11 @@ LaserParticleContainer::Evolve (int lev,
             if (cost) {
                 const Box& tbx = pti.tilebox();
                 wt = (amrex::second() - wt) / tbx.d_numPts();
-                FArrayBox* costfab = cost->fabPtr(pti);
-                AMREX_LAUNCH_HOST_DEVICE_LAMBDA ( tbx, work_box,
+                Array4<Real> const& costarr = cost->array(pti);
+                amrex::ParallelFor(tbx,
+                [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept
                 {
-                    costfab->plus(wt, work_box);
+                    costarr(i,j,k) += wt;
                 });
             }
         }
diff --git a/Source/Particles/PhysicalParticleContainer.cpp b/Source/Particles/PhysicalParticleContainer.cpp
index 1f517fccb..7e7c9534e 100644
--- a/Source/Particles/PhysicalParticleContainer.cpp
+++ b/Source/Particles/PhysicalParticleContainer.cpp
@@ -556,10 +556,11 @@ PhysicalParticleContainer::AddPlasmaCPU (int lev, RealBox part_realbox)
 
             if (cost) {
 	        wt = (amrex::second() - wt) / tile_box.d_numPts();
-                FArrayBox* costfab = cost->fabPtr(mfi);
-                AMREX_LAUNCH_HOST_DEVICE_LAMBDA ( tile_box, work_box,
+                Array4<Real> const& costarr = cost->array(mfi);
+                amrex::ParallelFor(tile_box,
+                [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept
                 {
-                    costfab->plus(wt, work_box);
+                    costarr(i,j,k) += wt;
                 });
             }
         }
@@ -830,10 +831,11 @@ PhysicalParticleContainer::AddPlasmaGPU (int lev, RealBox part_realbox)
 	    			 
             if (cost) {
 	        wt = (amrex::second() - wt) / tile_box.d_numPts();
-                FArrayBox* costfab = cost->fabPtr(mfi);
-                AMREX_LAUNCH_HOST_DEVICE_LAMBDA ( tile_box, work_box,
+                Array4<Real> const& costarr = cost->array(mfi);
+                amrex::ParallelFor(tile_box,
+                [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept
                 {
-                    costfab->plus(wt, work_box);
+                    costarr(i,j,k) += wt;
                 });
             }
         }		
@@ -1137,10 +1139,11 @@ PhysicalParticleContainer::FieldGather (int lev,
             if (cost) {
                 const Box& tbx = pti.tilebox();
                 wt = (amrex::second() - wt) / tbx.d_numPts();
-                FArrayBox* costfab = cost->fabPtr(pti);
-                AMREX_LAUNCH_HOST_DEVICE_LAMBDA ( tbx, work_box,
+                Array4<Real> const& costarr = cost->array(pti);
+                amrex::ParallelFor(tbx,
+                [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept
                 {
-                    costfab->plus(wt, work_box);
+                    costarr(i,j,k) += wt;
                 });
             }
         }
@@ -1542,10 +1545,11 @@ PhysicalParticleContainer::Evolve (int lev,
             if (cost) {
                 const Box& tbx = pti.tilebox();
                 wt = (amrex::second() - wt) / tbx.d_numPts();
-                FArrayBox* costfab = cost->fabPtr(pti);
-                AMREX_LAUNCH_HOST_DEVICE_LAMBDA ( tbx, work_box,
+                Array4<Real> const& costarr = cost->array(pti);
+                amrex::ParallelFor(tbx,
+                [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept
                 {
-                    costfab->plus(wt, work_box);
+                    costarr(i,j,k) += wt;
                 });
             }
         }
diff --git a/Source/Particles/WarpXParticleContainer.cpp b/Source/Particles/WarpXParticleContainer.cpp
index 43d7999d1..ac532912d 100644
--- a/Source/Particles/WarpXParticleContainer.cpp
+++ b/Source/Particles/WarpXParticleContainer.cpp
@@ -949,10 +949,11 @@ WarpXParticleContainer::PushX (int lev, Real dt)
             if (cost) {
                 const Box& tbx = pti.tilebox();
                 wt = (amrex::second() - wt) / tbx.d_numPts();
-                FArrayBox* costfab = cost->fabPtr(pti);
-                AMREX_LAUNCH_HOST_DEVICE_LAMBDA ( tbx, work_box,
+                Array4<Real> const& costarr = cost->array(pti);
+                amrex::ParallelFor(tbx,
+                [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept
                 {
-                    costfab->plus(wt, work_box);
+                    costarr(i,j,k) += wt;
                 });
             }
         }
-- 
cgit v1.2.3