Merge branch 'dev' into RZgeometry

author: Dave Grote <grote1@llnl.gov> 2019-08-09 14:12:46 -0700
committer: Dave Grote <grote1@llnl.gov> 2019-08-09 14:12:46 -0700
commit: c3ce219b9d25e8d28e5a6cc5b878b3c5793cf90a (patch)
tree: d296f21c2e9051c21707f3fa419c8cfe892ea952 /Source/Particles/WarpXParticleContainer.cpp
parent: f86512d788477a8eab5ed3c3c92ce9a7453ae4c7 (diff)
parent: 941a74cdee2d89c832d3fc682ef9a0973deddcce (diff)
download: WarpX-c3ce219b9d25e8d28e5a6cc5b878b3c5793cf90a.tar.gz
WarpX-c3ce219b9d25e8d28e5a6cc5b878b3c5793cf90a.tar.zst
WarpX-c3ce219b9d25e8d28e5a6cc5b878b3c5793cf90a.zip
1 files changed, 170 insertions, 32 deletions
diff --git a/Source/Particles/WarpXParticleContainer.cpp b/Source/Particles/WarpXParticleContainer.cpp
index 6ecf0fe98..2107df35f 100644
--- a/Source/Particles/WarpXParticleContainer.cpp
+++ b/Source/Particles/WarpXParticleContainer.cpp
@@ -6,10 +6,12 @@
 #include <AMReX_AmrParGDB.H>
 #include <WarpX_f.H>
 #include <WarpX.H>
+#include <WarpXAlgorithmSelection.H>
 
 // Import low-level single-particle kernels
 #include <GetAndSetPosition.H>
 #include <UpdatePosition.H>
+#include <CurrentDeposition.H>
 
 using namespace amrex;
 
@@ -25,7 +27,7 @@ void
 WarpXParIter::GetPosition (Cuda::ManagedDeviceVector<Real>& x, Cuda::ManagedDeviceVector<Real>& y, Cuda::ManagedDeviceVector<Real>& z) const
 {
     amrex::ParIter<0,0,PIdx::nattribs>::GetPosition(x, z);
-#ifdef WARPX_RZ
+#ifdef WARPX_DIM_RZ
     const auto& attribs = GetAttribs();
     const auto& theta = attribs[PIdx::theta];
     y.resize(x.size());
@@ -42,10 +44,10 @@ WarpXParIter::GetPosition (Cuda::ManagedDeviceVector<Real>& x, Cuda::ManagedDevi
 void
 WarpXParIter::SetPosition (const Cuda::ManagedDeviceVector<Real>& x, const Cuda::ManagedDeviceVector<Real>& y, const Cuda::ManagedDeviceVector<Real>& z)
 {
-#ifdef WARPX_RZ
+#ifdef WARPX_DIM_RZ
     auto& attribs = GetAttribs();
     auto& theta = attribs[PIdx::theta];
-    Cuda::DeviceVector<Real> r(x.size());
+    Cuda::ManagedDeviceVector<Real> r(x.size());
     for (unsigned int i=0 ; i < x.size() ; i++) {
         theta[i] = std::atan2(y[i], x[i]);
         r[i] = std::sqrt(x[i]*x[i] + y[i]*y[i]);
@@ -78,7 +80,7 @@ WarpXParticleContainer::WarpXParticleContainer (AmrCore* amr_core, int ispecies)
     particle_comps["Bx"] = PIdx::Bx;
     particle_comps["By"] = PIdx::By;
     particle_comps["Bz"] = PIdx::Bz;
-#ifdef WARPX_RZ
+#ifdef WARPX_DIM_RZ
     particle_comps["theta"] = PIdx::theta;
 #endif
 
@@ -161,7 +163,7 @@ WarpXParticleContainer::AddOneParticle (ParticleTileType& particle_tile,
     p.pos(1) = y;
     p.pos(2) = z;
 #elif (AMREX_SPACEDIM == 2)
-#ifdef WARPX_RZ
+#ifdef WARPX_DIM_RZ
     attribs[PIdx::theta] = std::atan2(y, x);
     x = std::sqrt(x*x + y*y);
 #endif
@@ -207,7 +209,7 @@ WarpXParticleContainer::AddNParticles (int lev,
 
     std::size_t np = iend-ibegin;
 
-#ifdef WARPX_RZ
+#ifdef WARPX_DIM_RZ
     Vector<Real> theta(np);
 #endif
 
@@ -226,7 +228,7 @@ WarpXParticleContainer::AddNParticles (int lev,
         p.pos(1) = y[i];
         p.pos(2) = z[i];
 #elif (AMREX_SPACEDIM == 2)
-#ifdef WARPX_RZ
+#ifdef WARPX_DIM_RZ
         theta[i-ibegin] = std::atan2(y[i], x[i]);
         p.pos(0) = std::sqrt(x[i]*x[i] + y[i]*y[i]);
 #else
@@ -263,7 +265,7 @@ WarpXParticleContainer::AddNParticles (int lev,
 
         for (int comp = PIdx::uz+1; comp < PIdx::nattribs; ++comp)
         {
-#ifdef WARPX_RZ
+#ifdef WARPX_DIM_RZ
             if (comp == PIdx::theta) {
                 particle_tile.push_back_real(comp, theta.front(), theta.back());
             }
@@ -279,7 +281,7 @@ WarpXParticleContainer::AddNParticles (int lev,
     Redistribute();
 }
 
-/* \brief Current Deposition for thread thread_num
+/* \brief Current Deposition for thread thread_num using PICSAR
  * \param pti         : Particle iterator
  * \param wp          : Array of particle weights
  * \param uxp uyp uzp : Array of particle
@@ -292,16 +294,15 @@ WarpXParticleContainer::AddNParticles (int lev,
  * \param lev         : Level of box that contains particles
  * \param depos_lev   : Level on which particles deposit (if buffers are used)
  * \param dt          : Time step for particle level
- * \param ngJ         : Number of ghosts cells (of lev)
  */
 void
-WarpXParticleContainer::DepositCurrent(WarpXParIter& pti,
-                                       RealVector& wp, RealVector& uxp,
-                                       RealVector& uyp, RealVector& uzp,
-                                       MultiFab* jx, MultiFab* jy, MultiFab* jz,
-                                       const long offset, const long np_to_depose,
-                                       int thread_num, int lev, int depos_lev,
-                                       Real dt)
+WarpXParticleContainer::DepositCurrentFortran(WarpXParIter& pti,
+                                              RealVector& wp, RealVector& uxp,
+                                              RealVector& uyp, RealVector& uzp,
+                                              MultiFab* jx, MultiFab* jy, MultiFab* jz,
+                                              const long offset, const long np_to_depose,
+                                              int thread_num, int lev, int depos_lev,
+                                              Real dt)
 {
     AMREX_ALWAYS_ASSERT_WITH_MESSAGE((depos_lev==(lev-1)) ||
                                      (depos_lev==(lev  )),
@@ -394,15 +395,6 @@ WarpXParticleContainer::DepositCurrent(WarpXParIter& pti,
         &WarpX::nox,&WarpX::noy,&WarpX::noz, &j_is_nodal,
         &lvect,&WarpX::current_deposition_algo);
 
-#ifdef WARPX_RZ
-    // Rescale current in r-z mode
-    warpx_current_deposition_rz_volume_scaling(
-        jx_ptr, &ngJ, jxntot.getVect(),
-        jy_ptr, &ngJ, jyntot.getVect(),
-        jz_ptr, &ngJ, jzntot.getVect(),
-        &WarpX::n_rz_azimuthal_modes,
-        &xyzmin[0], &dx[0]);
-#endif
     BL_PROFILE_VAR_STOP(blp_pxr_cd);
 
 #ifndef AMREX_USE_GPU
@@ -416,6 +408,150 @@ WarpXParticleContainer::DepositCurrent(WarpXParIter& pti,
 #endif
 }
 
+/* \brief Current Deposition for thread thread_num
+ * \param pti         : Particle iterator
+ * \param wp          : Array of particle weights
+ * \param uxp uyp uzp : Array of particle
+ * \param jx jy jz    : Full array of current density
+ * \param offset      : Index of first particle for which current is deposited
+ * \param np_to_depose: Number of particles for which current is deposited.
+                        Particles [offset,offset+np_tp_depose] deposit current
+ * \param thread_num  : Thread number (if tiling)
+ * \param lev         : Level of box that contains particles
+ * \param depos_lev   : Level on which particles deposit (if buffers are used)
+ * \param dt          : Time step for particle level
+ */
+void
+WarpXParticleContainer::DepositCurrent(WarpXParIter& pti,
+                                       RealVector& wp, RealVector& uxp,
+                                       RealVector& uyp, RealVector& uzp,
+                                       MultiFab* jx, MultiFab* jy, MultiFab* jz,
+                                       const long offset, const long np_to_depose,
+                                       int thread_num, int lev, int depos_lev,
+                                       Real dt)
+{    
+    AMREX_ALWAYS_ASSERT_WITH_MESSAGE((depos_lev==(lev-1)) ||
+                                     (depos_lev==(lev  )),
+                                     "Deposition buffers only work for lev-1");
+    // If no particles, do not do anything
+    if (np_to_depose == 0) return;
+
+    const long ngJ = jx->nGrow();
+    const std::array<Real,3>& dx = WarpX::CellSize(std::max(depos_lev,0));
+    int j_is_nodal = jx->is_nodal() and jy->is_nodal() and jz->is_nodal();
+    Real q = this->charge;
+    const Real stagger_shift = j_is_nodal ? 0.0 : 0.5;
+
+    BL_PROFILE_VAR_NS("PPC::Evolve::Accumulate", blp_accumulate);
+
+    // Get tile box where current is deposited.
+    // The tile box is different when depositing in the buffers (depos_lev<lev)
+    // or when depositing inside the level (depos_lev=lev)
+    Box tilebox;
+    if (lev == depos_lev) {
+        tilebox = pti.tilebox();
+    } else {
+        const IntVect& ref_ratio = WarpX::RefRatio(depos_lev);
+        tilebox = amrex::coarsen(pti.tilebox(),ref_ratio);
+    }
+    
+    // Staggered tile boxes (different in each direction)
+    Box tbx = convert(tilebox, WarpX::jx_nodal_flag);
+    Box tby = convert(tilebox, WarpX::jy_nodal_flag);
+    Box tbz = convert(tilebox, WarpX::jz_nodal_flag);
+    tilebox.grow(ngJ);
+
+#ifdef AMREX_USE_GPU
+    // No tiling on GPU: jx_ptr points to the full
+    // jx array (same for jy_ptr and jz_ptr).
+    Array4<Real> const& jx_arr = jx->array(pti);
+    Array4<Real> const& jy_arr = jy->array(pti);
+    Array4<Real> const& jz_arr = jz->array(pti);
+#else
+    // Tiling is on: jx_ptr points to local_jx[thread_num]
+    // (same for jy_ptr and jz_ptr)
+    tbx.grow(ngJ);
+    tby.grow(ngJ);
+    tbz.grow(ngJ);
+
+    local_jx[thread_num].resize(tbx);
+    local_jy[thread_num].resize(tby);
+    local_jz[thread_num].resize(tbz);
+
+    // local_jx[thread_num] is set to zero
+    local_jx[thread_num].setVal(0.0);
+    local_jy[thread_num].setVal(0.0);
+    local_jz[thread_num].setVal(0.0);
+
+    Array4<Real> const& jx_arr = local_jx[thread_num].array();
+    Array4<Real> const& jy_arr = local_jy[thread_num].array();
+    Array4<Real> const& jz_arr = local_jz[thread_num].array();
+#endif
+    // GPU, no tiling: deposit directly in jx
+    // CPU, tiling: deposit into local_jx
+    // (same for jx and jz)
+
+    Real* AMREX_RESTRICT xp = m_xp[thread_num].dataPtr() + offset;
+    Real* AMREX_RESTRICT zp = m_zp[thread_num].dataPtr() + offset;
+    Real* AMREX_RESTRICT yp = m_yp[thread_num].dataPtr() + offset;
+
+    // Lower corner of tile box physical domain
+    // Note that this includes guard cells since it is after tilebox.ngrow
+    const std::array<Real, 3>& xyzmin = WarpX::LowerCorner(tilebox, depos_lev);
+    // xyzmin is built on pti.tilebox(), so it does 
+    // not include staggering, so the stagger_shift has to be done by hand.
+    // Alternatively, we could define xyzminx from tbx (and the same for 3 
+    // directions and for jx, jy, jz). This way, sx0 would not be needed.
+    // Better for memory? worth trying?    
+    const Dim3 lo = lbound(tilebox);
+
+    if (WarpX::current_deposition_algo == CurrentDepositionAlgo::Esirkepov) {
+        if        (WarpX::nox == 1){
+            doEsirkepovDepositionShapeN<1>(xp, yp, zp, wp.dataPtr() + offset, uxp.dataPtr() + offset, 
+                                           uyp.dataPtr() + offset, uzp.dataPtr() + offset, jx_arr, jy_arr, 
+                                           jz_arr, np_to_depose, dt, dx,
+                                           xyzmin, lo, q);
+        } else if (WarpX::nox == 2){
+            doEsirkepovDepositionShapeN<2>(xp, yp, zp, wp.dataPtr() + offset, uxp.dataPtr() + offset, 
+                                           uyp.dataPtr() + offset, uzp.dataPtr() + offset, jx_arr, jy_arr, 
+                                           jz_arr, np_to_depose, dt, dx,
+                                           xyzmin, lo, q);
+        } else if (WarpX::nox == 3){
+            doEsirkepovDepositionShapeN<3>(xp, yp, zp, wp.dataPtr() + offset, uxp.dataPtr() + offset, 
+                                           uyp.dataPtr() + offset, uzp.dataPtr() + offset, jx_arr, jy_arr, 
+                                           jz_arr, np_to_depose, dt, dx,
+                                           xyzmin, lo, q);
+        }
+    } else {
+        if        (WarpX::nox == 1){
+            doDepositionShapeN<1>(xp, yp, zp, wp.dataPtr() + offset, uxp.dataPtr() + offset, 
+                                  uyp.dataPtr() + offset, uzp.dataPtr() + offset, jx_arr, jy_arr, 
+                                  jz_arr, np_to_depose, dt, dx,
+                                  xyzmin, lo, stagger_shift, q);
+        } else if (WarpX::nox == 2){
+            doDepositionShapeN<2>(xp, yp, zp, wp.dataPtr() + offset, uxp.dataPtr() + offset, 
+                                  uyp.dataPtr() + offset, uzp.dataPtr() + offset, jx_arr, jy_arr, 
+                                  jz_arr, np_to_depose, dt, dx,
+                                  xyzmin, lo, stagger_shift, q);
+        } else if (WarpX::nox == 3){
+            doDepositionShapeN<3>(xp, yp, zp, wp.dataPtr() + offset, uxp.dataPtr() + offset, 
+                                  uyp.dataPtr() + offset, uzp.dataPtr() + offset, jx_arr, jy_arr, 
+                                  jz_arr, np_to_depose, dt, dx,
+                                  xyzmin, lo, stagger_shift, q);
+        }
+    }
+
+#ifndef AMREX_USE_GPU
+    BL_PROFILE_VAR_START(blp_accumulate);
+    // CPU, tiling: atomicAdd local_jx into jx
+    // (same for jx and jz)
+    (*jx)[pti].atomicAdd(local_jx[thread_num], tbx, tbx, 0, 0, 1);
+    (*jy)[pti].atomicAdd(local_jy[thread_num], tby, tby, 0, 0, 1);
+    (*jz)[pti].atomicAdd(local_jz[thread_num], tbz, tbz, 0, 0, 1);
+    BL_PROFILE_VAR_STOP(blp_accumulate);
+#endif
+}
+
 void
 WarpXParticleContainer::DepositCharge ( WarpXParIter& pti, RealVector& wp,
                                         MultiFab* rhomf, MultiFab* crhomf, int icomp,
@@ -475,7 +611,7 @@ WarpXParticleContainer::DepositCharge ( WarpXParIter& pti, RealVector& wp,
                               &ngRho, &ngRho, &ngRho,
                               &WarpX::nox,&WarpX::noy,&WarpX::noz,
                               &lvect, &WarpX::charge_deposition_algo);
-#ifdef WARPX_RZ
+#ifdef WARPX_DIM_RZ
       warpx_charge_deposition_rz_volume_scaling(
                                data_ptr, &ngRho, rholen.getVect(),
                                &xyzmin[0], &dx[0]);
@@ -535,7 +671,7 @@ WarpXParticleContainer::DepositCharge ( WarpXParIter& pti, RealVector& wp,
                               &ngRho, &ngRho, &ngRho,
                               &WarpX::nox,&WarpX::noy,&WarpX::noz,
                               &lvect, &WarpX::charge_deposition_algo);
-#ifdef WARPX_RZ
+#ifdef WARPX_DIM_RZ
       warpx_charge_deposition_rz_volume_scaling(
                                data_ptr, &ngRho, rholen.getVect(),
                                &cxyzmin_tile[0], &cdx[0]);
@@ -695,7 +831,7 @@ WarpXParticleContainer::GetChargeDensity (int lev, bool local)
                                     &dx[0], &dx[1], &dx[2], &nx, &ny, &nz,
                                     &nxg, &nyg, &nzg, &WarpX::nox,&WarpX::noy,&WarpX::noz,
                                     &lvect, &WarpX::charge_deposition_algo);
-#ifdef WARPX_RZ
+#ifdef WARPX_DIM_RZ
             long ngRho = WarpX::nox;
             warpx_charge_deposition_rz_volume_scaling(
                                      data_ptr, &ngRho, rholen.getVect(),
@@ -880,7 +1016,7 @@ WarpXParticleContainer::PushX (int lev, Real dt)
             Real* AMREX_RESTRICT ux = attribs[PIdx::ux].dataPtr();
             Real* AMREX_RESTRICT uy = attribs[PIdx::uy].dataPtr();
             Real* AMREX_RESTRICT uz = attribs[PIdx::uz].dataPtr();
-#ifdef WARPX_RZ
+#ifdef WARPX_DIM_RZ
             Real* AMREX_RESTRICT theta = attribs[PIdx::theta].dataPtr();
 #endif
             // Loop over the particles and update their position
@@ -888,12 +1024,12 @@ WarpXParticleContainer::PushX (int lev, Real dt)
                 [=] AMREX_GPU_DEVICE (long i) {
                     ParticleType& p = pstructs[i]; // Particle object that gets updated
                     Real x, y, z; // Temporary variables
-#ifndef WARPX_RZ
+#ifndef WARPX_DIM_RZ
                     GetPosition( x, y, z, p ); // Initialize x, y, z
                     UpdatePosition( x, y, z, ux[i], uy[i], uz[i], dt);
                     SetPosition( p, x, y, z ); // Update the object p
 #else
-                    // For WARPX_RZ, the particles are still pushed in 3D Cartesian
+                    // For WARPX_DIM_RZ, the particles are still pushed in 3D Cartesian
                     GetCartesianPositionFromCylindrical( x, y, z, p, theta[i] );
                     UpdatePosition( x, y, z, ux[i], uy[i], uz[i], dt);
                     SetCylindricalPositionFromCartesian( p, theta[i], x, y, z );
@@ -934,3 +1070,5 @@ WarpXParticleContainer::particlePostLocate(ParticleType& p,
     if (pld.m_lev == lev-1){
     }
 }
+
+
author	Dave Grote <grote1@llnl.gov>	2019-08-09 14:12:46 -0700
committer	Dave Grote <grote1@llnl.gov>	2019-08-09 14:12:46 -0700
commit	c3ce219b9d25e8d28e5a6cc5b878b3c5793cf90a (patch)
tree	d296f21c2e9051c21707f3fa419c8cfe892ea952 /Source/Particles/WarpXParticleContainer.cpp
parent	f86512d788477a8eab5ed3c3c92ce9a7453ae4c7 (diff)
parent	941a74cdee2d89c832d3fc682ef9a0973deddcce (diff)
download	WarpX-c3ce219b9d25e8d28e5a6cc5b878b3c5793cf90a.tar.gz WarpX-c3ce219b9d25e8d28e5a6cc5b878b3c5793cf90a.tar.zst WarpX-c3ce219b9d25e8d28e5a6cc5b878b3c5793cf90a.zip