aboutsummaryrefslogtreecommitdiff
path: root/Source/Particles/WarpXParticleContainer.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'Source/Particles/WarpXParticleContainer.cpp')
-rw-r--r--Source/Particles/WarpXParticleContainer.cpp248
1 files changed, 80 insertions, 168 deletions
diff --git a/Source/Particles/WarpXParticleContainer.cpp b/Source/Particles/WarpXParticleContainer.cpp
index 69f7aa3d0..cd0c37bd8 100644
--- a/Source/Particles/WarpXParticleContainer.cpp
+++ b/Source/Particles/WarpXParticleContainer.cpp
@@ -310,7 +310,6 @@ WarpXParticleContainer::DepositCurrent(WarpXParIter& pti,
const long ngJ = jx->nGrow();
const std::array<Real,3>& dx = WarpX::CellSize(std::max(depos_lev,0));
- const long lvect = 8;
int j_is_nodal = jx->is_nodal() and jy->is_nodal() and jz->is_nodal();
BL_PROFILE_VAR_NS("PICSAR::CurrentDeposition", blp_pxr_cd);
@@ -332,13 +331,7 @@ WarpXParticleContainer::DepositCurrent(WarpXParIter& pti,
Box tby = convert(tilebox, WarpX::jy_nodal_flag);
Box tbz = convert(tilebox, WarpX::jz_nodal_flag);
- // Lower corner of tile box physical domain
- // const std::array<Real,3>& xyzmin_tile = WarpX::LowerCorner(tilebox, depos_lev);
- // const std::array<Real, 3>& xyzmin = xyzmin_tile;
tilebox.grow(ngJ);
- const std::array<Real, 3>& xyzmin = WarpX::LowerCorner(tilebox, depos_lev);;
-
- // Print()<<" xyzmin_tile "<<xyzmin_tile[0]<<' '<<xyzmin_tile[1]<<' '<<xyzmin_tile[2]<<'\n';
#ifdef AMREX_USE_GPU
// No tiling on GPU: jx_ptr points to the full
@@ -357,12 +350,6 @@ WarpXParticleContainer::DepositCurrent(WarpXParIter& pti,
tby.grow(ngJ);
tbz.grow(ngJ);
- const std::array<Real, 3>& xyzminx = WarpX::LowerCorner(tbx, depos_lev);;
- const std::array<Real, 3>& xyzminy = WarpX::LowerCorner(tby, depos_lev);;
- const std::array<Real, 3>& xyzminz = WarpX::LowerCorner(tbz, depos_lev);;
-
- Print()<<"ngJ "<<ngJ<<'\n';
-
local_jx[thread_num].resize(tbx);
local_jy[thread_num].resize(tby);
local_jz[thread_num].resize(tbz);
@@ -377,162 +364,87 @@ WarpXParticleContainer::DepositCurrent(WarpXParIter& pti,
// CPU, tiling: deposit into local_jx
// (same for jx and jz)
BL_PROFILE_VAR_START(blp_pxr_cd);
- bool use_new = true;
- if (use_new){
- Real dxi = 1.0/dx[0];
- Real dzi = 1.0/dx[2];
- Real invvol = dxi*dzi;
- Real dts2dx = 0.5*dt*dxi;
- Real dts2dz = 0.5*dt*dzi;
- Real clightsq = 1.0/PhysConst::c/PhysConst::c;
- Real c = PhysConst::c;
-
- Real xmin = xyzmin[0];
- Real zmin = xyzmin[2];
- /*
- Real xminx = xyzminx[0];
- Real zminx = xyzminx[2];
- Real xminy = xyzminy[0];
- Real zminy = xyzminy[2];
- Real xminz = xyzminz[0];
- Real zminz = xyzminz[2];
- */
- Real q = this->charge;
- Real stagger_shift = j_is_nodal ? 0.0 : 0.5;
- // Print()<<"xmin "<<xmin<<" zmin "<<zmin<<" stagger_shift "<<stagger_shift<<'\n';
- Print()<<"dxi "<<dxi<<" dzi "<<dzi<<'\n';
-
- /* works for GPU/no tiling
- auto jx_arr = jx->array(pti);
- auto jy_arr = jy->array(pti);
- auto jz_arr = jz->array(pti);
- */
-
- Array4<Real> const& jx_arr = local_jx[thread_num].array();
- Array4<Real> const& jy_arr = local_jy[thread_num].array();
- Array4<Real> const& jz_arr = local_jz[thread_num].array();
-
- Print()<<"tbx"<<tbx<<'\n';
- Print()<<"lbound(tbx)"<<"lbound(tbx).x "<<lbound(tbx).x<<"lbound(tbx).y "<<lbound(tbx).y<<"lbound(tbz).z "<<lbound(tbx).z<<'\n';
- Print()<<"lbound(tbx)"<<lbound(tbx)<<'\n';
- Dim3 lox = lbound(tbx);
- Dim3 loy = lbound(tby);
- Dim3 loz = lbound(tbz);
-
- // - momenta are stored as a struct of array, in `attribs`
- // auto& attribs = pti.GetAttribs();
-
- // Real* AMREX_RESTRICT uxp = attribs[PIdx::ux].dataPtr();
- // Real* AMREX_RESTRICT uyp = attribs[PIdx::uy].dataPtr();
- // Real* AMREX_RESTRICT uzp = attribs[PIdx::uz].dataPtr();
- // Real* AMREX_RESTRICT w = attribs[PIdx::w ].dataPtr();
-
- //Cuda::ManagedDeviceVector<Real> Vx, Vy, Vz;
- //pti.GetPosition(Vx, Vy, Vz);
- //Real* AMREX_RESTRICT xp = Vx.dataPtr() + offset;
- //Real* AMREX_RESTRICT zp = Vz.dataPtr() + offset;
-
- Real* AMREX_RESTRICT xp = m_xp[thread_num].dataPtr() + offset;
- Real* AMREX_RESTRICT zp = m_zp[thread_num].dataPtr() + offset;
-
- Print()<<"np_to_depose "<<np_to_depose<<'\n';
- ParallelFor( np_to_depose,
- [=] AMREX_GPU_DEVICE (long ip) {
- std::cout<<" ip "<<ip<<'\n';
- Real gaminv = 1.0/std::sqrt(1.0 + uxp[ip]*uxp[ip]*clightsq
- + uyp[ip]*uyp[ip]*clightsq
- + uzp[ip]*uzp[ip]*clightsq);
-
- Real vx = uxp[ip]*gaminv;
- Real vy = uyp[ip]*gaminv;
- Real vz = uzp[ip]*gaminv;
-
- std::cout<<" uxp[ip] "<<uxp[ip]<<'\n';
- std::cout<<" uyp[ip] "<<uyp[ip]<<'\n';
- std::cout<<" uzp[ip] "<<uzp[ip]<<'\n';
-
- std::cout<<" vx "<<vx<<'\n';
- std::cout<<" vz "<<vz<<'\n';
-
- Real wq=q*wp[ip];
- Real wqx=wq*invvol*vx;
- Real wqy=wq*invvol*vy;
- Real wqz=wq*invvol*vz;
-
-
- // int j0= (int) xmid;
- // int l0= (int) zmid;
- // std::cout<<" j "<<j<<" j0 "<<j0<<'\n';
- // std::cout<<" l "<<l<<" l0 "<<l0<<'\n';
-
- Real x = (Real) (xp[ip]-xmin)*dxi;
- Real z = (Real) (zp[ip]-zmin)*dzi;
-
- Real xmid=x-dts2dx*vx;
- Real zmid=z-dts2dz*vz;
-
- int j = (int) xmid;
- int l = (int) zmid;
- int j0= (int) (xmid-stagger_shift);
- int l0= (int) (zmid-stagger_shift);
-
- Real xint = xmid-j;
- Real zint = zmid-l;
-
- Real sx[2] = {1.0 - xint, xint};
- Real sz[2] = {1.0 - zint, zint};
-
- xint = xmid-stagger_shift-j0;
- zint = zmid-stagger_shift-l0;
-
- Real sx0[2] = {1.0 - xint, xint};
- Real sz0[2] = {1.0 - zint, zint};
-
- jx_arr(lox.x+j0 ,lox.y+l ,0) += sx0[0]*sz[0]*wqx;
- jx_arr(lox.x+j0+1,lox.y+l ,0) += sx0[1]*sz[0]*wqx;
- jx_arr(lox.x+j0 ,lox.y+l+1,0) += sx0[0]*sz[1]*wqx;
- jx_arr(lox.x+j0+1,lox.y+l+1,0) += sx0[1]*sz[1]*wqx;
-
- jy_arr(loy.x+j ,loy.y+l ,0) += sx[0]*sz[0]*wqy;
- jy_arr(loy.x+j+1,loy.y+l ,0) += sx[1]*sz[0]*wqy;
- jy_arr(loy.x+j ,loy.y+l+1,0) += sx[0]*sz[1]*wqy;
- jy_arr(loy.x+j+1,loy.y+l+1,0) += sx[1]*sz[1]*wqy;
-
- jz_arr(loz.x+j ,loz.y+l0 ,0) += sx[0]*sz0[0]*wqz;
- jz_arr(loz.x+j+1,loz.y+l0 ,0) += sx[1]*sz0[0]*wqz;
- jz_arr(loz.x+j ,loz.y+l0+1,0) += sx[0]*sz0[1]*wqz;
- jz_arr(loz.x+j+1,loz.y+l0+1,0) += sx[1]*sz0[1]*wqz;
- }
- );
- Print()<<"ParallelFor done!\n";
- } else {
+ Real dxi = 1.0/dx[0];
+ Real dzi = 1.0/dx[2];
+ Real invvol = dxi*dzi;
+ Real dts2dx = 0.5*dt*dxi;
+ Real dts2dz = 0.5*dt*dzi;
+ Real clightsq = 1.0/PhysConst::c/PhysConst::c;
+ Real q = this->charge;
- Real* jx_ptr = local_jx[thread_num].dataPtr();
- Real* jy_ptr = local_jy[thread_num].dataPtr();
- Real* jz_ptr = local_jz[thread_num].dataPtr();
-
- auto jxntot = local_jx[thread_num].length();
- auto jyntot = local_jy[thread_num].length();
- auto jzntot = local_jz[thread_num].length();
-
- warpx_current_deposition(
- jx_ptr, &ngJ, jxntot.getVect(),
- jy_ptr, &ngJ, jyntot.getVect(),
- jz_ptr, &ngJ, jzntot.getVect(),
- &np_to_depose,
- m_xp[thread_num].dataPtr() + offset,
- m_yp[thread_num].dataPtr() + offset,
- m_zp[thread_num].dataPtr() + offset,
- uxp.dataPtr() + offset,
- uyp.dataPtr() + offset,
- uzp.dataPtr() + offset,
- m_giv[thread_num].dataPtr() + offset,
- wp.dataPtr() + offset, &this->charge,
- &xyzmin[0], &xyzmin[1], &xyzmin[2],
- &dt, &dx[0], &dx[1], &dx[2],
- &WarpX::nox,&WarpX::noy,&WarpX::noz, &j_is_nodal,
- &lvect,&WarpX::current_deposition_algo);
- }
+ // Lower corner of tile box physical domain
+ const std::array<Real, 3>& xyzmin = WarpX::LowerCorner(tilebox, depos_lev);;
+ // xmin and zmin are built on pti.tilebox(), so it does
+ // not include staggering, so the stagger_shift has to be done by hand.
+ // Alternatively, we could define xminx from tbx (and the same for 3
+ // directions and for jx, jy, jz). This way, sxo would not be needed.
+ // Better for memory? worth trying?
+ const Real xmin = xyzmin[0];
+ const Real zmin = xyzmin[2];
+ const Real stagger_shift = j_is_nodal ? 0.0 : 0.5;
+
+ Array4<Real> const& jx_arr = local_jx[thread_num].array();
+ Array4<Real> const& jy_arr = local_jy[thread_num].array();
+ Array4<Real> const& jz_arr = local_jz[thread_num].array();
+
+ const Dim3 lo = lbound(tilebox);
+
+ Real* AMREX_RESTRICT xp = m_xp[thread_num].dataPtr() + offset;
+ Real* AMREX_RESTRICT zp = m_zp[thread_num].dataPtr() + offset;
+
+ ParallelFor( np_to_depose,
+ [=] AMREX_GPU_DEVICE (long ip) {
+ Real gaminv = 1.0/std::sqrt(1.0 + uxp[ip]*uxp[ip]*clightsq
+ + uyp[ip]*uyp[ip]*clightsq
+ + uzp[ip]*uzp[ip]*clightsq);
+
+ Real vx = uxp[ip]*gaminv;
+ Real vy = uyp[ip]*gaminv;
+ Real vz = uzp[ip]*gaminv;
+
+ Real wq=q*wp[ip];
+ Real wqx=wq*invvol*vx;
+ Real wqy=wq*invvol*vy;
+ Real wqz=wq*invvol*vz;
+
+ Real x = (Real) (xp[ip]-xmin)*dxi;
+ Real z = (Real) (zp[ip]-zmin)*dzi;
+ Real xmid=x-dts2dx*vx;
+ Real zmid=z-dts2dz*vz;
+
+ int j = (int) xmid;
+ int l = (int) zmid;
+ int j0= (int) (xmid-stagger_shift);
+ int l0= (int) (zmid-stagger_shift);
+
+ Real xint = xmid-j;
+ Real zint = zmid-l;
+
+ Real sx[2] = {1.0 - xint, xint};
+ Real sz[2] = {1.0 - zint, zint};
+
+ xint = xmid-stagger_shift-j0;
+ zint = zmid-stagger_shift-l0;
+
+ Real sx0[2] = {1.0 - xint, xint};
+ Real sz0[2] = {1.0 - zint, zint};
+
+ jx_arr(lo.x+j0 ,lo.y+l ,0) += sx0[0]*sz[0]*wqx;
+ jx_arr(lo.x+j0+1,lo.y+l ,0) += sx0[1]*sz[0]*wqx;
+ jx_arr(lo.x+j0 ,lo.y+l+1,0) += sx0[0]*sz[1]*wqx;
+ jx_arr(lo.x+j0+1,lo.y+l+1,0) += sx0[1]*sz[1]*wqx;
+
+ jy_arr(lo.x+j ,lo.y+l ,0) += sx[0]*sz[0]*wqy;
+ jy_arr(lo.x+j+1,lo.y+l ,0) += sx[1]*sz[0]*wqy;
+ jy_arr(lo.x+j ,lo.y+l+1,0) += sx[0]*sz[1]*wqy;
+ jy_arr(lo.x+j+1,lo.y+l+1,0) += sx[1]*sz[1]*wqy;
+
+ jz_arr(lo.x+j ,lo.y+l0 ,0) += sx[0]*sz0[0]*wqz;
+ jz_arr(lo.x+j+1,lo.y+l0 ,0) += sx[1]*sz0[0]*wqz;
+ jz_arr(lo.x+j ,lo.y+l0+1,0) += sx[0]*sz0[1]*wqz;
+ jz_arr(lo.x+j+1,lo.y+l0+1,0) += sx[1]*sz0[1]*wqz;
+ }
+ );
#ifdef WARPX_RZ
// Rescale current in r-z mode
warpx_current_deposition_rz_volume_scaling(