aboutsummaryrefslogtreecommitdiff
path: root/Source/PhysicalParticleContainer.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'Source/PhysicalParticleContainer.cpp')
-rw-r--r--Source/PhysicalParticleContainer.cpp173
1 files changed, 112 insertions, 61 deletions
diff --git a/Source/PhysicalParticleContainer.cpp b/Source/PhysicalParticleContainer.cpp
index df0ee3b3c..d9fd55afa 100644
--- a/Source/PhysicalParticleContainer.cpp
+++ b/Source/PhysicalParticleContainer.cpp
@@ -607,7 +607,7 @@ PhysicalParticleContainer::FieldGather (int lev,
#pragma omp parallel
#endif
{
- Vector<Real> xp, yp, zp;
+ RealVector xp, yp, zp;
for (WarpXParIter pti(*this, lev); pti.isValid(); ++pti)
{
@@ -722,14 +722,19 @@ PhysicalParticleContainer::Evolve (int lev,
#pragma omp parallel
#endif
{
- Vector<Real> xp, yp, zp, giv;
- FArrayBox local_rho, local_jx, local_jy, local_jz;
+ RealVector xp, yp, zp, giv;
+
+ std::unique_ptr<FArrayBox> local_rho(new FArrayBox());
+ std::unique_ptr<FArrayBox> local_jx(new FArrayBox());
+ std::unique_ptr<FArrayBox> local_jy(new FArrayBox());
+ std::unique_ptr<FArrayBox> local_jz(new FArrayBox());
+
FArrayBox filtered_Ex, filtered_Ey, filtered_Ez;
FArrayBox filtered_Bx, filtered_By, filtered_Bz;
std::vector<bool> inexflag;
Vector<long> pid;
- Vector<Real> tmp;
- Vector<ParticleType> particle_tmp;
+ RealVector tmp;
+ ParticleVector particle_tmp;
for (WarpXParIter pti(*this, lev); pti.isValid(); ++pti)
{
@@ -827,12 +832,21 @@ PhysicalParticleContainer::Evolve (int lev,
FArrayBox& jyfab = jy[pti];
FArrayBox& jzfab = jz[pti];
+#ifdef AMREX_USE_CUDA
+ thrust::fill(thrust::device, thrust::device_ptr<Real>(Exp.data()), thrust::device_ptr<Real>(Exp.data() + np), 0.0);
+ thrust::fill(thrust::device, thrust::device_ptr<Real>(Eyp.data()), thrust::device_ptr<Real>(Eyp.data() + np), 0.0);
+ thrust::fill(thrust::device, thrust::device_ptr<Real>(Ezp.data()), thrust::device_ptr<Real>(Ezp.data() + np), 0.0);
+ thrust::fill(thrust::device, thrust::device_ptr<Real>(Bxp.data()), thrust::device_ptr<Real>(Bxp.data() + np), WarpX::B_external[0]);
+ thrust::fill(thrust::device, thrust::device_ptr<Real>(Byp.data()), thrust::device_ptr<Real>(Byp.data() + np), WarpX::B_external[1]);
+ thrust::fill(thrust::device, thrust::device_ptr<Real>(Bzp.data()), thrust::device_ptr<Real>(Bzp.data() + np), WarpX::B_external[2]);
+#else
Exp.assign(np,0.0);
Eyp.assign(np,0.0);
Ezp.assign(np,0.0);
Bxp.assign(np,WarpX::B_external[0]);
Byp.assign(np,WarpX::B_external[1]);
Bzp.assign(np,WarpX::B_external[2]);
+#endif
giv.resize(np);
@@ -954,10 +968,10 @@ PhysicalParticleContainer::Evolve (int lev,
FArrayBox& rhofab = (*rhomf)[pti];
const std::array<Real, 3>& xyzmin = xyzmin_tile;
tile_box.grow(ngRho);
- local_rho.resize(tile_box);
- local_rho = 0.0;
- data_ptr = local_rho.dataPtr();
- rholen = local_rho.length();
+ local_rho->resize(tile_box);
+ local_rho->setVal(0.0);
+ data_ptr = local_rho->dataPtr();
+ rholen = local_rho->length();
#if (AMREX_SPACEDIM == 3)
const long nx = rholen[0]-1-2*ngRho;
@@ -978,8 +992,12 @@ PhysicalParticleContainer::Evolve (int lev,
&lvect, &WarpX::charge_deposition_algo);
const int ncomp = 1;
- amrex_atomic_accumulate_fab(BL_TO_FORTRAN_3D(local_rho),
- BL_TO_FORTRAN_N_3D(rhofab,icomp), ncomp);
+ FArrayBox const* local_fab = local_rho.get();
+ FArrayBox* global_fab = &rhofab;
+ AMREX_CUDA_LAUNCH_HOST_DEVICE_LAMBDA(tile_box, tbx,
+ {
+ global_fab->atomicAdd(*local_fab, tbx, tbx, 0, icomp, ncomp);
+ });
}
if (np_current < np)
@@ -991,12 +1009,12 @@ PhysicalParticleContainer::Evolve (int lev,
tile_box = amrex::convert(ctilebox, IntVect::TheUnitVector());
tile_box.grow(ngRho);
- local_rho.resize(tile_box);
+ local_rho->resize(tile_box);
- local_rho = 0.0;
+ local_rho->setVal(0.0);
- data_ptr = local_rho.dataPtr();
- rholen = local_rho.length();
+ data_ptr = local_rho->dataPtr();
+ rholen = local_rho->length();
#if (AMREX_SPACEDIM == 3)
const long nx = rholen[0]-1-2*ngRho;
@@ -1024,8 +1042,12 @@ PhysicalParticleContainer::Evolve (int lev,
FArrayBox& crhofab = (*crhomf)[pti];
const int ncomp = 1;
- amrex_atomic_accumulate_fab(BL_TO_FORTRAN_3D(local_rho),
- BL_TO_FORTRAN_N_3D(crhofab,icomp), ncomp);
+ FArrayBox const* local_fab = local_rho.get();
+ FArrayBox* global_fab = &crhofab;
+ AMREX_CUDA_LAUNCH_HOST_DEVICE_LAMBDA(tile_box, tbx,
+ {
+ global_fab->atomicAdd(*local_fab, tbx, tbx, 0, icomp, ncomp);
+ });
}
};
@@ -1175,7 +1197,6 @@ PhysicalParticleContainer::Evolve (int lev,
Box tbx = convert(pti.tilebox(), WarpX::jx_nodal_flag);
Box tby = convert(pti.tilebox(), WarpX::jy_nodal_flag);
Box tbz = convert(pti.tilebox(), WarpX::jz_nodal_flag);
- Box gtbx, gtby, gtbz;
const std::array<Real, 3>& xyzmin = xyzmin_tile;
@@ -1185,21 +1206,21 @@ PhysicalParticleContainer::Evolve (int lev,
tby.grow(ngJ);
tbz.grow(ngJ);
- local_jx.resize(tbx);
- local_jy.resize(tby);
- local_jz.resize(tbz);
+ local_jx->resize(tbx);
+ local_jy->resize(tby);
+ local_jz->resize(tbz);
- local_jx = 0.0;
- local_jy = 0.0;
- local_jz = 0.0;
+ local_jx->setVal(0.0);
+ local_jy->setVal(0.0);
+ local_jz->setVal(0.0);
- jx_ptr = local_jx.dataPtr();
- jy_ptr = local_jy.dataPtr();
- jz_ptr = local_jz.dataPtr();
+ jx_ptr = local_jx->dataPtr();
+ jy_ptr = local_jy->dataPtr();
+ jz_ptr = local_jz->dataPtr();
- jxntot = local_jx.length();
- jyntot = local_jy.length();
- jzntot = local_jz.length();
+ jxntot = local_jx->length();
+ jyntot = local_jy->length();
+ jzntot = local_jz->length();
warpx_current_deposition(
jx_ptr, &ngJ, jxntot,
@@ -1216,15 +1237,28 @@ PhysicalParticleContainer::Evolve (int lev,
BL_PROFILE_VAR_STOP(blp_pxr_cd);
BL_PROFILE_VAR_START(blp_accumulate);
- const int ncomp = 1;
- amrex_atomic_accumulate_fab(BL_TO_FORTRAN_3D(local_jx),
- BL_TO_FORTRAN_3D(jxfab), ncomp);
-
- amrex_atomic_accumulate_fab(BL_TO_FORTRAN_3D(local_jy),
- BL_TO_FORTRAN_3D(jyfab), ncomp);
- amrex_atomic_accumulate_fab(BL_TO_FORTRAN_3D(local_jz),
- BL_TO_FORTRAN_3D(jzfab), ncomp);
+ FArrayBox const* local_jx_ptr = local_jx.get();
+ FArrayBox* global_jx_ptr = &jxfab;
+ AMREX_CUDA_LAUNCH_HOST_DEVICE_LAMBDA(tbx, thread_bx,
+ {
+ global_jx_ptr->atomicAdd(*local_jx_ptr, thread_bx, thread_bx, 0, 0, 1);
+ });
+
+ FArrayBox const* local_jy_ptr = local_jy.get();
+ FArrayBox* global_jy_ptr = &jyfab;
+ AMREX_CUDA_LAUNCH_HOST_DEVICE_LAMBDA(tby, thread_bx,
+ {
+ global_jy_ptr->atomicAdd(*local_jy_ptr, thread_bx, thread_bx, 0, 0, 1);
+ });
+
+ FArrayBox const* local_jz_ptr = local_jz.get();
+ FArrayBox* global_jz_ptr = &jzfab;
+ AMREX_CUDA_LAUNCH_HOST_DEVICE_LAMBDA(tbz, thread_bx,
+ {
+ global_jz_ptr->atomicAdd(*local_jz_ptr, thread_bx, thread_bx, 0, 0, 1);
+ });
+
BL_PROFILE_VAR_STOP(blp_accumulate);
}
@@ -1241,21 +1275,21 @@ PhysicalParticleContainer::Evolve (int lev,
tby.grow(ngJ);
tbz.grow(ngJ);
- local_jx.resize(tbx);
- local_jy.resize(tby);
- local_jz.resize(tbz);
+ local_jx->resize(tbx);
+ local_jy->resize(tby);
+ local_jz->resize(tbz);
- local_jx = 0.0;
- local_jy = 0.0;
- local_jz = 0.0;
+ local_jx->setVal(0.0);
+ local_jy->setVal(0.0);
+ local_jz->setVal(0.0);
- jx_ptr = local_jx.dataPtr();
- jy_ptr = local_jy.dataPtr();
- jz_ptr = local_jz.dataPtr();
+ jx_ptr = local_jx->dataPtr();
+ jy_ptr = local_jy->dataPtr();
+ jz_ptr = local_jz->dataPtr();
- jxntot = local_jx.length();
- jyntot = local_jy.length();
- jzntot = local_jz.length();
+ jxntot = local_jx->length();
+ jyntot = local_jy->length();
+ jzntot = local_jz->length();
long ncrse = np - nfine_current;
warpx_current_deposition(
@@ -1274,13 +1308,30 @@ PhysicalParticleContainer::Evolve (int lev,
FArrayBox& cjyfab = (*cjy)[pti];
FArrayBox& cjzfab = (*cjz)[pti];
- const int ncomp = 1;
- amrex_atomic_accumulate_fab(BL_TO_FORTRAN_3D(local_jx),
- BL_TO_FORTRAN_3D(cjxfab), ncomp);
- amrex_atomic_accumulate_fab(BL_TO_FORTRAN_3D(local_jy),
- BL_TO_FORTRAN_3D(cjyfab), ncomp);
- amrex_atomic_accumulate_fab(BL_TO_FORTRAN_3D(local_jz),
- BL_TO_FORTRAN_3D(cjzfab), ncomp);
+ BL_PROFILE_VAR_START(blp_accumulate);
+
+ FArrayBox const* local_jx_ptr = local_jx.get();
+ FArrayBox* global_jx_ptr = &cjxfab;
+ AMREX_CUDA_LAUNCH_HOST_DEVICE_LAMBDA(tbx, thread_bx,
+ {
+ global_jx_ptr->atomicAdd(*local_jx_ptr, thread_bx, thread_bx, 0, 0, 1);
+ });
+
+ FArrayBox const* local_jy_ptr = local_jy.get();
+ FArrayBox* global_jy_ptr = &cjyfab;
+ AMREX_CUDA_LAUNCH_HOST_DEVICE_LAMBDA(tby, thread_bx,
+ {
+ global_jy_ptr->atomicAdd(*local_jy_ptr, thread_bx, thread_bx, 0, 0, 1);
+ });
+
+ FArrayBox const* local_jz_ptr = local_jz.get();
+ FArrayBox* global_jz_ptr = &cjzfab;
+ AMREX_CUDA_LAUNCH_HOST_DEVICE_LAMBDA(tbz, thread_bx,
+ {
+ global_jz_ptr->atomicAdd(*local_jz_ptr, thread_bx, thread_bx, 0, 0, 1);
+ });
+
+ BL_PROFILE_VAR_STOP(blp_accumulate);
}
//
@@ -1304,8 +1355,8 @@ PhysicalParticleContainer::Evolve (int lev,
void
PhysicalParticleContainer::PushPX(WarpXParIter& pti,
- Vector<Real>& xp, Vector<Real>& yp, Vector<Real>& zp,
- Vector<Real>& giv,
+ RealVector& xp, RealVector& yp, RealVector& zp,
+ RealVector& giv,
Real dt)
{
@@ -1359,7 +1410,7 @@ PhysicalParticleContainer::PushP (int lev, Real dt,
#pragma omp parallel
#endif
{
- Vector<Real> xp, yp, zp, giv;
+ RealVector xp, yp, zp, giv;
for (WarpXParIter pti(*this, lev); pti.isValid(); ++pti)
{
@@ -1482,7 +1533,7 @@ void PhysicalParticleContainer::GetParticleSlice(const int direction, const Real
#pragma omp parallel
#endif
{
- Vector<Real> xp_new, yp_new, zp_new;
+ RealVector xp_new, yp_new, zp_new;
for (WarpXParIter pti(*this, lev); pti.isValid(); ++pti)
{