aboutsummaryrefslogtreecommitdiff
path: root/Source/Particles/WarpXParticleContainer.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'Source/Particles/WarpXParticleContainer.cpp')
-rw-r--r--Source/Particles/WarpXParticleContainer.cpp541
1 files changed, 429 insertions, 112 deletions
diff --git a/Source/Particles/WarpXParticleContainer.cpp b/Source/Particles/WarpXParticleContainer.cpp
index de49bb12b..b6e33180a 100644
--- a/Source/Particles/WarpXParticleContainer.cpp
+++ b/Source/Particles/WarpXParticleContainer.cpp
@@ -12,6 +12,7 @@
#include "ablastr/particles/DepositCharge.H"
#include "Deposition/ChargeDeposition.H"
#include "Deposition/CurrentDeposition.H"
+#include "Deposition/SharedDepositionUtils.H"
#include "Pusher/GetAndSetPosition.H"
#include "Pusher/UpdatePosition.H"
#include "ParticleBoundaries_K.H"
@@ -372,6 +373,13 @@ WarpXParticleContainer::DepositCurrent (WarpXParIter& pti,
const std::array<Real,3>& dx = WarpX::CellSize(std::max(depos_lev,0));
amrex::ParticleReal q = this->charge;
+ WARPX_PROFILE_VAR_NS("WarpXParticleContainer::DepositCurrent::Sorting", blp_sort);
+ WARPX_PROFILE_VAR_NS("WarpXParticleContainer::DepositCurrent::FindMaxTilesize",
+ blp_get_max_tilesize);
+ WARPX_PROFILE_VAR_NS("WarpXParticleContainer::DepositCurrent::DirectCurrentDepKernel",
+ direct_current_dep_kernel);
+ WARPX_PROFILE_VAR_NS("WarpXParticleContainer::DepositCurrent::EsirkepovCurrentDepKernel",
+ esirkepov_current_dep_kernel);
WARPX_PROFILE_VAR_NS("WarpXParticleContainer::DepositCurrent::CurrentDeposition", blp_deposit);
WARPX_PROFILE_VAR_NS("WarpXParticleContainer::DepositCurrent::Accumulate", blp_accumulate);
@@ -445,75 +453,159 @@ WarpXParticleContainer::DepositCurrent (WarpXParIter& pti,
amrex::LayoutData<amrex::Real> * const costs = WarpX::getCosts(lev);
amrex::Real * const cost = costs ? &((*costs)[pti.index()]) : nullptr;
- if (WarpX::current_deposition_algo == CurrentDepositionAlgo::Esirkepov) {
- if (WarpX::nox == 1){
- doEsirkepovDepositionShapeN<1>(
- GetPosition, wp.dataPtr() + offset, uxp.dataPtr() + offset,
- uyp.dataPtr() + offset, uzp.dataPtr() + offset, ion_lev,
- jx_arr, jy_arr, jz_arr, np_to_depose, dt, relative_time, dx, xyzmin, lo, q,
- WarpX::n_rz_azimuthal_modes, cost,
- WarpX::load_balance_costs_update_algo);
- } else if (WarpX::nox == 2){
- doEsirkepovDepositionShapeN<2>(
- GetPosition, wp.dataPtr() + offset, uxp.dataPtr() + offset,
- uyp.dataPtr() + offset, uzp.dataPtr() + offset, ion_lev,
- jx_arr, jy_arr, jz_arr, np_to_depose, dt, relative_time, dx, xyzmin, lo, q,
- WarpX::n_rz_azimuthal_modes, cost,
- WarpX::load_balance_costs_update_algo);
- } else if (WarpX::nox == 3){
- doEsirkepovDepositionShapeN<3>(
- GetPosition, wp.dataPtr() + offset, uxp.dataPtr() + offset,
- uyp.dataPtr() + offset, uzp.dataPtr() + offset, ion_lev,
- jx_arr, jy_arr, jz_arr, np_to_depose, dt, relative_time, dx, xyzmin, lo, q,
- WarpX::n_rz_azimuthal_modes, cost,
- WarpX::load_balance_costs_update_algo);
+ // If doing shared mem current deposition, get tile info
+ if (WarpX::do_shared_mem_current_deposition) {
+ const Geometry& geom = Geom(lev);
+ const auto dxi = geom.InvCellSizeArray();
+ const auto plo = geom.ProbLoArray();
+ const auto domain = geom.Domain();
+
+ Box box = pti.validbox();
+ box.grow(ng_J);
+ amrex::IntVect bin_size = WarpX::shared_tilesize;
+
+ //sort particles by bin
+ WARPX_PROFILE_VAR_START(blp_sort);
+ amrex::DenseBins<ParticleType> bins;
+ {
+ auto& ptile = ParticlesAt(lev, pti);
+ auto& aos = ptile.GetArrayOfStructs();
+ auto pstruct_ptr = aos().dataPtr();
+
+ int ntiles = numTilesInBox(box, true, bin_size);
+
+ bins.build(ptile.numParticles(), pstruct_ptr, ntiles,
+ [=] AMREX_GPU_HOST_DEVICE (const ParticleType& p) -> unsigned int
+ {
+ Box tbox;
+ auto iv = getParticleCell(p, plo, dxi, domain);
+ AMREX_ASSERT(box.contains(iv));
+ auto tid = getTileIndex(iv, box, true, bin_size, tbox);
+ return static_cast<unsigned int>(tid);
+ });
}
- } else if (WarpX::current_deposition_algo == CurrentDepositionAlgo::Vay) {
- // jx_fab, jy_fab and jz_fab are Vay currents (D), not physical currents (j)
- if (WarpX::nox == 1){
- doVayDepositionShapeN<1>(
- GetPosition, wp.dataPtr() + offset, uxp.dataPtr() + offset,
- uyp.dataPtr() + offset, uzp.dataPtr() + offset, ion_lev,
- jx_fab, jy_fab, jz_fab, np_to_depose, dt, relative_time, dx, xyzmin, lo, q,
- WarpX::n_rz_azimuthal_modes, cost,
- WarpX::load_balance_costs_update_algo);
- } else if (WarpX::nox == 2){
- doVayDepositionShapeN<2>(
- GetPosition, wp.dataPtr() + offset, uxp.dataPtr() + offset,
- uyp.dataPtr() + offset, uzp.dataPtr() + offset, ion_lev,
- jx_fab, jy_fab, jz_fab, np_to_depose, dt, relative_time, dx, xyzmin, lo, q,
- WarpX::n_rz_azimuthal_modes, cost,
- WarpX::load_balance_costs_update_algo);
- } else if (WarpX::nox == 3){
- doVayDepositionShapeN<3>(
- GetPosition, wp.dataPtr() + offset, uxp.dataPtr() + offset,
- uyp.dataPtr() + offset, uzp.dataPtr() + offset, ion_lev,
- jx_fab, jy_fab, jz_fab, np_to_depose, dt, relative_time, dx, xyzmin, lo, q,
- WarpX::n_rz_azimuthal_modes, cost,
- WarpX::load_balance_costs_update_algo);
+ WARPX_PROFILE_VAR_STOP(blp_sort);
+ WARPX_PROFILE_VAR_START(blp_get_max_tilesize);
+ //get the maximum size necessary for shared mem
+ // get tile boxes
+ //get the maximum size necessary for shared mem
+#if AMREX_SPACEDIM > 0
+ int sizeX = getMaxTboxAlongDim(box.size()[0], WarpX::shared_tilesize[0]);
+#endif
+#if AMREX_SPACEDIM > 1
+ int sizeZ = getMaxTboxAlongDim(box.size()[1], WarpX::shared_tilesize[1]);
+#endif
+#if AMREX_SPACEDIM > 2
+ int sizeY = getMaxTboxAlongDim(box.size()[2], WarpX::shared_tilesize[2]);
+#endif
+ amrex::IntVect max_tbox_size( AMREX_D_DECL(sizeX,sizeZ,sizeY) );
+ WARPX_PROFILE_VAR_STOP(blp_get_max_tilesize);
+
+ // Now pick current deposition algorithm
+ if (WarpX::current_deposition_algo == CurrentDepositionAlgo::Esirkepov) {
+ amrex::Abort("Cannot do shared memory deposition with Esirkepov algorithm");
}
- } else {
- if (WarpX::nox == 1){
- doDepositionShapeN<1>(
- GetPosition, wp.dataPtr() + offset, uxp.dataPtr() + offset,
- uyp.dataPtr() + offset, uzp.dataPtr() + offset, ion_lev,
- jx_fab, jy_fab, jz_fab, np_to_depose, relative_time, dx,
- xyzmin, lo, q, WarpX::n_rz_azimuthal_modes, cost,
- WarpX::load_balance_costs_update_algo);
- } else if (WarpX::nox == 2){
- doDepositionShapeN<2>(
- GetPosition, wp.dataPtr() + offset, uxp.dataPtr() + offset,
- uyp.dataPtr() + offset, uzp.dataPtr() + offset, ion_lev,
- jx_fab, jy_fab, jz_fab, np_to_depose, relative_time, dx,
- xyzmin, lo, q, WarpX::n_rz_azimuthal_modes, cost,
- WarpX::load_balance_costs_update_algo);
- } else if (WarpX::nox == 3){
- doDepositionShapeN<3>(
- GetPosition, wp.dataPtr() + offset, uxp.dataPtr() + offset,
- uyp.dataPtr() + offset, uzp.dataPtr() + offset, ion_lev,
- jx_fab, jy_fab, jz_fab, np_to_depose, relative_time, dx,
- xyzmin, lo, q, WarpX::n_rz_azimuthal_modes, cost,
- WarpX::load_balance_costs_update_algo);
+ else if (WarpX::current_deposition_algo == CurrentDepositionAlgo::Vay) {
+ amrex::Abort("Cannot do shared memory deposition with Vay algorithm");
+ }
+ else {
+ WARPX_PROFILE_VAR_START(direct_current_dep_kernel);
+ if (WarpX::nox == 1){
+ doDepositionSharedShapeN<1>(
+ GetPosition, wp.dataPtr() + offset, uxp.dataPtr() + offset,
+ uyp.dataPtr() + offset, uzp.dataPtr() + offset, ion_lev,
+ jx_fab, jy_fab, jz_fab, np_to_depose, relative_time, dx,
+ xyzmin, lo, q, WarpX::n_rz_azimuthal_modes, cost,
+ WarpX::load_balance_costs_update_algo, bins, box, geom, max_tbox_size);
+ } else if (WarpX::nox == 2){
+ doDepositionSharedShapeN<2>(
+ GetPosition, wp.dataPtr() + offset, uxp.dataPtr() + offset,
+ uyp.dataPtr() + offset, uzp.dataPtr() + offset, ion_lev,
+ jx_fab, jy_fab, jz_fab, np_to_depose, relative_time, dx,
+ xyzmin, lo, q, WarpX::n_rz_azimuthal_modes, cost,
+ WarpX::load_balance_costs_update_algo, bins, box, geom, max_tbox_size);
+ } else if (WarpX::nox == 3){
+ doDepositionSharedShapeN<3>(
+ GetPosition, wp.dataPtr() + offset, uxp.dataPtr() + offset,
+ uyp.dataPtr() + offset, uzp.dataPtr() + offset, ion_lev,
+ jx_fab, jy_fab, jz_fab, np_to_depose, relative_time, dx,
+ xyzmin, lo, q, WarpX::n_rz_azimuthal_modes, cost,
+ WarpX::load_balance_costs_update_algo, bins, box, geom, max_tbox_size);
+ }
+ WARPX_PROFILE_VAR_STOP(direct_current_dep_kernel);
+ }
+ }
+ // If not doing shared memory deposition, call normal kernels
+ else {
+ if (WarpX::current_deposition_algo == CurrentDepositionAlgo::Esirkepov) {
+ if (WarpX::nox == 1){
+ doEsirkepovDepositionShapeN<1>(
+ GetPosition, wp.dataPtr() + offset, uxp.dataPtr() + offset,
+ uyp.dataPtr() + offset, uzp.dataPtr() + offset, ion_lev,
+ jx_arr, jy_arr, jz_arr, np_to_depose, dt, relative_time, dx, xyzmin, lo, q,
+ WarpX::n_rz_azimuthal_modes, cost,
+ WarpX::load_balance_costs_update_algo);
+ } else if (WarpX::nox == 2){
+ doEsirkepovDepositionShapeN<2>(
+ GetPosition, wp.dataPtr() + offset, uxp.dataPtr() + offset,
+ uyp.dataPtr() + offset, uzp.dataPtr() + offset, ion_lev,
+ jx_arr, jy_arr, jz_arr, np_to_depose, dt, relative_time, dx, xyzmin, lo, q,
+ WarpX::n_rz_azimuthal_modes, cost,
+ WarpX::load_balance_costs_update_algo);
+ } else if (WarpX::nox == 3){
+ doEsirkepovDepositionShapeN<3>(
+ GetPosition, wp.dataPtr() + offset, uxp.dataPtr() + offset,
+ uyp.dataPtr() + offset, uzp.dataPtr() + offset, ion_lev,
+ jx_arr, jy_arr, jz_arr, np_to_depose, dt, relative_time, dx, xyzmin, lo, q,
+ WarpX::n_rz_azimuthal_modes, cost,
+ WarpX::load_balance_costs_update_algo);
+ }
+ } else if (WarpX::current_deposition_algo == CurrentDepositionAlgo::Vay) {
+ if (WarpX::nox == 1){
+ doVayDepositionShapeN<1>(
+ GetPosition, wp.dataPtr() + offset, uxp.dataPtr() + offset,
+ uyp.dataPtr() + offset, uzp.dataPtr() + offset, ion_lev,
+ jx_fab, jy_fab, jz_fab, np_to_depose, dt, relative_time, dx, xyzmin, lo, q,
+ WarpX::n_rz_azimuthal_modes, cost,
+ WarpX::load_balance_costs_update_algo);
+ } else if (WarpX::nox == 2){
+ doVayDepositionShapeN<2>(
+ GetPosition, wp.dataPtr() + offset, uxp.dataPtr() + offset,
+ uyp.dataPtr() + offset, uzp.dataPtr() + offset, ion_lev,
+ jx_fab, jy_fab, jz_fab, np_to_depose, dt, relative_time, dx, xyzmin, lo, q,
+ WarpX::n_rz_azimuthal_modes, cost,
+ WarpX::load_balance_costs_update_algo);
+ } else if (WarpX::nox == 3){
+ doVayDepositionShapeN<3>(
+ GetPosition, wp.dataPtr() + offset, uxp.dataPtr() + offset,
+ uyp.dataPtr() + offset, uzp.dataPtr() + offset, ion_lev,
+ jx_fab, jy_fab, jz_fab, np_to_depose, dt, relative_time, dx, xyzmin, lo, q,
+ WarpX::n_rz_azimuthal_modes, cost,
+ WarpX::load_balance_costs_update_algo);
+ }
+ } else { // Direct deposition
+ if (WarpX::nox == 1){
+ doDepositionShapeN<1>(
+ GetPosition, wp.dataPtr() + offset, uxp.dataPtr() + offset,
+ uyp.dataPtr() + offset, uzp.dataPtr() + offset, ion_lev,
+ jx_fab, jy_fab, jz_fab, np_to_depose, relative_time, dx,
+ xyzmin, lo, q, WarpX::n_rz_azimuthal_modes, cost,
+ WarpX::load_balance_costs_update_algo);
+ } else if (WarpX::nox == 2){
+ doDepositionShapeN<2>(
+ GetPosition, wp.dataPtr() + offset, uxp.dataPtr() + offset,
+ uyp.dataPtr() + offset, uzp.dataPtr() + offset, ion_lev,
+ jx_fab, jy_fab, jz_fab, np_to_depose, relative_time, dx,
+ xyzmin, lo, q, WarpX::n_rz_azimuthal_modes, cost,
+ WarpX::load_balance_costs_update_algo);
+ } else if (WarpX::nox == 3){
+ doDepositionShapeN<3>(
+ GetPosition, wp.dataPtr() + offset, uxp.dataPtr() + offset,
+ uyp.dataPtr() + offset, uzp.dataPtr() + offset, ion_lev,
+ jx_fab, jy_fab, jz_fab, np_to_depose, relative_time, dx,
+ xyzmin, lo, q, WarpX::n_rz_azimuthal_modes, cost,
+ WarpX::load_balance_costs_update_algo);
+ }
}
}
WARPX_PROFILE_VAR_STOP(blp_deposit);
@@ -590,59 +682,284 @@ WarpXParticleContainer::DepositCurrent (
void
WarpXParticleContainer::DepositCharge (WarpXParIter& pti, RealVector const& wp,
const int * const ion_lev,
- amrex::MultiFab* rho, int icomp,
+ amrex::MultiFab* rho, const int icomp,
const long offset, const long np_to_depose,
- int thread_num, int lev, int depos_lev)
+ const int thread_num, const int lev, const int depos_lev)
{
- WarpX& warpx = WarpX::GetInstance();
+ if (WarpX::do_shared_mem_charge_deposition)
+ {
+ WARPX_ALWAYS_ASSERT_WITH_MESSAGE((depos_lev==(lev-1)) ||
+ (depos_lev==(lev )),
+ "Deposition buffers only work for lev-1");
- // deposition guards
- // note: this is smaller than rho->nGrowVect() for PSATD
- const amrex::IntVect& ng_rho = warpx.get_ng_depos_rho();
+ // If no particles, do not do anything
+ if (np_to_depose == 0) return;
- const std::array<amrex::Real,3>& dx = WarpX::CellSize(std::max(depos_lev,0));
- amrex::IntVect ref_ratio;
- if (lev == depos_lev) {
- ref_ratio = IntVect(AMREX_D_DECL(1, 1, 1 ));
- } else {
- ref_ratio = WarpX::RefRatio(depos_lev);
- }
- const int nc = WarpX::ncomps;
+ // Number of guard cells for local deposition of rho
+ WarpX& warpx = WarpX::GetInstance();
+ const amrex::IntVect& ng_rho = warpx.get_ng_depos_rho();
- // Get tile box where charge is deposited.
- // The tile box is different when depositing in the buffers (depos_lev<lev)
- // or when depositing inside the level (depos_lev=lev)
- amrex::Box tilebox;
- if (lev == depos_lev) {
- tilebox = pti.tilebox();
+ // Extract deposition order and check that particles shape fits within the guard cells.
+ // NOTE: In specific situations where the staggering of rho and the charge deposition algorithm
+ // are not trivial, this check might be too strict and we might need to relax it, as currently
+ // done for the current deposition.
+
+#if defined(WARPX_DIM_1D_Z)
+ const amrex::IntVect shape_extent = amrex::IntVect(static_cast<int>(WarpX::noz/2));
+#elif defined(WARPX_DIM_XZ) || defined(WARPX_DIM_RZ)
+ const amrex::IntVect shape_extent = amrex::IntVect(static_cast<int>(WarpX::nox/2+1),
+ static_cast<int>(WarpX::noz/2+1));
+#elif defined(WARPX_DIM_3D)
+ const amrex::IntVect shape_extent = amrex::IntVect(static_cast<int>(WarpX::nox/2+1),
+ static_cast<int>(WarpX::noy/2+1),
+ static_cast<int>(WarpX::noz/2+1));
+#endif
+
+ // On CPU: particles deposit on tile arrays, which have a small number of guard cells ng_rho
+ // On GPU: particles deposit directly on the rho array, which usually have a larger number of guard cells
+#ifndef AMREX_USE_GPU
+ const amrex::IntVect range = ng_rho - shape_extent;
+#else
+ const amrex::IntVect range = rho->nGrowVect() - shape_extent;
+#endif
+
+ AMREX_ASSERT_WITH_MESSAGE(
+ amrex::numParticlesOutOfRange(pti, range) == 0,
+ "Particles shape does not fit within tile (CPU) or guard cells (GPU) used for charge deposition");
+ amrex::ignore_unused(range); // In case the assertion isn't compiled
+
+ const std::array<Real,3>& dx = WarpX::CellSize(std::max(depos_lev,0));
+ const Real q = this->charge;
+
+ WARPX_PROFILE_VAR_NS("WarpXParticleContainer::DepositCharge::Sorting", blp_sort);
+ WARPX_PROFILE_VAR_NS("WarpXParticleContainer::DepositCharge::ChargeDeposition", blp_ppc_chd);
+ WARPX_PROFILE_VAR_NS("WarpXParticleContainer::DepositCharge::Accumulate", blp_accumulate);
+
+ // Get tile box where charge is deposited.
+ // The tile box is different when depositing in the buffers (depos_lev<lev)
+ // or when depositing inside the level (depos_lev=lev)
+ Box tilebox;
+ if (lev == depos_lev) {
+ tilebox = pti.tilebox();
+ } else {
+ const IntVect& ref_ratio = WarpX::RefRatio(depos_lev);
+ tilebox = amrex::coarsen(pti.tilebox(),ref_ratio);
+ }
+
+ const auto ix_type = rho->ixType().toIntVect();
+#ifndef AMREX_USE_GPU
+ // Staggered tile box
+ Box tb = amrex::convert( tilebox, ix_type );
+#endif
+
+ tilebox.grow(ng_rho);
+
+ const int nc = WarpX::ncomps;
+
+#ifdef AMREX_USE_GPU
+ amrex::ignore_unused(thread_num);
+ // GPU, no tiling: rho_fab points to the full rho array
+ MultiFab rhoi(*rho, amrex::make_alias, icomp*nc, nc);
+ auto & rho_fab = rhoi.get(pti);
+#else
+ tb.grow(ng_rho);
+
+ // CPU, tiling: rho_fab points to local_rho[thread_num]
+ local_rho[thread_num].resize(tb, nc);
+
+ // local_rho[thread_num] is set to zero
+ local_rho[thread_num].setVal(0.0);
+
+ auto & rho_fab = local_rho[thread_num];
+#endif
+
+ // Lower corner of tile box physical domain
+ // Note that this includes guard cells since it is after tilebox.ngrow
+ // Take into account Galilean shift
+ Real dt = warpx.getdt(lev);
+ const amrex::Real time_shift_delta = (icomp == 0 ? 0.0_rt : dt);
+ const std::array<amrex::Real,3>& xyzmin = WarpX::LowerCorner(
+ tilebox, depos_lev, time_shift_delta);
+
+ // Indices of the lower bound
+ const Dim3 lo = lbound(tilebox);
+
+ // HACK - sort particles by bin here.
+ WARPX_PROFILE_VAR_START(blp_sort);
+ amrex::DenseBins<ParticleType> bins;
+ {
+ const Geometry& geom = Geom(lev);
+ const auto dxi = geom.InvCellSizeArray();
+ const auto plo = geom.ProbLoArray();
+ const auto domain = geom.Domain();
+
+ auto& ptile = ParticlesAt(lev, pti);
+ auto& aos = ptile.GetArrayOfStructs();
+ auto pstruct_ptr = aos().dataPtr();
+
+ Box box = pti.validbox();
+ box.grow(ng_rho);
+ amrex::IntVect bin_size = WarpX::shared_tilesize;
+ int ntiles = numTilesInBox(box, true, bin_size);
+
+ bins.build(ptile.numParticles(), pstruct_ptr, ntiles,
+ [=] AMREX_GPU_HOST_DEVICE (const ParticleType& p) -> unsigned int
+ {
+ Box tbx;
+ auto iv = getParticleCell(p, plo, dxi, domain);
+ AMREX_ASSERT(box.contains(iv));
+ auto tid = getTileIndex(iv, box, true, bin_size, tbx);
+ return static_cast<unsigned int>(tid);
+ });
+ }
+ WARPX_PROFILE_VAR_STOP(blp_sort);
+
+ // get tile boxes
+ amrex::Gpu::DeviceVector<Box> tboxes(bins.numBins(), amrex::Box());
+ {
+ const Geometry& geom = Geom(lev);
+ const auto dxi = geom.InvCellSizeArray();
+ const auto plo = geom.ProbLoArray();
+ const auto domain = geom.Domain();
+
+ auto& ptile = ParticlesAt(lev, pti);
+ auto& aos = ptile.GetArrayOfStructs();
+ auto pstruct_ptr = aos().dataPtr();
+
+ Box box = pti.validbox();
+ box.grow(ng_rho);
+ amrex::IntVect bin_size = WarpX::shared_tilesize;
+
+ const auto offsets_ptr = bins.offsetsPtr();
+ auto tbox_ptr = tboxes.dataPtr();
+ auto permutation = bins.permutationPtr();
+ amrex::ParallelFor(bins.numBins(),
+ [=] AMREX_GPU_DEVICE (int ibin) {
+ const int bin_start = offsets_ptr[ibin];
+ const int bin_stop = offsets_ptr[ibin+1];
+ if (bin_start < bin_stop) {
+ auto p = pstruct_ptr[permutation[bin_start]];
+ Box tbx;
+ auto iv = getParticleCell(p, plo, dxi, domain);
+ AMREX_ASSERT(box.contains(iv));
+ [[maybe_unused]] auto tid = getTileIndex(iv, box, true, bin_size, tbx);
+ AMREX_ASSERT(tid == ibin);
+ AMREX_ASSERT(tbx.contains(iv));
+ tbox_ptr[ibin] = tbx;
+ }
+ });
+ }
+
+ // compute max tile box size in each direction
+ amrex::IntVect max_tbox_size;
+ {
+ ReduceOps<AMREX_D_DECL(ReduceOpMax, ReduceOpMax, ReduceOpMax)> reduce_op;
+ ReduceData<AMREX_D_DECL(int, int, int)> reduce_data(reduce_op);
+ using ReduceTuple = typename decltype(reduce_data)::Type;
+
+ const auto boxes_ptr = tboxes.dataPtr();
+ reduce_op.eval(tboxes.size(), reduce_data,
+ [=] AMREX_GPU_DEVICE (int i) -> ReduceTuple
+ {
+ const Box& box = boxes_ptr[i];
+ if (box.ok()) {
+ IntVect si = box.length();
+ return {AMREX_D_DECL(si[0], si[1], si[2])};
+ } else {
+ return {AMREX_D_DECL(0, 0, 0)};
+ }
+ });
+
+ ReduceTuple hv = reduce_data.value();
+
+ max_tbox_size = IntVect(AMREX_D_DECL(amrex::get<0>(hv),
+ amrex::get<1>(hv),
+ amrex::get<2>(hv)));
+ }
+
+ WARPX_PROFILE_VAR_START(blp_ppc_chd);
+ amrex::LayoutData<amrex::Real>* costs = WarpX::getCosts(lev);
+ amrex::Real* cost = costs ? &((*costs)[pti.index()]) : nullptr;
+
+ const auto GetPosition = GetParticlePosition(pti, offset);
+ const Geometry& geom = Geom(lev);
+ Box box = pti.validbox();
+ box.grow(ng_rho);
+
+ if (WarpX::nox == 1){
+ doChargeDepositionSharedShapeN<1>(GetPosition, wp.dataPtr()+offset, ion_lev,
+ rho_fab, ix_type, np_to_depose, dx, xyzmin, lo, q,
+ WarpX::n_rz_azimuthal_modes, cost,
+ WarpX::load_balance_costs_update_algo, bins, box, geom, max_tbox_size);
+ } else if (WarpX::nox == 2){
+ doChargeDepositionSharedShapeN<2>(GetPosition, wp.dataPtr()+offset, ion_lev,
+ rho_fab, ix_type, np_to_depose, dx, xyzmin, lo, q,
+ WarpX::n_rz_azimuthal_modes, cost,
+ WarpX::load_balance_costs_update_algo, bins, box, geom, max_tbox_size);
+ } else if (WarpX::nox == 3){
+ doChargeDepositionSharedShapeN<3>(GetPosition, wp.dataPtr()+offset, ion_lev,
+ rho_fab, ix_type, np_to_depose, dx, xyzmin, lo, q,
+ WarpX::n_rz_azimuthal_modes, cost,
+ WarpX::load_balance_costs_update_algo, bins, box, geom, max_tbox_size);
+ }
+#ifndef AMREX_USE_GPU
+ // CPU, tiling: atomicAdd local_rho into rho
+ WARPX_PROFILE_VAR_START(blp_accumulate);
+ (*rho)[pti].atomicAdd(local_rho[thread_num], tb, tb, 0, icomp*nc, nc);
+ WARPX_PROFILE_VAR_STOP(blp_accumulate);
+#endif
} else {
- tilebox = amrex::coarsen(pti.tilebox(), ref_ratio);
- }
- tilebox.grow(ng_rho);
- // Lower corner of tile box physical domain
- // Note that this includes guard cells since it is after tilebox.ngrow
- // Take into account Galilean shift
- const amrex::Real dt = warpx.getdt(lev);
- const amrex::Real time_shift_delta = (icomp == 0 ? 0.0_rt : dt);
- const std::array<amrex::Real,3>& xyzmin = WarpX::LowerCorner(tilebox, depos_lev, time_shift_delta);
+ WarpX& warpx = WarpX::GetInstance();
- // pointer to costs data
- amrex::LayoutData<amrex::Real>* costs = WarpX::getCosts(lev);
- amrex::Real* cost = costs ? &((*costs)[pti.index()]) : nullptr;
-
- AMREX_ALWAYS_ASSERT(WarpX::nox == WarpX::noy);
- AMREX_ALWAYS_ASSERT(WarpX::nox == WarpX::noz);
-
- ablastr::particles::deposit_charge<WarpXParticleContainer>(
- pti, wp, this->charge, ion_lev,
- rho, local_rho[thread_num],
- WarpX::noz, dx, xyzmin, WarpX::n_rz_azimuthal_modes,
- ng_rho, depos_lev, ref_ratio,
- offset, np_to_depose,
- icomp, nc,
- cost, WarpX::load_balance_costs_update_algo, WarpX::do_device_synchronize
- );
+ // deposition guards
+ // note: this is smaller than rho->nGrowVect() for PSATD
+ const amrex::IntVect& ng_rho = warpx.get_ng_depos_rho();
+
+ const std::array<amrex::Real,3>& dx = WarpX::CellSize(std::max(depos_lev,0));
+ amrex::IntVect ref_ratio;
+ if (lev == depos_lev) {
+ ref_ratio = IntVect(AMREX_D_DECL(1, 1, 1 ));
+ } else {
+ ref_ratio = WarpX::RefRatio(depos_lev);
+ }
+ const int nc = WarpX::ncomps;
+
+ // Get tile box where charge is deposited.
+ // The tile box is different when depositing in the buffers (depos_lev<lev)
+ // or when depositing inside the level (depos_lev=lev)
+ amrex::Box tilebox;
+ if (lev == depos_lev) {
+ tilebox = pti.tilebox();
+ } else {
+ tilebox = amrex::coarsen(pti.tilebox(), ref_ratio);
+ }
+ tilebox.grow(ng_rho);
+
+ // Lower corner of tile box physical domain
+ // Note that this includes guard cells since it is after tilebox.ngrow
+ // Take into account Galilean shift
+ const amrex::Real dt = warpx.getdt(lev);
+ const amrex::Real time_shift_delta = (icomp == 0 ? 0.0_rt : dt);
+ const std::array<amrex::Real,3>& xyzmin = WarpX::LowerCorner(tilebox, depos_lev, time_shift_delta);
+
+ // pointer to costs data
+ amrex::LayoutData<amrex::Real>* costs = WarpX::getCosts(lev);
+ amrex::Real* cost = costs ? &((*costs)[pti.index()]) : nullptr;
+
+ AMREX_ALWAYS_ASSERT(WarpX::nox == WarpX::noy);
+ AMREX_ALWAYS_ASSERT(WarpX::nox == WarpX::noz);
+
+ ablastr::particles::deposit_charge<WarpXParticleContainer>(
+ pti, wp, this->charge, ion_lev,
+ rho, local_rho[thread_num],
+ WarpX::noz, dx, xyzmin, WarpX::n_rz_azimuthal_modes,
+ ng_rho, depos_lev, ref_ratio,
+ offset, np_to_depose,
+ icomp, nc,
+ cost, WarpX::load_balance_costs_update_algo, WarpX::do_device_synchronize
+ );
+ }
}
void