From 1ce5957cb32a3b7ae39ad68b528e5a370854c58f Mon Sep 17 00:00:00 2001 From: MaxThevenet Date: Sun, 7 Jul 2019 11:14:31 -0700 Subject: avoid duplication in current deposition when using buffers --- Source/Particles/WarpXParticleContainer.cpp | 283 ++++++++++++---------------- 1 file changed, 116 insertions(+), 167 deletions(-) (limited to 'Source/Particles/WarpXParticleContainer.cpp') diff --git a/Source/Particles/WarpXParticleContainer.cpp b/Source/Particles/WarpXParticleContainer.cpp index ac532912d..eb03059f6 100644 --- a/Source/Particles/WarpXParticleContainer.cpp +++ b/Source/Particles/WarpXParticleContainer.cpp @@ -279,187 +279,136 @@ WarpXParticleContainer::AddNParticles (int lev, Redistribute(); } - +/* \brief Current Deposition for thread thread_num + * \param pti : Particle iterator + * \param wp : Array of particle weights + * \param uxp uyp uzp : Array of particle + * \param jx jy jz : Full array of current density + * \param offset : Index of first particle for which current is deposited + * \param np_to_depose: Number of particles for which current is deposited. + Particles [offset,offset+np_tp_depose] deposit their + current + * \param thread_num : Thread number (if tiling) + * \param lev : Level of box that contains particles + * \param depos_lev : Level on which particles deposit (if buffers are used) + * \param dt : Time step for particle level + * \param ngJ : Number of ghosts cells (of lev) + */ void WarpXParticleContainer::DepositCurrent(WarpXParIter& pti, RealVector& wp, RealVector& uxp, RealVector& uyp, RealVector& uzp, - MultiFab& jx, MultiFab& jy, MultiFab& jz, - MultiFab* cjx, MultiFab* cjy, MultiFab* cjz, - const long np_current, const long np, - int thread_num, int lev, Real dt ) + MultiFab* jx, MultiFab* jy, MultiFab* jz, + const long offset, const long np_to_depose, + int thread_num, int lev, int depos_lev, + Real dt, const long ngJ) { - Real *jx_ptr, *jy_ptr, *jz_ptr; - const std::array& xyzmin_tile = WarpX::LowerCorner(pti.tilebox(), lev); - const std::array& dx = WarpX::CellSize(lev); - const std::array& cdx = WarpX::CellSize(std::max(lev-1,0)); - const std::array& xyzmin = xyzmin_tile; - const long lvect = 8; - - BL_PROFILE_VAR_NS("PICSAR::CurrentDeposition", blp_pxr_cd); - BL_PROFILE_VAR_NS("PPC::Evolve::Accumulate", blp_accumulate); - - Box tbx = convert(pti.tilebox(), WarpX::jx_nodal_flag); - Box tby = convert(pti.tilebox(), WarpX::jy_nodal_flag); - Box tbz = convert(pti.tilebox(), WarpX::jz_nodal_flag); - - // WarpX assumes the same number of guard cells for Jx, Jy, Jz - long ngJ = jx.nGrow(); - - int j_is_nodal = jx.is_nodal() and jy.is_nodal() and jz.is_nodal(); - - // Deposit charge for particles that are not in the current buffers - if (np_current > 0) - { -#ifdef AMREX_USE_GPU - jx_ptr = jx[pti].dataPtr(); - jy_ptr = jy[pti].dataPtr(); - jz_ptr = jz[pti].dataPtr(); - - auto jxntot = jx[pti].length(); - auto jyntot = jy[pti].length(); - auto jzntot = jz[pti].length(); -#else - tbx.grow(ngJ); - tby.grow(ngJ); - tbz.grow(ngJ); - - local_jx[thread_num].resize(tbx); - local_jy[thread_num].resize(tby); - local_jz[thread_num].resize(tbz); - - jx_ptr = local_jx[thread_num].dataPtr(); - jy_ptr = local_jy[thread_num].dataPtr(); - jz_ptr = local_jz[thread_num].dataPtr(); - - local_jx[thread_num].setVal(0.0); - local_jy[thread_num].setVal(0.0); - local_jz[thread_num].setVal(0.0); - - auto jxntot = local_jx[thread_num].length(); - auto jyntot = local_jy[thread_num].length(); - auto jzntot = local_jz[thread_num].length(); -#endif - - BL_PROFILE_VAR_START(blp_pxr_cd); - warpx_current_deposition( - jx_ptr, &ngJ, jxntot.getVect(), - jy_ptr, &ngJ, jyntot.getVect(), - jz_ptr, &ngJ, jzntot.getVect(), - &np_current, - m_xp[thread_num].dataPtr(), - m_yp[thread_num].dataPtr(), - m_zp[thread_num].dataPtr(), - uxp.dataPtr(), uyp.dataPtr(), uzp.dataPtr(), - m_giv[thread_num].dataPtr(), - wp.dataPtr(), &this->charge, - &xyzmin[0], &xyzmin[1], &xyzmin[2], - &dt, &dx[0], &dx[1], &dx[2], - &WarpX::nox,&WarpX::noy,&WarpX::noz, &j_is_nodal, - &lvect,&WarpX::current_deposition_algo); - -#ifdef WARPX_RZ - warpx_current_deposition_rz_volume_scaling( - jx_ptr, &ngJ, jxntot.getVect(), - jy_ptr, &ngJ, jyntot.getVect(), - jz_ptr, &ngJ, jzntot.getVect(), - &xyzmin[0], &dx[0]); -#endif - BL_PROFILE_VAR_STOP(blp_pxr_cd); - -#ifndef AMREX_USE_GPU - BL_PROFILE_VAR_START(blp_accumulate); - - jx[pti].atomicAdd(local_jx[thread_num], tbx, tbx, 0, 0, 1); - jy[pti].atomicAdd(local_jy[thread_num], tby, tby, 0, 0, 1); - jz[pti].atomicAdd(local_jz[thread_num], tbz, tbz, 0, 0, 1); - - BL_PROFILE_VAR_STOP(blp_accumulate); -#endif - } + AMREX_ALWAYS_ASSERT_WITH_MESSAGE((depos_lev==(lev-1)) || + (depos_lev==(lev )), + "Deposition buffers only work for lev-1"); + // If no particles, do not do anything + if (np_to_depose == 0) return; + + const std::array& dx = WarpX::CellSize(std::max(depos_lev,0)); + const long lvect = 8; + int j_is_nodal = jx->is_nodal() and jy->is_nodal() and jz->is_nodal(); + + BL_PROFILE_VAR_NS("PICSAR::CurrentDeposition", blp_pxr_cd); + BL_PROFILE_VAR_NS("PPC::Evolve::Accumulate", blp_accumulate); + + // Get tile box where current is deposited + Box tilebox; + if (lev == depos_lev) { + tilebox = pti.tilebox(); + } else { + const IntVect& ref_ratio = WarpX::RefRatio(depos_lev); + tilebox = amrex::coarsen(pti.tilebox(),ref_ratio); + } + + // Staggered tile boxes (different in each direction) + Box tbx = convert(tilebox, WarpX::jx_nodal_flag); + Box tby = convert(tilebox, WarpX::jy_nodal_flag); + Box tbz = convert(tilebox, WarpX::jz_nodal_flag); - // Deposit charge for particles that are in the current buffers - if (np_current < np) - { - const IntVect& ref_ratio = WarpX::RefRatio(lev-1); - const Box& ctilebox = amrex::coarsen(pti.tilebox(),ref_ratio); - const std::array& cxyzmin_tile = WarpX::LowerCorner(ctilebox, lev-1); + // Lower corner of tile box physical domain + const std::array& xyzmin_tile = WarpX::LowerCorner(tilebox, depos_lev); + const std::array& xyzmin = xyzmin_tile; #ifdef AMREX_USE_GPU - jx_ptr = (*cjx)[pti].dataPtr(); - jy_ptr = (*cjy)[pti].dataPtr(); - jz_ptr = (*cjz)[pti].dataPtr(); - - auto jxntot = jx[pti].length(); - auto jyntot = jy[pti].length(); - auto jzntot = jz[pti].length(); + // No tiling on GPU: jx_ptr points to the full + // jx array (same for jy_ptr and jz_ptr). + Real* jx_ptr = (*jx)[pti].dataPtr(); + Real* jy_ptr = (*jy)[pti].dataPtr(); + Real* jz_ptr = (*jz)[pti].dataPtr(); + + auto jxntot = jx[pti].length(); + auto jyntot = jy[pti].length(); + auto jzntot = jz[pti].length(); #else - - tbx = amrex::convert(ctilebox, WarpX::jx_nodal_flag); - tby = amrex::convert(ctilebox, WarpX::jy_nodal_flag); - tbz = amrex::convert(ctilebox, WarpX::jz_nodal_flag); - tbx.grow(ngJ); - tby.grow(ngJ); - tbz.grow(ngJ); - - local_jx[thread_num].resize(tbx); - local_jy[thread_num].resize(tby); - local_jz[thread_num].resize(tbz); - - jx_ptr = local_jx[thread_num].dataPtr(); - jy_ptr = local_jy[thread_num].dataPtr(); - jz_ptr = local_jz[thread_num].dataPtr(); - - local_jx[thread_num].setVal(0.0); - local_jy[thread_num].setVal(0.0); - local_jz[thread_num].setVal(0.0); - - auto jxntot = local_jx[thread_num].length(); - auto jyntot = local_jy[thread_num].length(); - auto jzntot = local_jz[thread_num].length(); -#endif - - long ncrse = np - np_current; - BL_PROFILE_VAR_START(blp_pxr_cd); - warpx_current_deposition( - jx_ptr, &ngJ, jxntot.getVect(), - jy_ptr, &ngJ, jyntot.getVect(), - jz_ptr, &ngJ, jzntot.getVect(), - &ncrse, - m_xp[thread_num].dataPtr() +np_current, - m_yp[thread_num].dataPtr() +np_current, - m_zp[thread_num].dataPtr() +np_current, - uxp.dataPtr()+np_current, - uyp.dataPtr()+np_current, - uzp.dataPtr()+np_current, - m_giv[thread_num].dataPtr()+np_current, - wp.dataPtr()+np_current, &this->charge, - &cxyzmin_tile[0], &cxyzmin_tile[1], &cxyzmin_tile[2], - &dt, &cdx[0], &cdx[1], &cdx[2], - &WarpX::nox,&WarpX::noy,&WarpX::noz, &j_is_nodal, - &lvect,&WarpX::current_deposition_algo); + // Tiling is on: jx_ptr points to local_jx[thread_num] + // (same for jy_ptr and jz_ptr) + tbx.grow(ngJ); + tby.grow(ngJ); + tbz.grow(ngJ); + + local_jx[thread_num].resize(tbx); + local_jy[thread_num].resize(tby); + local_jz[thread_num].resize(tbz); + + Real* jx_ptr = local_jx[thread_num].dataPtr(); + Real* jy_ptr = local_jy[thread_num].dataPtr(); + Real* jz_ptr = local_jz[thread_num].dataPtr(); + + // local_jx[thread_num] is set to zero + local_jx[thread_num].setVal(0.0); + local_jy[thread_num].setVal(0.0); + local_jz[thread_num].setVal(0.0); + + auto jxntot = local_jx[thread_num].length(); + auto jyntot = local_jy[thread_num].length(); + auto jzntot = local_jz[thread_num].length(); +#endif + // GPU, no tiling: deposit directly in jx + // CPU, tiling: deposit into local_jx + // (same for jx and jz) + BL_PROFILE_VAR_START(blp_pxr_cd); + warpx_current_deposition( + jx_ptr, &ngJ, jxntot.getVect(), + jy_ptr, &ngJ, jyntot.getVect(), + jz_ptr, &ngJ, jzntot.getVect(), + &np_to_depose, + m_xp[thread_num].dataPtr() + offset, + m_yp[thread_num].dataPtr() + offset, + m_zp[thread_num].dataPtr() + offset, + uxp.dataPtr() + offset, + uyp.dataPtr() + offset, + uzp.dataPtr() + offset, + m_giv[thread_num].dataPtr() + offset, + wp.dataPtr() + offset, &this->charge, + &xyzmin[0], &xyzmin[1], &xyzmin[2], + &dt, &dx[0], &dx[1], &dx[2], + &WarpX::nox,&WarpX::noy,&WarpX::noz, &j_is_nodal, + &lvect,&WarpX::current_deposition_algo); + #ifdef WARPX_RZ - warpx_current_deposition_rz_volume_scaling( - jx_ptr, &ngJ, jxntot.getVect(), - jy_ptr, &ngJ, jyntot.getVect(), - jz_ptr, &ngJ, jzntot.getVect(), - &xyzmin[0], &dx[0]); + warpx_current_deposition_rz_volume_scaling( + jx_ptr, &ngJ, jxntot.getVect(), + jy_ptr, &ngJ, jyntot.getVect(), + jz_ptr, &ngJ, jzntot.getVect(), + &xyzmin[0], &dx[0]); #endif - - BL_PROFILE_VAR_STOP(blp_pxr_cd); + BL_PROFILE_VAR_STOP(blp_pxr_cd); #ifndef AMREX_USE_GPU - BL_PROFILE_VAR_START(blp_accumulate); - - (*cjx)[pti].atomicAdd(local_jx[thread_num], tbx, tbx, 0, 0, 1); - (*cjy)[pti].atomicAdd(local_jy[thread_num], tby, tby, 0, 0, 1); - (*cjz)[pti].atomicAdd(local_jz[thread_num], tbz, tbz, 0, 0, 1); - - BL_PROFILE_VAR_STOP(blp_accumulate); + BL_PROFILE_VAR_START(blp_accumulate); + // CPU, tiling: atomicAdd local_jx into jx + // (same for jx and jz) + (*jx)[pti].atomicAdd(local_jx[thread_num], tbx, tbx, 0, 0, 1); + (*jy)[pti].atomicAdd(local_jy[thread_num], tby, tby, 0, 0, 1); + (*jz)[pti].atomicAdd(local_jz[thread_num], tbz, tbz, 0, 0, 1); + BL_PROFILE_VAR_STOP(blp_accumulate); #endif - } -}; - +} void WarpXParticleContainer::DepositCharge ( WarpXParIter& pti, RealVector& wp, -- cgit v1.2.3 From 044344d7dd0fe44a9ed9ba06f4edffbd64a64b8c Mon Sep 17 00:00:00 2001 From: MaxThevenet Date: Mon, 8 Jul 2019 08:32:26 -0700 Subject: Test implementation for direct deposition order 1 --- Examples/Tests/SingleParticle/inputs | 8 +- Source/Particles/WarpXParticleContainer.cpp | 140 ++++++++++++++++++++++++---- 2 files changed, 127 insertions(+), 21 deletions(-) (limited to 'Source/Particles/WarpXParticleContainer.cpp') diff --git a/Examples/Tests/SingleParticle/inputs b/Examples/Tests/SingleParticle/inputs index 548848d79..c4bb19ccf 100644 --- a/Examples/Tests/SingleParticle/inputs +++ b/Examples/Tests/SingleParticle/inputs @@ -1,8 +1,8 @@ max_step = 1 -amr.n_cell = 16 16 +amr.n_cell = 8 8 amr.max_level = 0 amr.blocking_factor = 8 -amr.max_grid_size = 8 +amr.max_grid_size = 16 amr.plot_int = 1 geometry.coord_sys = 0 geometry.is_periodic = 0 0 @@ -16,8 +16,8 @@ warpx.cfl = 1.0 particles.nspecies = 1 particles.species_names = electron electron.charge = -q_e -electron.mass = m_e +electron.mass = 1.e10 electron.injection_style = "SingleParticle" electron.single_particle_pos = 0.0 0.0 0.0 -electron.single_particle_vel = 1.e20 0.0 0.0 # gamma*beta +electron.single_particle_vel = 1.e20 0.0 1.e20 # gamma*beta electron.single_particle_weight = 1.0 diff --git a/Source/Particles/WarpXParticleContainer.cpp b/Source/Particles/WarpXParticleContainer.cpp index 95e692c28..829efe53f 100644 --- a/Source/Particles/WarpXParticleContainer.cpp +++ b/Source/Particles/WarpXParticleContainer.cpp @@ -333,6 +333,8 @@ WarpXParticleContainer::DepositCurrent(WarpXParIter& pti, // Lower corner of tile box physical domain const std::array& xyzmin_tile = WarpX::LowerCorner(tilebox, depos_lev); const std::array& xyzmin = xyzmin_tile; + Print()<<" xyzmin_tile "<charge, - &xyzmin[0], &xyzmin[1], &xyzmin[2], - &dt, &dx[0], &dx[1], &dx[2], - &WarpX::nox,&WarpX::noy,&WarpX::noz, &j_is_nodal, - &lvect,&WarpX::current_deposition_algo); + bool use_new = true; + if (use_new){ + Real dxi = 1.0/dx[0]; + Real dzi = 1.0/dx[2]; + Real invvol = dxi*dzi; + Real dts2dx = 0.5*dt*dxi; + Real dts2dz = 0.5*dt*dzi; + Real clightsq = 1.0/PhysConst::c/PhysConst::c; + Real c = PhysConst::c; + + Real xmin = xyzmin[0]; + Real zmin = xyzmin[2]; + Real q = this->charge; + Real stagger_shift = j_is_nodal ? 0.0 : 0.5; + Print()<<"xmin "<array(pti); + auto jy_arr = jy->array(pti); + auto jz_arr = jz->array(pti); + + // auto const& jx_arr = jx->array(); + // auto const& jy_arr = jy->array(); + // auto const& jz_arr = jz->array(); + + // - momenta are stored as a struct of array, in `attribs` + auto& attribs = pti.GetAttribs(); + Real* AMREX_RESTRICT uxp = attribs[PIdx::ux].dataPtr(); + Real* AMREX_RESTRICT uyp = attribs[PIdx::uy].dataPtr(); + Real* AMREX_RESTRICT uzp = attribs[PIdx::uz].dataPtr(); + Real* AMREX_RESTRICT w = attribs[PIdx::w ].dataPtr(); + Cuda::ManagedDeviceVector Vx, Vy, Vz; + pti.GetPosition(Vx, Vy, Vz); + Real* AMREX_RESTRICT xp = Vx.dataPtr() + offset; + // Real* AMREX_RESTRICT yp = y.dataPtr(); + Real* AMREX_RESTRICT zp = Vz.dataPtr() + offset; + // Loop over the particles and convert momentum + // const long np = pti.numParticles(); + Print()<<"np_to_depose "<charge, + &xyzmin[0], &xyzmin[1], &xyzmin[2], + &dt, &dx[0], &dx[1], &dx[2], + &WarpX::nox,&WarpX::noy,&WarpX::noz, &j_is_nodal, + &lvect,&WarpX::current_deposition_algo); + } #ifdef WARPX_RZ warpx_current_deposition_rz_volume_scaling( jx_ptr, &ngJ, jxntot.getVect(), -- cgit v1.2.3 From 8bd48733e9ce659a227c9f19f6b4857596bf9896 Mon Sep 17 00:00:00 2001 From: MaxThevenet Date: Mon, 8 Jul 2019 11:19:47 -0700 Subject: fix position pointer. simple test gives correct result --- Examples/Tests/SingleParticle/inputs | 5 +++++ Source/Particles/WarpXParticleContainer.cpp | 26 +++++++++++++++++--------- 2 files changed, 22 insertions(+), 9 deletions(-) (limited to 'Source/Particles/WarpXParticleContainer.cpp') diff --git a/Examples/Tests/SingleParticle/inputs b/Examples/Tests/SingleParticle/inputs index c4bb19ccf..89e3eb1c6 100644 --- a/Examples/Tests/SingleParticle/inputs +++ b/Examples/Tests/SingleParticle/inputs @@ -13,6 +13,11 @@ algo.charge_deposition = standard algo.field_gathering = standard warpx.cfl = 1.0 +algo.current_deposition = direct +interpolation.nox = 1 +interpolation.noy = 1 +interpolation.noz = 1 + particles.nspecies = 1 particles.species_names = electron electron.charge = -q_e diff --git a/Source/Particles/WarpXParticleContainer.cpp b/Source/Particles/WarpXParticleContainer.cpp index 1841ea0ce..6acf7b1ea 100644 --- a/Source/Particles/WarpXParticleContainer.cpp +++ b/Source/Particles/WarpXParticleContainer.cpp @@ -408,21 +408,29 @@ WarpXParticleContainer::DepositCurrent(WarpXParIter& pti, Real* AMREX_RESTRICT uyp = attribs[PIdx::uy].dataPtr(); Real* AMREX_RESTRICT uzp = attribs[PIdx::uz].dataPtr(); Real* AMREX_RESTRICT w = attribs[PIdx::w ].dataPtr(); - Cuda::ManagedDeviceVector Vx, Vy, Vz; - pti.GetPosition(Vx, Vy, Vz); - Real* AMREX_RESTRICT xp = Vx.dataPtr() + offset; - // Real* AMREX_RESTRICT yp = y.dataPtr(); - Real* AMREX_RESTRICT zp = Vz.dataPtr() + offset; + + + //Cuda::ManagedDeviceVector Vx, Vy, Vz; + //pti.GetPosition(Vx, Vy, Vz); + //Real* AMREX_RESTRICT xp = Vx.dataPtr() + offset; + //Real* AMREX_RESTRICT zp = Vz.dataPtr() + offset; + + Real* AMREX_RESTRICT xp = m_xp[thread_num].dataPtr() + offset; + Real* AMREX_RESTRICT zp = m_zp[thread_num].dataPtr() + offset; + // Loop over the particles and convert momentum // const long np = pti.numParticles(); Print()<<"np_to_depose "< Date: Mon, 8 Jul 2019 16:01:37 -0700 Subject: bug when tiling on CPU --- Examples/Tests/SingleParticle/Untitled.ipynb | 114 +++++++++++++++++++++++++ Examples/Tests/SingleParticle/inputs | 10 +-- Source/Particles/PhysicalParticleContainer.cpp | 2 +- Source/Particles/WarpXParticleContainer.cpp | 66 +++++++++++--- 4 files changed, 172 insertions(+), 20 deletions(-) create mode 100644 Examples/Tests/SingleParticle/Untitled.ipynb (limited to 'Source/Particles/WarpXParticleContainer.cpp') diff --git a/Examples/Tests/SingleParticle/Untitled.ipynb b/Examples/Tests/SingleParticle/Untitled.ipynb new file mode 100644 index 000000000..7a98be71a --- /dev/null +++ b/Examples/Tests/SingleParticle/Untitled.ipynb @@ -0,0 +1,114 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "# Import statements\n", + "import yt ; yt.funcs.mylog.setLevel(50)\n", + "import numpy as np\n", + "import scipy.constants as scc\n", + "import matplotlib.pyplot as plt\n", + "%matplotlib notebook" + ] + }, + { + "cell_type": "code", + "execution_count": 82, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/mthevenet/anaconda3/lib/python3.7/site-packages/yt/units/yt_array.py:1394: RuntimeWarning: invalid value encountered in true_divide\n", + " out=out, **kwargs)\n" + ] + }, + { + "data": { + "text/html": [ + "
" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "ds = yt.load( './old_diags/plt00001' ) # Create a dataset object\n", + "sl = yt.SlicePlot(ds, 2, 'jz', aspect=.4) # Create a sliceplot object\n", + "sl.annotate_grids() # Show grids\n", + "sl.show() # Show the plot" + ] + }, + { + "cell_type": "code", + "execution_count": 83, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/mthevenet/anaconda3/lib/python3.7/site-packages/yt/units/yt_array.py:1394: RuntimeWarning: invalid value encountered in true_divide\n", + " out=out, **kwargs)\n" + ] + }, + { + "data": { + "text/html": [ + "
" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "ds = yt.load( './new_diags/plt00001' ) # Create a dataset object\n", + "sl = yt.SlicePlot(ds, 2, 'jx', aspect=.4) # Create a sliceplot object\n", + "sl.annotate_grids() # Show grids\n", + "sl.show() # Show the plot" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + " " + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.1" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/Examples/Tests/SingleParticle/inputs b/Examples/Tests/SingleParticle/inputs index 89e3eb1c6..feecb996c 100644 --- a/Examples/Tests/SingleParticle/inputs +++ b/Examples/Tests/SingleParticle/inputs @@ -1,11 +1,11 @@ -max_step = 1 -amr.n_cell = 8 8 +max_step = 10 +amr.n_cell = 8 16 amr.max_level = 0 amr.blocking_factor = 8 amr.max_grid_size = 16 amr.plot_int = 1 geometry.coord_sys = 0 -geometry.is_periodic = 0 0 +geometry.is_periodic = 1 1 geometry.prob_lo = -8 -12 geometry.prob_hi = 8 12 warpx.do_pml = 0 @@ -23,6 +23,6 @@ particles.species_names = electron electron.charge = -q_e electron.mass = 1.e10 electron.injection_style = "SingleParticle" -electron.single_particle_pos = 0.0 0.0 0.0 -electron.single_particle_vel = 1.e20 0.0 1.e20 # gamma*beta +electron.single_particle_pos = 4 0 6 +electron.single_particle_vel = -2 0 -5 # -2. 1. 4. electron.single_particle_weight = 1.0 diff --git a/Source/Particles/PhysicalParticleContainer.cpp b/Source/Particles/PhysicalParticleContainer.cpp index 43b46ec49..b78d36c87 100644 --- a/Source/Particles/PhysicalParticleContainer.cpp +++ b/Source/Particles/PhysicalParticleContainer.cpp @@ -1530,6 +1530,7 @@ PhysicalParticleContainer::Evolve (int lev, // Current Deposition // // Deposit inside domains + Print()<<"np_current "<& xyzmin_tile = WarpX::LowerCorner(tilebox, depos_lev); - const std::array& xyzmin = xyzmin_tile; - Print()<<" xyzmin_tile "<& xyzmin_tile = WarpX::LowerCorner(tilebox, depos_lev); + // const std::array& xyzmin = xyzmin_tile; + const std::array& xyzmin = WarpX::LowerCorner(tilebox, depos_lev);; + + // Print()<<" xyzmin_tile "<array(pti); auto jy_arr = jy->array(pti); auto jz_arr = jz->array(pti); - - // auto const& jx_arr = jx->array(); - // auto const& jy_arr = jy->array(); - // auto const& jz_arr = jz->array(); + */ + + // const auto& jx_arr = local_jx[thread_num].array(); + // const auto& jy_arr = local_jy[thread_num].array(); + // const auto& jz_arr = local_jz[thread_num].array(); + + //Array4 const& jx_arr = local_jx[thread_num].array(tbx); + //Array4 const& jy_arr = local_jy[thread_num].array(tby); + //Array4 const& jz_arr = local_jz[thread_num].array(tbz); + Array4 const& jx_arr = local_jx[thread_num].array(); + Array4 const& jy_arr = local_jy[thread_num].array(); + Array4 const& jz_arr = local_jz[thread_num].array(); + + Dim3 lox = lbound(jx_arr); + Dim3 loy = lbound(jy_arr); + Dim3 loz = lbound(jz_arr); + /* + Dim3 loy = lbound(local_jy[thread_num]); + Dim3 loz = lbound(local_jz[thread_num]); + Dim3 hix = hbound(local_jx[thread_num]); + Dim3 hiy = hbound(local_jy[thread_num]); + Dim3 hiz = hbound(local_jz[thread_num]); + */ + Print()<<" lox "< Vx, Vy, Vz; //pti.GetPosition(Vx, Vy, Vz); @@ -446,7 +474,7 @@ WarpXParticleContainer::DepositCurrent(WarpXParIter& pti, std::cout<<" vx "< Date: Tue, 9 Jul 2019 17:57:07 -0700 Subject: works for order 1, direct deposition 2D --- Examples/Tests/SingleParticle/Untitled.ipynb | 14 ++-- Examples/Tests/SingleParticle/inputs | 2 +- Source/Particles/WarpXParticleContainer.cpp | 115 +++++++++++++-------------- 3 files changed, 62 insertions(+), 69 deletions(-) (limited to 'Source/Particles/WarpXParticleContainer.cpp') diff --git a/Examples/Tests/SingleParticle/Untitled.ipynb b/Examples/Tests/SingleParticle/Untitled.ipynb index 7a98be71a..497210df7 100644 --- a/Examples/Tests/SingleParticle/Untitled.ipynb +++ b/Examples/Tests/SingleParticle/Untitled.ipynb @@ -16,7 +16,7 @@ }, { "cell_type": "code", - "execution_count": 82, + "execution_count": 116, "metadata": {}, "outputs": [ { @@ -30,10 +30,10 @@ { "data": { "text/html": [ - "
" + "
" ], "text/plain": [ - "" + "" ] }, "metadata": {}, @@ -42,14 +42,14 @@ ], "source": [ "ds = yt.load( './old_diags/plt00001' ) # Create a dataset object\n", - "sl = yt.SlicePlot(ds, 2, 'jz', aspect=.4) # Create a sliceplot object\n", + "sl = yt.SlicePlot(ds, 2, 'jx', aspect=.4) # Create a sliceplot object\n", "sl.annotate_grids() # Show grids\n", "sl.show() # Show the plot" ] }, { "cell_type": "code", - "execution_count": 83, + "execution_count": 117, "metadata": {}, "outputs": [ { @@ -63,10 +63,10 @@ { "data": { "text/html": [ - "
" + "
" ], "text/plain": [ - "" + "" ] }, "metadata": {}, diff --git a/Examples/Tests/SingleParticle/inputs b/Examples/Tests/SingleParticle/inputs index feecb996c..9f478cfa0 100644 --- a/Examples/Tests/SingleParticle/inputs +++ b/Examples/Tests/SingleParticle/inputs @@ -1,5 +1,5 @@ max_step = 10 -amr.n_cell = 8 16 +amr.n_cell = 24 24 amr.max_level = 0 amr.blocking_factor = 8 amr.max_grid_size = 16 diff --git a/Source/Particles/WarpXParticleContainer.cpp b/Source/Particles/WarpXParticleContainer.cpp index d383ab6f4..69f7aa3d0 100644 --- a/Source/Particles/WarpXParticleContainer.cpp +++ b/Source/Particles/WarpXParticleContainer.cpp @@ -292,7 +292,6 @@ WarpXParticleContainer::AddNParticles (int lev, * \param lev : Level of box that contains particles * \param depos_lev : Level on which particles deposit (if buffers are used) * \param dt : Time step for particle level - * \param ngJ : Number of ghosts cells (of lev) */ void WarpXParticleContainer::DepositCurrent(WarpXParIter& pti, @@ -336,10 +335,10 @@ WarpXParticleContainer::DepositCurrent(WarpXParIter& pti, // Lower corner of tile box physical domain // const std::array& xyzmin_tile = WarpX::LowerCorner(tilebox, depos_lev); // const std::array& xyzmin = xyzmin_tile; + tilebox.grow(ngJ); const std::array& xyzmin = WarpX::LowerCorner(tilebox, depos_lev);; // Print()<<" xyzmin_tile "<& xyzminx = WarpX::LowerCorner(tbx, depos_lev);; + const std::array& xyzminy = WarpX::LowerCorner(tby, depos_lev);; + const std::array& xyzminz = WarpX::LowerCorner(tbz, depos_lev);; + Print()<<"ngJ "<charge; Real stagger_shift = j_is_nodal ? 0.0 : 0.5; - Print()<<"xmin "<array(pti); */ - // const auto& jx_arr = local_jx[thread_num].array(); - // const auto& jy_arr = local_jy[thread_num].array(); - // const auto& jz_arr = local_jz[thread_num].array(); - - //Array4 const& jx_arr = local_jx[thread_num].array(tbx); - //Array4 const& jy_arr = local_jy[thread_num].array(tby); - //Array4 const& jz_arr = local_jz[thread_num].array(tbz); Array4 const& jx_arr = local_jx[thread_num].array(); Array4 const& jy_arr = local_jy[thread_num].array(); Array4 const& jz_arr = local_jz[thread_num].array(); - Dim3 lox = lbound(jx_arr); - Dim3 loy = lbound(jy_arr); - Dim3 loz = lbound(jz_arr); - /* - Dim3 loy = lbound(local_jy[thread_num]); - Dim3 loz = lbound(local_jz[thread_num]); - Dim3 hix = hbound(local_jx[thread_num]); - Dim3 hiy = hbound(local_jy[thread_num]); - Dim3 hiz = hbound(local_jz[thread_num]); - */ - Print()<<" lox "< Date: Tue, 9 Jul 2019 21:26:39 -0700 Subject: some cleaning --- Examples/Tests/SingleParticle/inputs | 4 +- Source/Diagnostics/WarpXIO.cpp | 1 - Source/Particles/WarpXParticleContainer.cpp | 248 +++++++++------------------- 3 files changed, 82 insertions(+), 171 deletions(-) (limited to 'Source/Particles/WarpXParticleContainer.cpp') diff --git a/Examples/Tests/SingleParticle/inputs b/Examples/Tests/SingleParticle/inputs index 9f478cfa0..7ec1a659f 100644 --- a/Examples/Tests/SingleParticle/inputs +++ b/Examples/Tests/SingleParticle/inputs @@ -1,5 +1,5 @@ max_step = 10 -amr.n_cell = 24 24 +amr.n_cell = 8 16 amr.max_level = 0 amr.blocking_factor = 8 amr.max_grid_size = 16 @@ -24,5 +24,5 @@ electron.charge = -q_e electron.mass = 1.e10 electron.injection_style = "SingleParticle" electron.single_particle_pos = 4 0 6 -electron.single_particle_vel = -2 0 -5 # -2. 1. 4. +electron.single_particle_vel = -2 0 -5000 # -2. 1. 4. electron.single_particle_weight = 1.0 diff --git a/Source/Diagnostics/WarpXIO.cpp b/Source/Diagnostics/WarpXIO.cpp index 24272c588..38399bf9e 100644 --- a/Source/Diagnostics/WarpXIO.cpp +++ b/Source/Diagnostics/WarpXIO.cpp @@ -755,7 +755,6 @@ WarpX::WriteSlicePlotFile () const // writing plotfile // const std::string& slice_plotfilename = amrex::Concatenate(slice_plot_file,istep[0]); amrex::Print() << " Writing slice plotfile " << slice_plotfilename << "\n"; - const int ngrow = 0; Vector rfs; VisMF::Header::Version current_version = VisMF::GetHeaderVersion(); diff --git a/Source/Particles/WarpXParticleContainer.cpp b/Source/Particles/WarpXParticleContainer.cpp index 69f7aa3d0..cd0c37bd8 100644 --- a/Source/Particles/WarpXParticleContainer.cpp +++ b/Source/Particles/WarpXParticleContainer.cpp @@ -310,7 +310,6 @@ WarpXParticleContainer::DepositCurrent(WarpXParIter& pti, const long ngJ = jx->nGrow(); const std::array& dx = WarpX::CellSize(std::max(depos_lev,0)); - const long lvect = 8; int j_is_nodal = jx->is_nodal() and jy->is_nodal() and jz->is_nodal(); BL_PROFILE_VAR_NS("PICSAR::CurrentDeposition", blp_pxr_cd); @@ -332,13 +331,7 @@ WarpXParticleContainer::DepositCurrent(WarpXParIter& pti, Box tby = convert(tilebox, WarpX::jy_nodal_flag); Box tbz = convert(tilebox, WarpX::jz_nodal_flag); - // Lower corner of tile box physical domain - // const std::array& xyzmin_tile = WarpX::LowerCorner(tilebox, depos_lev); - // const std::array& xyzmin = xyzmin_tile; tilebox.grow(ngJ); - const std::array& xyzmin = WarpX::LowerCorner(tilebox, depos_lev);; - - // Print()<<" xyzmin_tile "<& xyzminx = WarpX::LowerCorner(tbx, depos_lev);; - const std::array& xyzminy = WarpX::LowerCorner(tby, depos_lev);; - const std::array& xyzminz = WarpX::LowerCorner(tbz, depos_lev);; - - Print()<<"ngJ "<charge; - Real stagger_shift = j_is_nodal ? 0.0 : 0.5; - // Print()<<"xmin "<array(pti); - auto jy_arr = jy->array(pti); - auto jz_arr = jz->array(pti); - */ - - Array4 const& jx_arr = local_jx[thread_num].array(); - Array4 const& jy_arr = local_jy[thread_num].array(); - Array4 const& jz_arr = local_jz[thread_num].array(); - - Print()<<"tbx"< Vx, Vy, Vz; - //pti.GetPosition(Vx, Vy, Vz); - //Real* AMREX_RESTRICT xp = Vx.dataPtr() + offset; - //Real* AMREX_RESTRICT zp = Vz.dataPtr() + offset; - - Real* AMREX_RESTRICT xp = m_xp[thread_num].dataPtr() + offset; - Real* AMREX_RESTRICT zp = m_zp[thread_num].dataPtr() + offset; - - Print()<<"np_to_depose "<charge; - Real* jx_ptr = local_jx[thread_num].dataPtr(); - Real* jy_ptr = local_jy[thread_num].dataPtr(); - Real* jz_ptr = local_jz[thread_num].dataPtr(); - - auto jxntot = local_jx[thread_num].length(); - auto jyntot = local_jy[thread_num].length(); - auto jzntot = local_jz[thread_num].length(); - - warpx_current_deposition( - jx_ptr, &ngJ, jxntot.getVect(), - jy_ptr, &ngJ, jyntot.getVect(), - jz_ptr, &ngJ, jzntot.getVect(), - &np_to_depose, - m_xp[thread_num].dataPtr() + offset, - m_yp[thread_num].dataPtr() + offset, - m_zp[thread_num].dataPtr() + offset, - uxp.dataPtr() + offset, - uyp.dataPtr() + offset, - uzp.dataPtr() + offset, - m_giv[thread_num].dataPtr() + offset, - wp.dataPtr() + offset, &this->charge, - &xyzmin[0], &xyzmin[1], &xyzmin[2], - &dt, &dx[0], &dx[1], &dx[2], - &WarpX::nox,&WarpX::noy,&WarpX::noz, &j_is_nodal, - &lvect,&WarpX::current_deposition_algo); - } + // Lower corner of tile box physical domain + const std::array& xyzmin = WarpX::LowerCorner(tilebox, depos_lev);; + // xmin and zmin are built on pti.tilebox(), so it does + // not include staggering, so the stagger_shift has to be done by hand. + // Alternatively, we could define xminx from tbx (and the same for 3 + // directions and for jx, jy, jz). This way, sxo would not be needed. + // Better for memory? worth trying? + const Real xmin = xyzmin[0]; + const Real zmin = xyzmin[2]; + const Real stagger_shift = j_is_nodal ? 0.0 : 0.5; + + Array4 const& jx_arr = local_jx[thread_num].array(); + Array4 const& jy_arr = local_jy[thread_num].array(); + Array4 const& jz_arr = local_jz[thread_num].array(); + + const Dim3 lo = lbound(tilebox); + + Real* AMREX_RESTRICT xp = m_xp[thread_num].dataPtr() + offset; + Real* AMREX_RESTRICT zp = m_zp[thread_num].dataPtr() + offset; + + ParallelFor( np_to_depose, + [=] AMREX_GPU_DEVICE (long ip) { + Real gaminv = 1.0/std::sqrt(1.0 + uxp[ip]*uxp[ip]*clightsq + + uyp[ip]*uyp[ip]*clightsq + + uzp[ip]*uzp[ip]*clightsq); + + Real vx = uxp[ip]*gaminv; + Real vy = uyp[ip]*gaminv; + Real vz = uzp[ip]*gaminv; + + Real wq=q*wp[ip]; + Real wqx=wq*invvol*vx; + Real wqy=wq*invvol*vy; + Real wqz=wq*invvol*vz; + + Real x = (Real) (xp[ip]-xmin)*dxi; + Real z = (Real) (zp[ip]-zmin)*dzi; + Real xmid=x-dts2dx*vx; + Real zmid=z-dts2dz*vz; + + int j = (int) xmid; + int l = (int) zmid; + int j0= (int) (xmid-stagger_shift); + int l0= (int) (zmid-stagger_shift); + + Real xint = xmid-j; + Real zint = zmid-l; + + Real sx[2] = {1.0 - xint, xint}; + Real sz[2] = {1.0 - zint, zint}; + + xint = xmid-stagger_shift-j0; + zint = zmid-stagger_shift-l0; + + Real sx0[2] = {1.0 - xint, xint}; + Real sz0[2] = {1.0 - zint, zint}; + + jx_arr(lo.x+j0 ,lo.y+l ,0) += sx0[0]*sz[0]*wqx; + jx_arr(lo.x+j0+1,lo.y+l ,0) += sx0[1]*sz[0]*wqx; + jx_arr(lo.x+j0 ,lo.y+l+1,0) += sx0[0]*sz[1]*wqx; + jx_arr(lo.x+j0+1,lo.y+l+1,0) += sx0[1]*sz[1]*wqx; + + jy_arr(lo.x+j ,lo.y+l ,0) += sx[0]*sz[0]*wqy; + jy_arr(lo.x+j+1,lo.y+l ,0) += sx[1]*sz[0]*wqy; + jy_arr(lo.x+j ,lo.y+l+1,0) += sx[0]*sz[1]*wqy; + jy_arr(lo.x+j+1,lo.y+l+1,0) += sx[1]*sz[1]*wqy; + + jz_arr(lo.x+j ,lo.y+l0 ,0) += sx[0]*sz0[0]*wqz; + jz_arr(lo.x+j+1,lo.y+l0 ,0) += sx[1]*sz0[0]*wqz; + jz_arr(lo.x+j ,lo.y+l0+1,0) += sx[0]*sz0[1]*wqz; + jz_arr(lo.x+j+1,lo.y+l0+1,0) += sx[1]*sz0[1]*wqz; + } + ); #ifdef WARPX_RZ // Rescale current in r-z mode warpx_current_deposition_rz_volume_scaling( -- cgit v1.2.3 From ffe120809d57edf2049f6266454638fe60795501 Mon Sep 17 00:00:00 2001 From: MaxThevenet Date: Wed, 10 Jul 2019 10:02:45 -0700 Subject: 2d and 3d version, order 1, with comments --- .../Physics_applications/uniform_plasma/inputs.2d | 35 ------- .../Physics_applications/uniform_plasma/inputs.3d | 9 +- Source/Particles/WarpXParticleContainer.cpp | 109 ++++++++++++++------- 3 files changed, 83 insertions(+), 70 deletions(-) delete mode 100644 Examples/Physics_applications/uniform_plasma/inputs.2d (limited to 'Source/Particles/WarpXParticleContainer.cpp') diff --git a/Examples/Physics_applications/uniform_plasma/inputs.2d b/Examples/Physics_applications/uniform_plasma/inputs.2d deleted file mode 100644 index 4304638dd..000000000 --- a/Examples/Physics_applications/uniform_plasma/inputs.2d +++ /dev/null @@ -1,35 +0,0 @@ -################################# -####### GENERAL PARAMETERS ###### -################################# -max_step = 10 -amr.n_cell = 128 128 -amr.max_grid_size = 64 -amr.blocking_factor = 32 -amr.max_level = 0 -geometry.coord_sys = 0 # 0: Cartesian -geometry.is_periodic = 1 1 # Is periodic? -geometry.prob_lo = -20.e-6 -20.e-6 # physical domain -geometry.prob_hi = 20.e-6 20.e-6 - -################################# -############ NUMERICS ########### -################################# -warpx.serialize_ics = 1 -warpx.verbose = 1 -warpx.cfl = 1.0 -amr.plot_int = 10 - -################################# -############ PLASMA ############# -################################# -particles.nspecies = 1 -particles.species_names = electrons - -electrons.charge = -q_e -electrons.mass = m_e -electrons.injection_style = "NUniformPerCell" -electrons.num_particles_per_cell_each_dim = 2 2 -electrons.profile = constant -electrons.density = 1.e25 # number of electrons per m^3 -electrons.momentum_distribution_type = "gaussian" -electrons.u_th = 0.01 # uth the std of the (unitless) momentum diff --git a/Examples/Physics_applications/uniform_plasma/inputs.3d b/Examples/Physics_applications/uniform_plasma/inputs.3d index 6792dac2c..20e47b0a2 100644 --- a/Examples/Physics_applications/uniform_plasma/inputs.3d +++ b/Examples/Physics_applications/uniform_plasma/inputs.3d @@ -19,6 +19,11 @@ warpx.verbose = 1 warpx.cfl = 1.0 amr.plot_int = 10 +algo.current_deposition = direct +interpolation.nox = 1 +interpolation.noy = 1 +interpolation.noz = 1 + ################################# ############ PLASMA ############# ################################# @@ -32,4 +37,6 @@ electrons.num_particles_per_cell_each_dim = 2 2 2 electrons.profile = constant electrons.density = 1.e25 # number of electrons per m^3 electrons.momentum_distribution_type = "gaussian" -electrons.u_th = 0.01 # uth the std of the (unitless) momentum +electrons.ux_th = 0.01 +electrons.uy_th = 0.01 +electrons.uz_th = 0.01 diff --git a/Source/Particles/WarpXParticleContainer.cpp b/Source/Particles/WarpXParticleContainer.cpp index cd0c37bd8..99596f8c3 100644 --- a/Source/Particles/WarpXParticleContainer.cpp +++ b/Source/Particles/WarpXParticleContainer.cpp @@ -366,9 +366,15 @@ WarpXParticleContainer::DepositCurrent(WarpXParIter& pti, BL_PROFILE_VAR_START(blp_pxr_cd); Real dxi = 1.0/dx[0]; Real dzi = 1.0/dx[2]; - Real invvol = dxi*dzi; Real dts2dx = 0.5*dt*dxi; Real dts2dz = 0.5*dt*dzi; +#if (AMREX_SPACEDIM == 2) + Real invvol = dxi*dzi; +#else // (AMREX_SPACEDIM == 3) + Real dyi = 1.0/dx[1]; + Real dts2dy = 0.5*dt*dyi; + Real invvol = dxi*dyi*dzi; +#endif Real clightsq = 1.0/PhysConst::c/PhysConst::c; Real q = this->charge; @@ -392,59 +398,94 @@ WarpXParticleContainer::DepositCurrent(WarpXParIter& pti, Real* AMREX_RESTRICT xp = m_xp[thread_num].dataPtr() + offset; Real* AMREX_RESTRICT zp = m_zp[thread_num].dataPtr() + offset; +#if (AMREX_SPACEDIM == 3) + Real* AMREX_RESTRICT yp = m_yp[thread_num].dataPtr() + offset; + const Real ymin = xyzmin[1]; +#endif + ParallelFor( np_to_depose, [=] AMREX_GPU_DEVICE (long ip) { + + // macro-particle current in all 3D (even for 2D runs) + // wqx, wqy and wqz are the particle current in each + // direction Real gaminv = 1.0/std::sqrt(1.0 + uxp[ip]*uxp[ip]*clightsq + uyp[ip]*uyp[ip]*clightsq + uzp[ip]*uzp[ip]*clightsq); - + Real wq = q*wp[ip]; Real vx = uxp[ip]*gaminv; + Real wqx = wq*invvol*vx; Real vy = uyp[ip]*gaminv; + Real wqy = wq*invvol*vy; Real vz = uzp[ip]*gaminv; + Real wqz = wq*invvol*vz; + + // --- x direction + // particle position in cell unit (1/2 timestep back) + // Real x = (Real) (xp[ip]-xmin)*dxi; + // particle position in cell unit + // Real xmid=x-dts2dx*vx; + Real xmid= (Real) (xp[ip]-xmin)*dxi-dts2dx*vx; + // x index of cell containing particle (cell-centered) + int j = (int) xmid; + // x index of cell containing particle (node-centered) + int j0= (int) (xmid-stagger_shift); + // for the cell-centered quantities, compute current on + // each point + Real xint = xmid-j; + Real sx[2] = {1.0 - xint, xint}; + // for the node-centered quantities, compute current on + // each point + xint = xmid-stagger_shift-j0; + Real sx0[2] = {1.0 - xint, xint}; - Real wq=q*wp[ip]; - Real wqx=wq*invvol*vx; - Real wqy=wq*invvol*vy; - Real wqz=wq*invvol*vz; - - Real x = (Real) (xp[ip]-xmin)*dxi; +#if (AMREX_SPACEDIM == 3) + // --- y direction + Real y = (Real) (yp[ip]-ymin)*dyi; + Real ymid=y-dts2dy*vy; + int k = (int) ymid; + int k0= (int) (ymid-stagger_shift); + Real yint = ymid-k; + Real sy[2] = {1.0 - yint, yint}; + yint = ymid-stagger_shift-k0; + Real sy0[2] = {1.0 - yint, yint}; +#endif + // --- z direction Real z = (Real) (zp[ip]-zmin)*dzi; - Real xmid=x-dts2dx*vx; Real zmid=z-dts2dz*vz; - - int j = (int) xmid; int l = (int) zmid; - int j0= (int) (xmid-stagger_shift); int l0= (int) (zmid-stagger_shift); - - Real xint = xmid-j; Real zint = zmid-l; - - Real sx[2] = {1.0 - xint, xint}; Real sz[2] = {1.0 - zint, zint}; - - xint = xmid-stagger_shift-j0; zint = zmid-stagger_shift-l0; - - Real sx0[2] = {1.0 - xint, xint}; Real sz0[2] = {1.0 - zint, zint}; - jx_arr(lo.x+j0 ,lo.y+l ,0) += sx0[0]*sz[0]*wqx; - jx_arr(lo.x+j0+1,lo.y+l ,0) += sx0[1]*sz[0]*wqx; - jx_arr(lo.x+j0 ,lo.y+l+1,0) += sx0[0]*sz[1]*wqx; - jx_arr(lo.x+j0+1,lo.y+l+1,0) += sx0[1]*sz[1]*wqx; - - jy_arr(lo.x+j ,lo.y+l ,0) += sx[0]*sz[0]*wqy; - jy_arr(lo.x+j+1,lo.y+l ,0) += sx[1]*sz[0]*wqy; - jy_arr(lo.x+j ,lo.y+l+1,0) += sx[0]*sz[1]*wqy; - jy_arr(lo.x+j+1,lo.y+l+1,0) += sx[1]*sz[1]*wqy; - - jz_arr(lo.x+j ,lo.y+l0 ,0) += sx[0]*sz0[0]*wqz; - jz_arr(lo.x+j+1,lo.y+l0 ,0) += sx[1]*sz0[0]*wqz; - jz_arr(lo.x+j ,lo.y+l0+1,0) += sx[0]*sz0[1]*wqz; - jz_arr(lo.x+j+1,lo.y+l0+1,0) += sx[1]*sz0[1]*wqz; + // Real xbounds[2], ybounds[2], zbounds[2]; + // compute_shape_factor(xbounds, sx, xint); + // deposit_current() + +#if (AMREX_SPACEDIM == 2) + for (int iz=0; iz<2; iz++){ + for (int ix=0; ix<2; ix++){ + jx_arr(lo.x+j0+ix ,lo.y+l +iz ,0) += sx0[ix]*sz [iz]*wqx; + jy_arr(lo.x+j +ix ,lo.y+l +iz ,0) += sx [ix]*sz [iz]*wqy; + jz_arr(lo.x+j +ix ,lo.y+l0+iz ,0) += sx [ix]*sz0[iz]*wqz; + } + } +#else // (AMREX_SPACEDIM == 3) + for (int iz=0; iz<2; iz++){ + for (int iy=0; iy<2; iy++){ + for (int ix=0; ix<2; ix++){ + jx_arr(lo.x+j0+ix ,lo.y+k +iy ,lo.z+l +iz ) += sx0[ix]*sy [iy]*sz [iz]*wqx; + jy_arr(lo.x+j +ix ,lo.y+k0+iy ,lo.z+l +iz ) += sx [ix]*sy0[iy]*sz [iz]*wqy; + jz_arr(lo.x+j +ix ,lo.y+k +iy ,lo.z+l0+iz ) += sx [ix]*sy [iy]*sz0[iz]*wqz; + } + } + } +#endif } ); + #ifdef WARPX_RZ // Rescale current in r-z mode warpx_current_deposition_rz_volume_scaling( -- cgit v1.2.3 From c81d4af3bfcc4e2df4e012f8259fc26ecdb00f3a Mon Sep 17 00:00:00 2001 From: MaxThevenet Date: Wed, 10 Jul 2019 18:48:00 -0700 Subject: generic solution OK for 2/3D order 1 and 3. Issues with order 2 --- Source/Particles/Deposition/ComputeShapeFactors.H | 232 ++++++++++++++++++++++ Source/Particles/Deposition/Make.package | 3 + Source/Particles/Make.package | 1 + Source/Particles/WarpXParticleContainer.H | 4 +- Source/Particles/WarpXParticleContainer.cpp | 171 +++++----------- 5 files changed, 286 insertions(+), 125 deletions(-) create mode 100644 Source/Particles/Deposition/ComputeShapeFactors.H create mode 100644 Source/Particles/Deposition/Make.package (limited to 'Source/Particles/WarpXParticleContainer.cpp') diff --git a/Source/Particles/Deposition/ComputeShapeFactors.H b/Source/Particles/Deposition/ComputeShapeFactors.H new file mode 100644 index 000000000..22e6fb6a5 --- /dev/null +++ b/Source/Particles/Deposition/ComputeShapeFactors.H @@ -0,0 +1,232 @@ +#ifndef COMPUTESHAPEFACTORS_H_ +#define COMPUTESHAPEFACTORS_H_ + +//#include +//#include +//#include +//#include +//#include +//#include +//#include +//#include + +//#include +//#include +//#include +//#include +//#include + +//#include +//#include +//#include +//#include +//#include +// #include +// #include +//#include "WarpXParticleContainer.H" +//#include +//#include + +using namespace amrex; + + +template +AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE +void compute_shape_factor_lo(amrex::Dim3& shape_lo) {}; + +template <> +AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE +void compute_shape_factor_lo <1> (amrex::Dim3& shape_lo) { + shape_lo = {0, 0, 0}; +} + +template <> +AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE +void compute_shape_factor_lo <2> (amrex::Dim3& shape_lo) { + shape_lo = {-1, -1, -1}; +} + +template <> +AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE +void compute_shape_factor_lo <3> (amrex::Dim3& shape_lo) { + shape_lo = {-1, -1, -1}; +} + +template +AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE +void compute_shape_factor(Real* sx, Real xint) {}; + +template <> +AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE +void compute_shape_factor <1> (Real* sx, Real xint){ + sx[0] = 1.0 - xint; + sx[1] = xint; +} + +template <> +AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE +void compute_shape_factor <2> (Real* sx, Real xint){ + Print()<<"compute_shape_factor <2>\n"; + sx[0] = 0.5*(0.5-xint)*(0.5-xint); + sx[1] = 0.75-xint*xint; + sx[2] = 0.5*(0.5+xint)*(0.5+xint); +} + +template <> +AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE +void compute_shape_factor <3> (Real* sx, Real xint){ + Print()<<"compute_shape_factor <2>\n"; + sx[0] = 1.0/6.0*(1.0-xint)*(1.0-xint)*(1.0-xint); + sx[1] = 2.0/3.0-xint*xint*(1-xint/2.0); + sx[2] = 2.0/3.0-(1-xint)*(1-xint)*(1.0-0.5*(1-xint)); + sx[3] = 1.0/6.0*xint*xint*xint; +} + +template +void doDepositionShapeN(Real* xp, Real* yp, Real* zp, + Real* wp, Real* uxp, + Real* uyp, Real* uzp, + const amrex::Array4& jx_arr, + const amrex::Array4& jy_arr, + const amrex::Array4& jz_arr, + const long offset, const long np_to_depose, + const amrex::Real dt, const std::array& dx, + const std::array xyzmin, + const Dim3 lo, + const amrex::Real stagger_shift, + const amrex::Real q) +{ + Real dxi = 1.0/dx[0]; + Real dzi = 1.0/dx[2]; + Real dts2dx = 0.5*dt*dxi; + Real dts2dz = 0.5*dt*dzi; +#if (AMREX_SPACEDIM == 2) + Real invvol = dxi*dzi; +#else // (AMREX_SPACEDIM == 3) + Real dyi = 1.0/dx[1]; + Real dts2dy = 0.5*dt*dyi; + Real invvol = dxi*dyi*dzi; +#endif + + const Real xmin = xyzmin[0]; + const Real ymin = xyzmin[1]; + const Real zmin = xyzmin[2]; + + Print()<<"toto\n"; + + Dim3 shape_lo; + compute_shape_factor_lo(shape_lo); + Real clightsq = 1.0/PhysConst::c/PhysConst::c; + ParallelFor( np_to_depose, + [=] AMREX_GPU_DEVICE (long ip) { + + // macro-particle current in all 3D (even for 2D runs) + // wqx, wqy and wqz are the particle current in each + // direction + Real gaminv = 1.0/std::sqrt(1.0 + uxp[ip]*uxp[ip]*clightsq + + uyp[ip]*uyp[ip]*clightsq + + uzp[ip]*uzp[ip]*clightsq); + Real wq = q*wp[ip]; + Real vx = uxp[ip]*gaminv; + Real wqx = wq*invvol*vx; + Real vy = uyp[ip]*gaminv; + Real wqy = wq*invvol*vy; + Real vz = uzp[ip]*gaminv; + Real wqz = wq*invvol*vz; + + // --- x direction + // particle position in cell unit (1/2 timestep back) + // Real x = (Real) (xp[ip]-xmin)*dxi; + // particle position in cell unit + // Real xmid=x-dts2dx*vx; + Real xmid= (Real) (xp[ip]-xmin)*dxi-dts2dx*vx; + // x index of cell containing particle (cell-centered) + int j = (int) xmid; + // x index of cell containing particle (node-centered) + int j0= (int) (xmid-stagger_shift); + // for the cell-centered quantities, compute current on + // each point + Real xint = xmid-j; + Real sx[depos_order + 1], sx0[depos_order + 1]; + // Real sx[2] = {1.0 - xint, xint}; + compute_shape_factor(sx, xint); + // for the node-centered quantities, compute current on + // each point + xint = xmid-stagger_shift-j0; + compute_shape_factor(sx0, xint); + // Real sx0[2] = {1.0 - xint, xint}; + // compute_shape_factor(sx0, xint); + +#if (AMREX_SPACEDIM == 3) + // --- y direction + Real y = (Real) (yp[ip]-ymin)*dyi; + Real ymid=y-dts2dy*vy; + int k = (int) ymid; + int k0= (int) (ymid-stagger_shift); + Real yint = ymid-k; + Real sy[depos_order + 1], sy0[depos_order + 1]; + compute_shape_factor(sy, yint); + // Real sy[2] = {1.0 - yint, yint}; + yint = ymid-stagger_shift-k0; + compute_shape_factor(sy0, yint); + //Real sy0[2] = {1.0 - yint, yint}; + // compute_shape_factor(shape_lo, shape_hi, sy, yint); +#endif + // --- z direction + Real z = (Real) (zp[ip]-zmin)*dzi; + Real zmid=z-dts2dz*vz; + int l = (int) zmid; + int l0= (int) (zmid-stagger_shift); + Real zint = zmid-l; + Real sz[depos_order + 1], sz0[depos_order + 1]; + compute_shape_factor(sz, zint); + // Real sz[2] = {1.0 - zint, zint}; + zint = zmid-stagger_shift-l0; + compute_shape_factor(sz0, zint); + // Real sz0[2] = {1.0 - zint, zint}; + // compute_shape_factor(shape_lo, shape_hi, sz, zint); +/* + #if (AMREX_SPACEDIM == 2) + for (int iz=shape_lo.y; iz<=shape_hi.y; iz++){ + for (int ix=shape_lo.x; ix<=shape_hi.x; ix++){ + jx_arr(lo.x+j0+ix ,lo.y+l +iz ,0) += sx0[ix]*sz [iz]*wqx; + jy_arr(lo.x+j +ix ,lo.y+l +iz ,0) += sx [ix]*sz [iz]*wqy; + jz_arr(lo.x+j +ix ,lo.y+l0+iz ,0) += sx [ix]*sz0[iz]*wqz; + } + } + #else // (AMREX_SPACEDIM == 3) + for (int iz=shape_lo.z; iz<=shape_hi.z; iz++){ + for (int iy=shape_lo.y; iy<=shape_hi.y; iy++){ + for (int ix=shape_lo.x; ix<=shape_hi.x; ix++){ + jx_arr(lo.x+j0+ix ,lo.y+k +iy ,lo.z+l +iz ) += sx0[ix]*sy [iy]*sz [iz]*wqx; + jy_arr(lo.x+j +ix ,lo.y+k0+iy ,lo.z+l +iz ) += sx [ix]*sy0[iy]*sz [iz]*wqy; + jz_arr(lo.x+j +ix ,lo.y+k +iy ,lo.z+l0+iz ) += sx [ix]*sy [iy]*sz0[iz]*wqz; + } + } + } + #endif +*/ +#if (AMREX_SPACEDIM == 2) + for (int iz=0; iz<=depos_order; iz++){ + for (int ix=0; ix<=depos_order; ix++){ + jx_arr(lo.x+j0+ix+shape_lo.x, lo.y+l +iz+shape_lo.y, 0) += sx0[ix]*sz [iz]*wqx; + jy_arr(lo.x+j +ix+shape_lo.x, lo.y+l +iz+shape_lo.y, 0) += sx [ix]*sz [iz]*wqy; + jz_arr(lo.x+j +ix+shape_lo.x, lo.y+l0+iz+shape_lo.y, 0) += sx [ix]*sz0[iz]*wqz; + } + } +#else // (AMREX_SPACEDIM == 3) + for (int iz=0; iz<=depos_order; iz++){ + for (int iy=0; iy<=depos_order; iy++){ + for (int ix=0; ix<=depos_order; ix++){ + jx_arr(lo.x+j0+ix+shape_lo.x, lo.y+k +iy+shape_lo.y, lo.z+l +iz+shape_lo.z) += sx0[ix]*sy [iy]*sz [iz]*wqx; + jy_arr(lo.x+j +ix+shape_lo.x, lo.y+k0+iy+shape_lo.y, lo.z+l +iz+shape_lo.z) += sx [ix]*sy0[iy]*sz [iz]*wqy; + jz_arr(lo.x+j +ix+shape_lo.x, lo.y+k +iy+shape_lo.y, lo.z+l0+iz+shape_lo.z) += sx [ix]*sy [iy]*sz0[iz]*wqz; + } + } + } +#endif + } + ); +} + +#endif // COMPUTESHAPEFACTORS_H_ diff --git a/Source/Particles/Deposition/Make.package b/Source/Particles/Deposition/Make.package new file mode 100644 index 000000000..7eb4a5660 --- /dev/null +++ b/Source/Particles/Deposition/Make.package @@ -0,0 +1,3 @@ +CEXE_headers += ComputeShapeFactors.H +INCLUDE_LOCATIONS += $(WARPX_HOME)/Source/Particles/Deposition +VPATH_LOCATIONS += $(WARPX_HOME)/Source/Particles/Deposition diff --git a/Source/Particles/Make.package b/Source/Particles/Make.package index f33095738..2038472a1 100644 --- a/Source/Particles/Make.package +++ b/Source/Particles/Make.package @@ -11,6 +11,7 @@ CEXE_headers += RigidInjectedParticleContainer.H CEXE_headers += PhysicalParticleContainer.H include $(WARPX_HOME)/Source/Particles/Pusher/Make.package +include $(WARPX_HOME)/Source/Particles/Deposition/Make.package INCLUDE_LOCATIONS += $(WARPX_HOME)/Source/Particles VPATH_LOCATIONS += $(WARPX_HOME)/Source/Particles diff --git a/Source/Particles/WarpXParticleContainer.H b/Source/Particles/WarpXParticleContainer.H index 0e800bf1d..6182ec5bd 100644 --- a/Source/Particles/WarpXParticleContainer.H +++ b/Source/Particles/WarpXParticleContainer.H @@ -253,8 +253,8 @@ public: AddIntComp(comm); } - int DoBoostedFrameDiags () const { return do_boosted_frame_diags; } - + int DoBoostedFrameDiags () const { return do_boosted_frame_diags; } + protected: std::map particle_comps; diff --git a/Source/Particles/WarpXParticleContainer.cpp b/Source/Particles/WarpXParticleContainer.cpp index 99596f8c3..93940b4f0 100644 --- a/Source/Particles/WarpXParticleContainer.cpp +++ b/Source/Particles/WarpXParticleContainer.cpp @@ -10,6 +10,7 @@ // Import low-level single-particle kernels #include #include +#include using namespace amrex; @@ -279,6 +280,22 @@ WarpXParticleContainer::AddNParticles (int lev, Redistribute(); } +//doDepositionShapeN<1>(wp, uxp, uyp, uzp, jx_arr, jy_arr, jz_arr, +// offset, np_to_depose, dt, dx, +// tilebox, stagger_shift, q); + +// template +/* +void WarpXParticleContainer::doDepositionShapeN(RealVector& wp, RealVector& uxp, + RealVector& uyp, RealVector& uzp, + Array4& jx_arr, + Array4& jy_arr, + Array4& jz_arr, + const long offset, const long np_to_depose, + const Real dt, const std::array& dx, + const Box tilebox, const Real stagger_shift, + const Real q) +*/ /* \brief Current Deposition for thread thread_num * \param pti : Particle iterator * \param wp : Array of particle weights @@ -311,6 +328,8 @@ WarpXParticleContainer::DepositCurrent(WarpXParIter& pti, const long ngJ = jx->nGrow(); const std::array& dx = WarpX::CellSize(std::max(depos_lev,0)); int j_is_nodal = jx->is_nodal() and jy->is_nodal() and jz->is_nodal(); + Real q = this->charge; + const Real stagger_shift = j_is_nodal ? 0.0 : 0.5; BL_PROFILE_VAR_NS("PICSAR::CurrentDeposition", blp_pxr_cd); BL_PROFILE_VAR_NS("PPC::Evolve::Accumulate", blp_accumulate); @@ -330,19 +349,14 @@ WarpXParticleContainer::DepositCurrent(WarpXParIter& pti, Box tbx = convert(tilebox, WarpX::jx_nodal_flag); Box tby = convert(tilebox, WarpX::jy_nodal_flag); Box tbz = convert(tilebox, WarpX::jz_nodal_flag); - tilebox.grow(ngJ); #ifdef AMREX_USE_GPU // No tiling on GPU: jx_ptr points to the full // jx array (same for jy_ptr and jz_ptr). - Real* jx_ptr = (*jx)[pti].dataPtr(); - Real* jy_ptr = (*jy)[pti].dataPtr(); - Real* jz_ptr = (*jz)[pti].dataPtr(); - - auto jxntot = jx[pti].length(); - auto jyntot = jy[pti].length(); - auto jzntot = jz[pti].length(); + Array4 const& jx_arr = jx.array(pti); + Array4 const& jy_arr = jy.array(pti); + Array4 const& jz_arr = jz.array(pti); #else // Tiling is on: jx_ptr points to local_jx[thread_num] // (same for jy_ptr and jz_ptr) @@ -359,132 +373,41 @@ WarpXParticleContainer::DepositCurrent(WarpXParIter& pti, local_jy[thread_num].setVal(0.0); local_jz[thread_num].setVal(0.0); + Array4 const& jx_arr = local_jx[thread_num].array(); + Array4 const& jy_arr = local_jy[thread_num].array(); + Array4 const& jz_arr = local_jz[thread_num].array(); #endif // GPU, no tiling: deposit directly in jx // CPU, tiling: deposit into local_jx // (same for jx and jz) BL_PROFILE_VAR_START(blp_pxr_cd); - Real dxi = 1.0/dx[0]; - Real dzi = 1.0/dx[2]; - Real dts2dx = 0.5*dt*dxi; - Real dts2dz = 0.5*dt*dzi; -#if (AMREX_SPACEDIM == 2) - Real invvol = dxi*dzi; -#else // (AMREX_SPACEDIM == 3) - Real dyi = 1.0/dx[1]; - Real dts2dy = 0.5*dt*dyi; - Real invvol = dxi*dyi*dzi; -#endif - Real clightsq = 1.0/PhysConst::c/PhysConst::c; - Real q = this->charge; - - // Lower corner of tile box physical domain - const std::array& xyzmin = WarpX::LowerCorner(tilebox, depos_lev);; - // xmin and zmin are built on pti.tilebox(), so it does - // not include staggering, so the stagger_shift has to be done by hand. - // Alternatively, we could define xminx from tbx (and the same for 3 - // directions and for jx, jy, jz). This way, sxo would not be needed. - // Better for memory? worth trying? - const Real xmin = xyzmin[0]; - const Real zmin = xyzmin[2]; - const Real stagger_shift = j_is_nodal ? 0.0 : 0.5; - - Array4 const& jx_arr = local_jx[thread_num].array(); - Array4 const& jy_arr = local_jy[thread_num].array(); - Array4 const& jz_arr = local_jz[thread_num].array(); - - const Dim3 lo = lbound(tilebox); Real* AMREX_RESTRICT xp = m_xp[thread_num].dataPtr() + offset; Real* AMREX_RESTRICT zp = m_zp[thread_num].dataPtr() + offset; - -#if (AMREX_SPACEDIM == 3) Real* AMREX_RESTRICT yp = m_yp[thread_num].dataPtr() + offset; - const Real ymin = xyzmin[1]; -#endif - ParallelFor( np_to_depose, - [=] AMREX_GPU_DEVICE (long ip) { - - // macro-particle current in all 3D (even for 2D runs) - // wqx, wqy and wqz are the particle current in each - // direction - Real gaminv = 1.0/std::sqrt(1.0 + uxp[ip]*uxp[ip]*clightsq - + uyp[ip]*uyp[ip]*clightsq - + uzp[ip]*uzp[ip]*clightsq); - Real wq = q*wp[ip]; - Real vx = uxp[ip]*gaminv; - Real wqx = wq*invvol*vx; - Real vy = uyp[ip]*gaminv; - Real wqy = wq*invvol*vy; - Real vz = uzp[ip]*gaminv; - Real wqz = wq*invvol*vz; - - // --- x direction - // particle position in cell unit (1/2 timestep back) - // Real x = (Real) (xp[ip]-xmin)*dxi; - // particle position in cell unit - // Real xmid=x-dts2dx*vx; - Real xmid= (Real) (xp[ip]-xmin)*dxi-dts2dx*vx; - // x index of cell containing particle (cell-centered) - int j = (int) xmid; - // x index of cell containing particle (node-centered) - int j0= (int) (xmid-stagger_shift); - // for the cell-centered quantities, compute current on - // each point - Real xint = xmid-j; - Real sx[2] = {1.0 - xint, xint}; - // for the node-centered quantities, compute current on - // each point - xint = xmid-stagger_shift-j0; - Real sx0[2] = {1.0 - xint, xint}; + // Lower corner of tile box physical domain + const std::array& xyzmin = WarpX::LowerCorner(tilebox, depos_lev);; + // xyzmin is built on pti.tilebox(), so it does + // not include staggering, so the stagger_shift has to be done by hand. + // Alternatively, we could define xyzminx from tbx (and the same for 3 + // directions and for jx, jy, jz). This way, sx0 would not be needed. + // Better for memory? worth trying? + const Dim3 lo = lbound(tilebox); -#if (AMREX_SPACEDIM == 3) - // --- y direction - Real y = (Real) (yp[ip]-ymin)*dyi; - Real ymid=y-dts2dy*vy; - int k = (int) ymid; - int k0= (int) (ymid-stagger_shift); - Real yint = ymid-k; - Real sy[2] = {1.0 - yint, yint}; - yint = ymid-stagger_shift-k0; - Real sy0[2] = {1.0 - yint, yint}; -#endif - // --- z direction - Real z = (Real) (zp[ip]-zmin)*dzi; - Real zmid=z-dts2dz*vz; - int l = (int) zmid; - int l0= (int) (zmid-stagger_shift); - Real zint = zmid-l; - Real sz[2] = {1.0 - zint, zint}; - zint = zmid-stagger_shift-l0; - Real sz0[2] = {1.0 - zint, zint}; - - // Real xbounds[2], ybounds[2], zbounds[2]; - // compute_shape_factor(xbounds, sx, xint); - // deposit_current() - -#if (AMREX_SPACEDIM == 2) - for (int iz=0; iz<2; iz++){ - for (int ix=0; ix<2; ix++){ - jx_arr(lo.x+j0+ix ,lo.y+l +iz ,0) += sx0[ix]*sz [iz]*wqx; - jy_arr(lo.x+j +ix ,lo.y+l +iz ,0) += sx [ix]*sz [iz]*wqy; - jz_arr(lo.x+j +ix ,lo.y+l0+iz ,0) += sx [ix]*sz0[iz]*wqz; - } - } -#else // (AMREX_SPACEDIM == 3) - for (int iz=0; iz<2; iz++){ - for (int iy=0; iy<2; iy++){ - for (int ix=0; ix<2; ix++){ - jx_arr(lo.x+j0+ix ,lo.y+k +iy ,lo.z+l +iz ) += sx0[ix]*sy [iy]*sz [iz]*wqx; - jy_arr(lo.x+j +ix ,lo.y+k0+iy ,lo.z+l +iz ) += sx [ix]*sy0[iy]*sz [iz]*wqy; - jz_arr(lo.x+j +ix ,lo.y+k +iy ,lo.z+l0+iz ) += sx [ix]*sy [iy]*sz0[iz]*wqz; - } - } - } -#endif - } - ); + if (WarpX::nox == 1){ + doDepositionShapeN<1>(xp, yp, zp, wp.dataPtr(), uxp.dataPtr(), uyp.dataPtr(), uzp.dataPtr(), jx_arr, jy_arr, jz_arr, + offset, np_to_depose, dt, dx, + xyzmin, lo, stagger_shift, q); + } else if (WarpX::nox == 2){ + doDepositionShapeN<2>(xp, yp, zp, wp.dataPtr(), uxp.dataPtr(), uyp.dataPtr(), uzp.dataPtr(), jx_arr, jy_arr, jz_arr, + offset, np_to_depose, dt, dx, + xyzmin, lo, stagger_shift, q); + } else if (WarpX::nox == 3){ + doDepositionShapeN<3>(xp, yp, zp, wp.dataPtr(), uxp.dataPtr(), uyp.dataPtr(), uzp.dataPtr(), jx_arr, jy_arr, jz_arr, + offset, np_to_depose, dt, dx, + xyzmin, lo, stagger_shift, q); + } #ifdef WARPX_RZ // Rescale current in r-z mode @@ -1029,3 +952,5 @@ WarpXParticleContainer::particlePostLocate(ParticleType& p, if (pld.m_lev == lev-1){ } } + + -- cgit v1.2.3 From f00c210bcf2e5c7a44f5cbe15f8767b3ec9973e0 Mon Sep 17 00:00:00 2001 From: MaxThevenet Date: Wed, 10 Jul 2019 18:57:42 -0700 Subject: rename kernel file --- Source/Particles/Deposition/ComputeShapeFactors.H | 195 ---------------------- Source/Particles/Deposition/CurrentDeposition.H | 195 ++++++++++++++++++++++ Source/Particles/WarpXParticleContainer.cpp | 2 +- 3 files changed, 196 insertions(+), 196 deletions(-) delete mode 100644 Source/Particles/Deposition/ComputeShapeFactors.H create mode 100644 Source/Particles/Deposition/CurrentDeposition.H (limited to 'Source/Particles/WarpXParticleContainer.cpp') diff --git a/Source/Particles/Deposition/ComputeShapeFactors.H b/Source/Particles/Deposition/ComputeShapeFactors.H deleted file mode 100644 index 283fc5160..000000000 --- a/Source/Particles/Deposition/ComputeShapeFactors.H +++ /dev/null @@ -1,195 +0,0 @@ -#ifndef COMPUTESHAPEFACTORS_H_ -#define COMPUTESHAPEFACTORS_H_ - -//#include -//#include -//#include -//#include -//#include -//#include -//#include -//#include - -//#include -//#include -//#include -//#include -//#include - -//#include -//#include -//#include -//#include -//#include -// #include -// #include -//#include "WarpXParticleContainer.H" -//#include -//#include - -using namespace amrex; - - -template -AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE -void compute_shape_factor_lo(amrex::Dim3& shape_lo) {}; - -template <> -AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE -void compute_shape_factor_lo <1> (amrex::Dim3& shape_lo) { - shape_lo = {0, 0, 0}; -} - -template <> -AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE -void compute_shape_factor_lo <2> (amrex::Dim3& shape_lo) { - shape_lo = {-1, -1, -1}; -} - -template <> -AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE -void compute_shape_factor_lo <3> (amrex::Dim3& shape_lo) { - shape_lo = {-1, -1, -1}; -} - -template -AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE -void compute_shape_factor(Real* sx, Real xint) {}; - -template <> -AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE -void compute_shape_factor <1> (Real* sx, Real xint){ - sx[0] = 1.0 - xint; - sx[1] = xint; -} - -template <> -AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE -void compute_shape_factor <2> (Real* sx, Real xint){ - Print()<<"compute_shape_factor <2>\n"; - sx[0] = 0.5*(0.5-xint)*(0.5-xint); - sx[1] = 0.75-xint*xint; - sx[2] = 0.5*(0.5+xint)*(0.5+xint); -} - -template <> -AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE -void compute_shape_factor <3> (Real* sx, Real xint){ - Print()<<"compute_shape_factor <2>\n"; - sx[0] = 1.0/6.0*(1.0-xint)*(1.0-xint)*(1.0-xint); - sx[1] = 2.0/3.0-xint*xint*(1-xint/2.0); - sx[2] = 2.0/3.0-(1-xint)*(1-xint)*(1.0-0.5*(1-xint)); - sx[3] = 1.0/6.0*xint*xint*xint; -} - -template -void doDepositionShapeN(Real* xp, Real* yp, Real* zp, - Real* wp, Real* uxp, - Real* uyp, Real* uzp, - const amrex::Array4& jx_arr, - const amrex::Array4& jy_arr, - const amrex::Array4& jz_arr, - const long offset, const long np_to_depose, - const amrex::Real dt, const std::array& dx, - const std::array xyzmin, - const Dim3 lo, - const amrex::Real stagger_shift, - const amrex::Real q) -{ - Real dxi = 1.0/dx[0]; - Real dzi = 1.0/dx[2]; - Real dts2dx = 0.5*dt*dxi; - Real dts2dz = 0.5*dt*dzi; -#if (AMREX_SPACEDIM == 2) - Real invvol = dxi*dzi; -#else // (AMREX_SPACEDIM == 3) - Real dyi = 1.0/dx[1]; - Real dts2dy = 0.5*dt*dyi; - Real invvol = dxi*dyi*dzi; -#endif - - const Real xmin = xyzmin[0]; - const Real ymin = xyzmin[1]; - const Real zmin = xyzmin[2]; - - Dim3 shape_lo; - compute_shape_factor_lo(shape_lo); - Real clightsq = 1.0/PhysConst::c/PhysConst::c; - - ParallelFor( np_to_depose, - [=] AMREX_GPU_DEVICE (long ip) { - - // macro-particle current in all 3D (even for 2D runs) - // wqx, wqy and wqz are the particle current in each - // direction - Real gaminv = 1.0/std::sqrt(1.0 + uxp[ip]*uxp[ip]*clightsq - + uyp[ip]*uyp[ip]*clightsq - + uzp[ip]*uzp[ip]*clightsq); - Real wq = q*wp[ip]; - Real vx = uxp[ip]*gaminv; - Real wqx = wq*invvol*vx; - Real vy = uyp[ip]*gaminv; - Real wqy = wq*invvol*vy; - Real vz = uzp[ip]*gaminv; - Real wqz = wq*invvol*vz; - - // --- x direction - // Get particle position after 1/2 push back in position - Real xmid= (Real) (xp[ip]-xmin)*dxi-dts2dx*vx; - // Initialize arrays for node-centered and cell-centered - // shape factors - Real sx[depos_order + 1], sx0[depos_order + 1]; - // Compute shape factors for node-centered quantities - int j = (int) xmid; - Real xint = xmid-j; - compute_shape_factor(sx, xint); - // Compute shape factors for cell-centered quantities - int j0= (int) (xmid-stagger_shift); - xint = xmid-stagger_shift-j0; - compute_shape_factor(sx0, xint); - -#if (AMREX_SPACEDIM == 3) - // --- y direction - Real ymid= (Real) (yp[ip]-ymin)*dyi-dts2dy*vy; - Real sy[depos_order + 1], sy0[depos_order + 1]; - int k = (int) ymid; - Real yint = ymid-k; - compute_shape_factor(sy, yint); - int k0= (int) (ymid-stagger_shift); - yint = ymid-stagger_shift-k0; - compute_shape_factor(sy0, yint); -#endif - // --- z direction - Real zmid=(Real) (zp[ip]-zmin)*dzi-dts2dz*vz; - Real sz[depos_order + 1], sz0[depos_order + 1]; - int l = (int) zmid; - Real zint = zmid-l; - compute_shape_factor(sz, zint); - int l0= (int) (zmid-stagger_shift); - zint = zmid-stagger_shift-l0; - compute_shape_factor(sz0, zint); - -#if (AMREX_SPACEDIM == 2) - for (int iz=0; iz<=depos_order; iz++){ - for (int ix=0; ix<=depos_order; ix++){ - jx_arr(lo.x+j0+ix+shape_lo.x, lo.y+l +iz+shape_lo.y, 0) += sx0[ix]*sz [iz]*wqx; - jy_arr(lo.x+j +ix+shape_lo.x, lo.y+l +iz+shape_lo.y, 0) += sx [ix]*sz [iz]*wqy; - jz_arr(lo.x+j +ix+shape_lo.x, lo.y+l0+iz+shape_lo.y, 0) += sx [ix]*sz0[iz]*wqz; - } - } -#else // (AMREX_SPACEDIM == 3) - for (int iz=0; iz<=depos_order; iz++){ - for (int iy=0; iy<=depos_order; iy++){ - for (int ix=0; ix<=depos_order; ix++){ - jx_arr(lo.x+j0+ix+shape_lo.x, lo.y+k +iy+shape_lo.y, lo.z+l +iz+shape_lo.z) += sx0[ix]*sy [iy]*sz [iz]*wqx; - jy_arr(lo.x+j +ix+shape_lo.x, lo.y+k0+iy+shape_lo.y, lo.z+l +iz+shape_lo.z) += sx [ix]*sy0[iy]*sz [iz]*wqy; - jz_arr(lo.x+j +ix+shape_lo.x, lo.y+k +iy+shape_lo.y, lo.z+l0+iz+shape_lo.z) += sx [ix]*sy [iy]*sz0[iz]*wqz; - } - } - } -#endif - } - ); -} - -#endif // COMPUTESHAPEFACTORS_H_ diff --git a/Source/Particles/Deposition/CurrentDeposition.H b/Source/Particles/Deposition/CurrentDeposition.H new file mode 100644 index 000000000..5e73dd9e0 --- /dev/null +++ b/Source/Particles/Deposition/CurrentDeposition.H @@ -0,0 +1,195 @@ +#ifndef CURRENTDEPOSITION_H_ +#define CURRENTDEPOSITION_H_ + +//#include +//#include +//#include +//#include +//#include +//#include +//#include +//#include + +//#include +//#include +//#include +//#include +//#include + +//#include +//#include +//#include +//#include +//#include +// #include +// #include +//#include "WarpXParticleContainer.H" +//#include +//#include + +using namespace amrex; + + +template +AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE +void compute_shape_factor_lo(amrex::Dim3& shape_lo) {}; + +template <> +AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE +void compute_shape_factor_lo <1> (amrex::Dim3& shape_lo) { + shape_lo = {0, 0, 0}; +} + +template <> +AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE +void compute_shape_factor_lo <2> (amrex::Dim3& shape_lo) { + shape_lo = {-1, -1, -1}; +} + +template <> +AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE +void compute_shape_factor_lo <3> (amrex::Dim3& shape_lo) { + shape_lo = {-1, -1, -1}; +} + +template +AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE +void compute_shape_factor(Real* sx, Real xint) {}; + +template <> +AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE +void compute_shape_factor <1> (Real* sx, Real xint){ + sx[0] = 1.0 - xint; + sx[1] = xint; +} + +template <> +AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE +void compute_shape_factor <2> (Real* sx, Real xint){ + Print()<<"compute_shape_factor <2>\n"; + sx[0] = 0.5*(0.5-xint)*(0.5-xint); + sx[1] = 0.75-xint*xint; + sx[2] = 0.5*(0.5+xint)*(0.5+xint); +} + +template <> +AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE +void compute_shape_factor <3> (Real* sx, Real xint){ + Print()<<"compute_shape_factor <2>\n"; + sx[0] = 1.0/6.0*(1.0-xint)*(1.0-xint)*(1.0-xint); + sx[1] = 2.0/3.0-xint*xint*(1-xint/2.0); + sx[2] = 2.0/3.0-(1-xint)*(1-xint)*(1.0-0.5*(1-xint)); + sx[3] = 1.0/6.0*xint*xint*xint; +} + +template +void doDepositionShapeN(Real* xp, Real* yp, Real* zp, + Real* wp, Real* uxp, + Real* uyp, Real* uzp, + const amrex::Array4& jx_arr, + const amrex::Array4& jy_arr, + const amrex::Array4& jz_arr, + const long offset, const long np_to_depose, + const amrex::Real dt, const std::array& dx, + const std::array xyzmin, + const Dim3 lo, + const amrex::Real stagger_shift, + const amrex::Real q) +{ + Real dxi = 1.0/dx[0]; + Real dzi = 1.0/dx[2]; + Real dts2dx = 0.5*dt*dxi; + Real dts2dz = 0.5*dt*dzi; +#if (AMREX_SPACEDIM == 2) + Real invvol = dxi*dzi; +#else // (AMREX_SPACEDIM == 3) + Real dyi = 1.0/dx[1]; + Real dts2dy = 0.5*dt*dyi; + Real invvol = dxi*dyi*dzi; +#endif + + const Real xmin = xyzmin[0]; + const Real ymin = xyzmin[1]; + const Real zmin = xyzmin[2]; + + Dim3 shape_lo; + compute_shape_factor_lo(shape_lo); + Real clightsq = 1.0/PhysConst::c/PhysConst::c; + + ParallelFor( np_to_depose, + [=] AMREX_GPU_DEVICE (long ip) { + + // macro-particle current in all 3D (even for 2D runs) + // wqx, wqy and wqz are the particle current in each + // direction + Real gaminv = 1.0/std::sqrt(1.0 + uxp[ip]*uxp[ip]*clightsq + + uyp[ip]*uyp[ip]*clightsq + + uzp[ip]*uzp[ip]*clightsq); + Real wq = q*wp[ip]; + Real vx = uxp[ip]*gaminv; + Real wqx = wq*invvol*vx; + Real vy = uyp[ip]*gaminv; + Real wqy = wq*invvol*vy; + Real vz = uzp[ip]*gaminv; + Real wqz = wq*invvol*vz; + + // --- x direction + // Get particle position after 1/2 push back in position + Real xmid= (Real) (xp[ip]-xmin)*dxi-dts2dx*vx; + // Initialize arrays for node-centered and cell-centered + // shape factors + Real sx[depos_order + 1], sx0[depos_order + 1]; + // Compute shape factors for node-centered quantities + int j = (int) xmid; + Real xint = xmid-j; + compute_shape_factor(sx, xint); + // Compute shape factors for cell-centered quantities + int j0= (int) (xmid-stagger_shift); + xint = xmid-stagger_shift-j0; + compute_shape_factor(sx0, xint); + +#if (AMREX_SPACEDIM == 3) + // --- y direction + Real ymid= (Real) (yp[ip]-ymin)*dyi-dts2dy*vy; + Real sy[depos_order + 1], sy0[depos_order + 1]; + int k = (int) ymid; + Real yint = ymid-k; + compute_shape_factor(sy, yint); + int k0= (int) (ymid-stagger_shift); + yint = ymid-stagger_shift-k0; + compute_shape_factor(sy0, yint); +#endif + // --- z direction + Real zmid=(Real) (zp[ip]-zmin)*dzi-dts2dz*vz; + Real sz[depos_order + 1], sz0[depos_order + 1]; + int l = (int) zmid; + Real zint = zmid-l; + compute_shape_factor(sz, zint); + int l0= (int) (zmid-stagger_shift); + zint = zmid-stagger_shift-l0; + compute_shape_factor(sz0, zint); + +#if (AMREX_SPACEDIM == 2) + for (int iz=0; iz<=depos_order; iz++){ + for (int ix=0; ix<=depos_order; ix++){ + jx_arr(lo.x+j0+ix+shape_lo.x, lo.y+l +iz+shape_lo.y, 0) += sx0[ix]*sz [iz]*wqx; + jy_arr(lo.x+j +ix+shape_lo.x, lo.y+l +iz+shape_lo.y, 0) += sx [ix]*sz [iz]*wqy; + jz_arr(lo.x+j +ix+shape_lo.x, lo.y+l0+iz+shape_lo.y, 0) += sx [ix]*sz0[iz]*wqz; + } + } +#else // (AMREX_SPACEDIM == 3) + for (int iz=0; iz<=depos_order; iz++){ + for (int iy=0; iy<=depos_order; iy++){ + for (int ix=0; ix<=depos_order; ix++){ + jx_arr(lo.x+j0+ix+shape_lo.x, lo.y+k +iy+shape_lo.y, lo.z+l +iz+shape_lo.z) += sx0[ix]*sy [iy]*sz [iz]*wqx; + jy_arr(lo.x+j +ix+shape_lo.x, lo.y+k0+iy+shape_lo.y, lo.z+l +iz+shape_lo.z) += sx [ix]*sy0[iy]*sz [iz]*wqy; + jz_arr(lo.x+j +ix+shape_lo.x, lo.y+k +iy+shape_lo.y, lo.z+l0+iz+shape_lo.z) += sx [ix]*sy [iy]*sz0[iz]*wqz; + } + } + } +#endif + } + ); +} + +#endif // CURRENTDEPOSITION_H_ diff --git a/Source/Particles/WarpXParticleContainer.cpp b/Source/Particles/WarpXParticleContainer.cpp index 93940b4f0..eec633a37 100644 --- a/Source/Particles/WarpXParticleContainer.cpp +++ b/Source/Particles/WarpXParticleContainer.cpp @@ -10,7 +10,7 @@ // Import low-level single-particle kernels #include #include -#include +#include using namespace amrex; -- cgit v1.2.3 From 5baf28e5115f450d4f2fd821c33df5a37054792d Mon Sep 17 00:00:00 2001 From: MaxThevenet Date: Thu, 11 Jul 2019 09:07:26 -0700 Subject: remove print statements --- Source/Particles/PhysicalParticleContainer.cpp | 1 - Source/Particles/WarpXParticleContainer.cpp | 4 ---- 2 files changed, 5 deletions(-) (limited to 'Source/Particles/WarpXParticleContainer.cpp') diff --git a/Source/Particles/PhysicalParticleContainer.cpp b/Source/Particles/PhysicalParticleContainer.cpp index b78d36c87..df08d77a7 100644 --- a/Source/Particles/PhysicalParticleContainer.cpp +++ b/Source/Particles/PhysicalParticleContainer.cpp @@ -1530,7 +1530,6 @@ PhysicalParticleContainer::Evolve (int lev, // Current Deposition // // Deposit inside domains - Print()<<"np_current "< Date: Thu, 11 Jul 2019 16:05:16 -0400 Subject: fix code for GPU. OK to machine precision for o1-2-3 --- Source/Particles/Deposition/CurrentDeposition.H | 12 ++++++--- Source/Particles/WarpXParticleContainer.cpp | 12 ++++----- Source/WarpX.H | 34 ++++++++++++------------- 3 files changed, 32 insertions(+), 26 deletions(-) (limited to 'Source/Particles/WarpXParticleContainer.cpp') diff --git a/Source/Particles/Deposition/CurrentDeposition.H b/Source/Particles/Deposition/CurrentDeposition.H index 416cecd7b..cf6fe0d32 100644 --- a/Source/Particles/Deposition/CurrentDeposition.H +++ b/Source/Particles/Deposition/CurrentDeposition.H @@ -77,6 +77,8 @@ void doDepositionShapeN(Real* xp, Real* yp, Real* zp, const amrex::Real stagger_shift, const amrex::Real q) { + BL_PROFILE("PICSAR::CurrentDeposition"); + Real dxi = 1.0/dx[0]; Real dzi = 1.0/dx[2]; Real dts2dx = 0.5*dt*dxi; @@ -150,9 +152,13 @@ void doDepositionShapeN(Real* xp, Real* yp, Real* zp, for (int iz=0; iz<=depos_order; iz++){ for (int iy=0; iy<=depos_order; iy++){ for (int ix=0; ix<=depos_order; ix++){ - jx_arr(lo.x+j0+ix+shape_lo.x, lo.y+k +iy+shape_lo.y, lo.z+l +iz+shape_lo.z) += sx0[ix]*sy [iy]*sz [iz]*wqx; - jy_arr(lo.x+j +ix+shape_lo.x, lo.y+k0+iy+shape_lo.y, lo.z+l +iz+shape_lo.z) += sx [ix]*sy0[iy]*sz [iz]*wqy; - jz_arr(lo.x+j +ix+shape_lo.x, lo.y+k +iy+shape_lo.y, lo.z+l0+iz+shape_lo.z) += sx [ix]*sy [iy]*sz0[iz]*wqz; + amrex::Gpu::Atomic::Add(&jx_arr(lo.x+j0+ix+shape_lo.x, lo.y+k +iy+shape_lo.y, lo.z+l +iz+shape_lo.z), sx0[ix]*sy [iy]*sz [iz]*wqx); + amrex::Gpu::Atomic::Add(&jy_arr(lo.x+j +ix+shape_lo.x, lo.y+k0+iy+shape_lo.y, lo.z+l +iz+shape_lo.z), sx [ix]*sy0[iy]*sz [iz]*wqy); + amrex::Gpu::Atomic::Add(&jz_arr(lo.x+j +ix+shape_lo.x, lo.y+k +iy+shape_lo.y, lo.z+l0+iz+shape_lo.z), sx [ix]*sy [iy]*sz0[iz]*wqz); + // amrex::Gpu::Atomic::Add(&jx(j0+joff,k +koff,l +loff), sx0[joff]*sy [koff]*sz [loff]*wqx); + // jx_arr(lo.x+j0+ix+shape_lo.x, lo.y+k +iy+shape_lo.y, lo.z+l +iz+shape_lo.z) += sx0[ix]*sy [iy]*sz [iz]*wqx; + // jy_arr(lo.x+j +ix+shape_lo.x, lo.y+k0+iy+shape_lo.y, lo.z+l +iz+shape_lo.z) += sx [ix]*sy0[iy]*sz [iz]*wqy; + // jz_arr(lo.x+j +ix+shape_lo.x, lo.y+k +iy+shape_lo.y, lo.z+l0+iz+shape_lo.z) += sx [ix]*sy [iy]*sz0[iz]*wqz; } } } diff --git a/Source/Particles/WarpXParticleContainer.cpp b/Source/Particles/WarpXParticleContainer.cpp index 8f13b1b3e..6ea36c841 100644 --- a/Source/Particles/WarpXParticleContainer.cpp +++ b/Source/Particles/WarpXParticleContainer.cpp @@ -331,7 +331,6 @@ WarpXParticleContainer::DepositCurrent(WarpXParIter& pti, Real q = this->charge; const Real stagger_shift = j_is_nodal ? 0.0 : 0.5; - BL_PROFILE_VAR_NS("PICSAR::CurrentDeposition", blp_pxr_cd); BL_PROFILE_VAR_NS("PPC::Evolve::Accumulate", blp_accumulate); // Get tile box where current is deposited. @@ -354,9 +353,12 @@ WarpXParticleContainer::DepositCurrent(WarpXParIter& pti, #ifdef AMREX_USE_GPU // No tiling on GPU: jx_ptr points to the full // jx array (same for jy_ptr and jz_ptr). - Array4 const& jx_arr = jx.array(pti); - Array4 const& jy_arr = jy.array(pti); - Array4 const& jz_arr = jz.array(pti); + Array4 const& jx_arr = jx->array(pti); + Array4 const& jy_arr = jy->array(pti); + Array4 const& jz_arr = jz->array(pti); + //auto const& jx_arr = jx->array(pti); + //auto const& jy_arr = jy->array(pti); + //auto const& jz_arr = jz->array(pti); #else // Tiling is on: jx_ptr points to local_jx[thread_num] // (same for jy_ptr and jz_ptr) @@ -380,7 +382,6 @@ WarpXParticleContainer::DepositCurrent(WarpXParIter& pti, // GPU, no tiling: deposit directly in jx // CPU, tiling: deposit into local_jx // (same for jx and jz) - BL_PROFILE_VAR_START(blp_pxr_cd); Real* AMREX_RESTRICT xp = m_xp[thread_num].dataPtr() + offset; Real* AMREX_RESTRICT zp = m_zp[thread_num].dataPtr() + offset; @@ -417,7 +418,6 @@ WarpXParticleContainer::DepositCurrent(WarpXParIter& pti, jz_ptr, &ngJ, jzntot.getVect(), &xyzmin[0], &dx[0]); #endif - BL_PROFILE_VAR_STOP(blp_pxr_cd); #ifndef AMREX_USE_GPU BL_PROFILE_VAR_START(blp_accumulate); diff --git a/Source/WarpX.H b/Source/WarpX.H index 251270e21..97a69340a 100644 --- a/Source/WarpX.H +++ b/Source/WarpX.H @@ -251,6 +251,23 @@ public: void WriteSlicePlotFile () const; void ClearSliceMultiFabs (); + + static void ComputeDivB (amrex::MultiFab& divB, int dcomp, + const std::array& B, + const std::array& dx); + + static void ComputeDivB (amrex::MultiFab& divB, int dcomp, + const std::array& B, + const std::array& dx, int ngrow); + + static void ComputeDivE (amrex::MultiFab& divE, int dcomp, + const std::array& B, + const std::array& dx); + + static void ComputeDivE (amrex::MultiFab& divE, int dcomp, + const std::array& B, + const std::array& dx, int ngrow); + protected: //! Tagging cells for refinement @@ -383,23 +400,6 @@ private: std::array, 3> getInterpolatedE(int lev) const; std::array, 3> getInterpolatedB(int lev) const; - - static void ComputeDivB (amrex::MultiFab& divB, int dcomp, - const std::array& B, - const std::array& dx); - - static void ComputeDivB (amrex::MultiFab& divB, int dcomp, - const std::array& B, - const std::array& dx, int ngrow); - - static void ComputeDivE (amrex::MultiFab& divE, int dcomp, - const std::array& B, - const std::array& dx); - - static void ComputeDivE (amrex::MultiFab& divE, int dcomp, - const std::array& B, - const std::array& dx, int ngrow); - void SyncCurrent (const std::array& fine, const std::array< amrex::MultiFab*,3>& crse, int ref_ratio); -- cgit v1.2.3 From a944bdd6c5a4612e32aaefc894c232b958129bea Mon Sep 17 00:00:00 2001 From: MaxThevenet Date: Thu, 11 Jul 2019 14:07:54 -0700 Subject: keep option to use PICSAR current deposition --- Source/Particles/Deposition/CurrentDeposition.H | 108 +++++++++++---- Source/Particles/WarpXParticleContainer.cpp | 171 ++++++++++++++++++++---- Source/WarpX.H | 1 + Source/WarpX.cpp | 2 + 4 files changed, 226 insertions(+), 56 deletions(-) (limited to 'Source/Particles/WarpXParticleContainer.cpp') diff --git a/Source/Particles/Deposition/CurrentDeposition.H b/Source/Particles/Deposition/CurrentDeposition.H index cf6fe0d32..006ff05fe 100644 --- a/Source/Particles/Deposition/CurrentDeposition.H +++ b/Source/Particles/Deposition/CurrentDeposition.H @@ -3,33 +3,45 @@ using namespace amrex; - +// Compute lower bound for current deposition shape factor: +// order 1: 0 +// order 2: -1 +// order 3: -1 +// non-type template parameter depos_order is the order for current deposition +// shape factor. Specialized templates are defined below for orders 1, 2 and 3. +// Currently, the shape factor is the same in all directions. template AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE void compute_shape_factor_lo(amrex::Dim3& shape_lo) {}; +// Compute lower bound for order 1. template <> AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE void compute_shape_factor_lo <1> (amrex::Dim3& shape_lo) { shape_lo = {0, 0, 0}; } +// Compute lower bound for order 2. template <> AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE void compute_shape_factor_lo <2> (amrex::Dim3& shape_lo) { shape_lo = {-1, -1, -1}; } +// Compute lower bound for order 3. template <> AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE void compute_shape_factor_lo <3> (amrex::Dim3& shape_lo) { shape_lo = {-1, -1, -1}; } +// Compute shape factor and return index of cell containing particle. +// Specialized templates are defined below for orders 1, 2 and 3. template AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE int compute_shape_factor(Real* sx, Real xint) {return 0;}; +// Compute shape factor for order 1. template <> AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE int compute_shape_factor <1> (Real* sx, Real xmid){ @@ -40,6 +52,7 @@ int compute_shape_factor <1> (Real* sx, Real xmid){ return j; } +// Compute shape factor for order 2. template <> AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE int compute_shape_factor <2> (Real* sx, Real xmid){ @@ -51,6 +64,7 @@ int compute_shape_factor <2> (Real* sx, Real xmid){ return j; } +// Compute shape factor for order 3. template <> AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE int compute_shape_factor <3> (Real* sx, Real xmid){ @@ -63,6 +77,23 @@ int compute_shape_factor <3> (Real* sx, Real xmid){ return j; } +/* \brief Current Deposition for thread thread_num + * /param xp, yp, zp : Pointer to arrays of particle positions. + * \param wp : Pointer to array of particle weights. + * \param uxp uyp uzp : Pointer to arrays of particle momentum. + * \param jx_arr : Array4 of current density, either full array or tile. + * \param jy_arr : Array4 of current density, either full array or tile. + * \param jz_arr : Array4 of current density, either full array or tile. + * \param offset : Index of first particle for which current is deposited + * \param np_to_depose : Number of particles for which current is deposited. + Particles [offset,offset+np_tp_depose] deposit current. + * \param dt : Time step for particle level + * \param dx : 3D cell size + * \param xyzmin : Physical lower bounds of domain. + * \param lo : Index lower bounds of domain. + * \param stagger_shift: 0 if nodal, 0.5 if staggered. + * /param q : species charge. + */ template void doDepositionShapeN(Real* xp, Real* yp, Real* zp, Real* wp, Real* uxp, @@ -102,63 +133,82 @@ void doDepositionShapeN(Real* xp, Real* yp, Real* zp, ParallelFor( np_to_depose, [=] AMREX_GPU_DEVICE (long ip) { - // macro-particle current in all 3D (even for 2D runs) - // wqx, wqy and wqz are the particle current in each - // direction Real gaminv = 1.0/std::sqrt(1.0 + uxp[ip]*uxp[ip]*clightsq - + uyp[ip]*uyp[ip]*clightsq - + uzp[ip]*uzp[ip]*clightsq); - Real wq = q*wp[ip]; - Real vx = uxp[ip]*gaminv; + + uyp[ip]*uyp[ip]*clightsq + + uzp[ip]*uzp[ip]*clightsq); + Real wq = q*wp[ip]; + Real vx = uxp[ip]*gaminv; + Real vy = uyp[ip]*gaminv; + Real vz = uzp[ip]*gaminv; + // wqx, wqy wqz are particle current in each direction Real wqx = wq*invvol*vx; - Real vy = uyp[ip]*gaminv; Real wqy = wq*invvol*vy; - Real vz = uzp[ip]*gaminv; Real wqz = wq*invvol*vz; // --- x direction // Get particle position after 1/2 push back in position - Real xmid= (Real) (xp[ip]-xmin)*dxi-dts2dx*vx; - // Initialize arrays for node-centered and cell-centered - // shape factors - Real sx[depos_order + 1], sx0[depos_order + 1]; + Real xmid = (Real) (xp[ip]-xmin)*dxi-dts2dx*vx; // Compute shape factors for node-centered quantities - int j = compute_shape_factor(sx, xmid); + Real sx [depos_order + 1]; + int j = compute_shape_factor(sx, xmid); // Compute shape factors for cell-centered quantities + Real sx0[depos_order + 1]; int j0 = compute_shape_factor(sx0, xmid-stagger_shift); #if (AMREX_SPACEDIM == 3) // --- y direction Real ymid= (Real) (yp[ip]-ymin)*dyi-dts2dy*vy; - Real sy[depos_order + 1], sy0[depos_order + 1]; - int k = compute_shape_factor(sy, ymid); + Real sy [depos_order + 1]; + int k = compute_shape_factor(sy, ymid); + Real sy0[depos_order + 1]; int k0 = compute_shape_factor(sy0, ymid-stagger_shift); #endif // --- z direction Real zmid=(Real) (zp[ip]-zmin)*dzi-dts2dz*vz; - Real sz[depos_order + 1], sz0[depos_order + 1]; - int l = compute_shape_factor(sz, zmid); + Real sz [depos_order + 1]; + int l = compute_shape_factor(sz, zmid); + Real sz0[depos_order + 1]; int l0 = compute_shape_factor(sz0, zmid-stagger_shift); #if (AMREX_SPACEDIM == 2) for (int iz=0; iz<=depos_order; iz++){ for (int ix=0; ix<=depos_order; ix++){ - jx_arr(lo.x+j0+ix+shape_lo.x, lo.y+l +iz+shape_lo.y, 0) += sx0[ix]*sz [iz]*wqx; - jy_arr(lo.x+j +ix+shape_lo.x, lo.y+l +iz+shape_lo.y, 0) += sx [ix]*sz [iz]*wqy; - jz_arr(lo.x+j +ix+shape_lo.x, lo.y+l0+iz+shape_lo.y, 0) += sx [ix]*sz0[iz]*wqz; + amrex::Gpu::Atomic::Add( + &jx_arr(lo.x+j0+ix+shape_lo.x, + lo.y+l +iz+shape_lo.y, + 0), + sx0[ix]*sz [iz]*wqx); + amrex::Gpu::Atomic::Add( + &jy_arr(lo.x+j +ix+shape_lo.x, + lo.y+l +iz+shape_lo.y, + 0), + sx [ix]*sz [iz]*wqy); + amrex::Gpu::Atomic::Add( + &jz_arr(lo.x+j +ix+shape_lo.x, + lo.y+l0+iz+shape_lo.y, + 0), + sx [ix]*sz0[iz]*wqz); } } #else // (AMREX_SPACEDIM == 3) for (int iz=0; iz<=depos_order; iz++){ for (int iy=0; iy<=depos_order; iy++){ for (int ix=0; ix<=depos_order; ix++){ - amrex::Gpu::Atomic::Add(&jx_arr(lo.x+j0+ix+shape_lo.x, lo.y+k +iy+shape_lo.y, lo.z+l +iz+shape_lo.z), sx0[ix]*sy [iy]*sz [iz]*wqx); - amrex::Gpu::Atomic::Add(&jy_arr(lo.x+j +ix+shape_lo.x, lo.y+k0+iy+shape_lo.y, lo.z+l +iz+shape_lo.z), sx [ix]*sy0[iy]*sz [iz]*wqy); - amrex::Gpu::Atomic::Add(&jz_arr(lo.x+j +ix+shape_lo.x, lo.y+k +iy+shape_lo.y, lo.z+l0+iz+shape_lo.z), sx [ix]*sy [iy]*sz0[iz]*wqz); - // amrex::Gpu::Atomic::Add(&jx(j0+joff,k +koff,l +loff), sx0[joff]*sy [koff]*sz [loff]*wqx); - // jx_arr(lo.x+j0+ix+shape_lo.x, lo.y+k +iy+shape_lo.y, lo.z+l +iz+shape_lo.z) += sx0[ix]*sy [iy]*sz [iz]*wqx; - // jy_arr(lo.x+j +ix+shape_lo.x, lo.y+k0+iy+shape_lo.y, lo.z+l +iz+shape_lo.z) += sx [ix]*sy0[iy]*sz [iz]*wqy; - // jz_arr(lo.x+j +ix+shape_lo.x, lo.y+k +iy+shape_lo.y, lo.z+l0+iz+shape_lo.z) += sx [ix]*sy [iy]*sz0[iz]*wqz; + amrex::Gpu::Atomic::Add( + &jx_arr(lo.x+j0+ix+shape_lo.x, + lo.y+k +iy+shape_lo.y, + lo.z+l +iz+shape_lo.z), + sx0[ix]*sy [iy]*sz [iz]*wqx); + amrex::Gpu::Atomic::Add( + &jy_arr(lo.x+j +ix+shape_lo.x, + lo.y+k0+iy+shape_lo.y, + lo.z+l +iz+shape_lo.z), + sx [ix]*sy0[iy]*sz [iz]*wqy); + amrex::Gpu::Atomic::Add( + &jz_arr(lo.x+j +ix+shape_lo.x, + lo.y+k +iy+shape_lo.y, + lo.z+l0+iz+shape_lo.z), + sx [ix]*sy [iy]*sz0[iz]*wqz); } } } diff --git a/Source/Particles/WarpXParticleContainer.cpp b/Source/Particles/WarpXParticleContainer.cpp index 6ea36c841..42b99efcb 100644 --- a/Source/Particles/WarpXParticleContainer.cpp +++ b/Source/Particles/WarpXParticleContainer.cpp @@ -280,23 +280,7 @@ WarpXParticleContainer::AddNParticles (int lev, Redistribute(); } -//doDepositionShapeN<1>(wp, uxp, uyp, uzp, jx_arr, jy_arr, jz_arr, -// offset, np_to_depose, dt, dx, -// tilebox, stagger_shift, q); - -// template -/* -void WarpXParticleContainer::doDepositionShapeN(RealVector& wp, RealVector& uxp, - RealVector& uyp, RealVector& uzp, - Array4& jx_arr, - Array4& jy_arr, - Array4& jz_arr, - const long offset, const long np_to_depose, - const Real dt, const std::array& dx, - const Box tilebox, const Real stagger_shift, - const Real q) -*/ -/* \brief Current Deposition for thread thread_num +/* \brief Current Deposition for thread thread_num using PICSAR * \param pti : Particle iterator * \param wp : Array of particle weights * \param uxp uyp uzp : Array of particle @@ -311,6 +295,139 @@ void WarpXParticleContainer::doDepositionShapeN(RealVector& wp, RealVector& uxp, * \param dt : Time step for particle level */ void +WarpXParticleContainer::DepositCurrentFortran(WarpXParIter& pti, + RealVector& wp, RealVector& uxp, + RealVector& uyp, RealVector& uzp, + MultiFab* jx, MultiFab* jy, MultiFab* jz, + const long offset, const long np_to_depose, + int thread_num, int lev, int depos_lev, + Real dt) +{ + AMREX_ALWAYS_ASSERT_WITH_MESSAGE((depos_lev==(lev-1)) || + (depos_lev==(lev )), + "Deposition buffers only work for lev-1"); + // If no particles, do not do anything + if (np_to_depose == 0) return; + + const long ngJ = jx->nGrow(); + const std::array& dx = WarpX::CellSize(std::max(depos_lev,0)); + const long lvect = 8; + int j_is_nodal = jx->is_nodal() and jy->is_nodal() and jz->is_nodal(); + + BL_PROFILE_VAR_NS("PICSAR::CurrentDeposition", blp_pxr_cd); + BL_PROFILE_VAR_NS("PPC::Evolve::Accumulate", blp_accumulate); + + // Get tile box where current is deposited. + // The tile box is different when depositing in the buffers (depos_lev& xyzmin_tile = WarpX::LowerCorner(tilebox, depos_lev); + const std::array& xyzmin = xyzmin_tile; + +#ifdef AMREX_USE_GPU + // No tiling on GPU: jx_ptr points to the full + // jx array (same for jy_ptr and jz_ptr). + Real* jx_ptr = (*jx)[pti].dataPtr(); + Real* jy_ptr = (*jy)[pti].dataPtr(); + Real* jz_ptr = (*jz)[pti].dataPtr(); + + auto jxntot = (*jx)[pti].length(); + auto jyntot = (*jy)[pti].length(); + auto jzntot = (*jz)[pti].length(); +#else + // Tiling is on: jx_ptr points to local_jx[thread_num] + // (same for jy_ptr and jz_ptr) + tbx.grow(ngJ); + tby.grow(ngJ); + tbz.grow(ngJ); + + local_jx[thread_num].resize(tbx); + local_jy[thread_num].resize(tby); + local_jz[thread_num].resize(tbz); + + Real* jx_ptr = local_jx[thread_num].dataPtr(); + Real* jy_ptr = local_jy[thread_num].dataPtr(); + Real* jz_ptr = local_jz[thread_num].dataPtr(); + + // local_jx[thread_num] is set to zero + local_jx[thread_num].setVal(0.0); + local_jy[thread_num].setVal(0.0); + local_jz[thread_num].setVal(0.0); + + auto jxntot = local_jx[thread_num].length(); + auto jyntot = local_jy[thread_num].length(); + auto jzntot = local_jz[thread_num].length(); +#endif + // GPU, no tiling: deposit directly in jx + // CPU, tiling: deposit into local_jx + // (same for jx and jz) + BL_PROFILE_VAR_START(blp_pxr_cd); + warpx_current_deposition( + jx_ptr, &ngJ, jxntot.getVect(), + jy_ptr, &ngJ, jyntot.getVect(), + jz_ptr, &ngJ, jzntot.getVect(), + &np_to_depose, + m_xp[thread_num].dataPtr() + offset, + m_yp[thread_num].dataPtr() + offset, + m_zp[thread_num].dataPtr() + offset, + uxp.dataPtr() + offset, + uyp.dataPtr() + offset, + uzp.dataPtr() + offset, + m_giv[thread_num].dataPtr() + offset, + wp.dataPtr() + offset, &this->charge, + &xyzmin[0], &xyzmin[1], &xyzmin[2], + &dt, &dx[0], &dx[1], &dx[2], + &WarpX::nox,&WarpX::noy,&WarpX::noz, &j_is_nodal, + &lvect,&WarpX::current_deposition_algo); + +#ifdef WARPX_RZ + // Rescale current in r-z mode + warpx_current_deposition_rz_volume_scaling( + jx_ptr, &ngJ, jxntot.getVect(), + jy_ptr, &ngJ, jyntot.getVect(), + jz_ptr, &ngJ, jzntot.getVect(), + &xyzmin[0], &dx[0]); +#endif + BL_PROFILE_VAR_STOP(blp_pxr_cd); + +#ifndef AMREX_USE_GPU + BL_PROFILE_VAR_START(blp_accumulate); + // CPU, tiling: atomicAdd local_jx into jx + // (same for jx and jz) + (*jx)[pti].atomicAdd(local_jx[thread_num], tbx, tbx, 0, 0, 1); + (*jy)[pti].atomicAdd(local_jy[thread_num], tby, tby, 0, 0, 1); + (*jz)[pti].atomicAdd(local_jz[thread_num], tbz, tbz, 0, 0, 1); + BL_PROFILE_VAR_STOP(blp_accumulate); +#endif +} + +/* \brief Current Deposition for thread thread_num + * \param pti : Particle iterator + * \param wp : Array of particle weights + * \param uxp uyp uzp : Array of particle + * \param jx jy jz : Full array of current density + * \param offset : Index of first particle for which current is deposited + * \param np_to_depose: Number of particles for which current is deposited. + Particles [offset,offset+np_tp_depose] deposit current + * \param thread_num : Thread number (if tiling) + * \param lev : Level of box that contains particles + * \param depos_lev : Level on which particles deposit (if buffers are used) + * \param dt : Time step for particle level + */ +void WarpXParticleContainer::DepositCurrent(WarpXParIter& pti, RealVector& wp, RealVector& uxp, RealVector& uyp, RealVector& uzp, @@ -356,9 +473,6 @@ WarpXParticleContainer::DepositCurrent(WarpXParIter& pti, Array4 const& jx_arr = jx->array(pti); Array4 const& jy_arr = jy->array(pti); Array4 const& jz_arr = jz->array(pti); - //auto const& jx_arr = jx->array(pti); - //auto const& jy_arr = jy->array(pti); - //auto const& jz_arr = jz->array(pti); #else // Tiling is on: jx_ptr points to local_jx[thread_num] // (same for jy_ptr and jz_ptr) @@ -396,17 +510,20 @@ WarpXParticleContainer::DepositCurrent(WarpXParIter& pti, // Better for memory? worth trying? const Dim3 lo = lbound(tilebox); - if (WarpX::nox == 1){ - doDepositionShapeN<1>(xp, yp, zp, wp.dataPtr(), uxp.dataPtr(), uyp.dataPtr(), uzp.dataPtr(), jx_arr, jy_arr, jz_arr, - offset, np_to_depose, dt, dx, + if (WarpX::nox == 1){ + doDepositionShapeN<1>(xp, yp, zp, wp.dataPtr(), uxp.dataPtr(), + uyp.dataPtr(), uzp.dataPtr(), jx_arr, jy_arr, + jz_arr, offset, np_to_depose, dt, dx, xyzmin, lo, stagger_shift, q); } else if (WarpX::nox == 2){ - doDepositionShapeN<2>(xp, yp, zp, wp.dataPtr(), uxp.dataPtr(), uyp.dataPtr(), uzp.dataPtr(), jx_arr, jy_arr, jz_arr, - offset, np_to_depose, dt, dx, + doDepositionShapeN<2>(xp, yp, zp, wp.dataPtr(), uxp.dataPtr(), + uyp.dataPtr(), uzp.dataPtr(), jx_arr, jy_arr, + jz_arr, offset, np_to_depose, dt, dx, xyzmin, lo, stagger_shift, q); } else if (WarpX::nox == 3){ - doDepositionShapeN<3>(xp, yp, zp, wp.dataPtr(), uxp.dataPtr(), uyp.dataPtr(), uzp.dataPtr(), jx_arr, jy_arr, jz_arr, - offset, np_to_depose, dt, dx, + doDepositionShapeN<3>(xp, yp, zp, wp.dataPtr(), uxp.dataPtr(), + uyp.dataPtr(), uzp.dataPtr(), jx_arr, jy_arr, + jz_arr, offset, np_to_depose, dt, dx, xyzmin, lo, stagger_shift, q); } diff --git a/Source/WarpX.H b/Source/WarpX.H index 97a69340a..d611aa39a 100644 --- a/Source/WarpX.H +++ b/Source/WarpX.H @@ -81,6 +81,7 @@ public: static amrex::Vector B_external; // Algorithms + static long use_picsar_deposition; static long current_deposition_algo; static long charge_deposition_algo; static long field_gathering_algo; diff --git a/Source/WarpX.cpp b/Source/WarpX.cpp index 877882037..a24476312 100644 --- a/Source/WarpX.cpp +++ b/Source/WarpX.cpp @@ -37,6 +37,7 @@ Vector WarpX::boost_direction = {0,0,0}; int WarpX::do_compute_max_step_from_zmax = 0; Real WarpX::zmax_plasma_to_compute_max_step = 0.; +long WarpX::use_picsar_deposition = 1; long WarpX::current_deposition_algo; long WarpX::charge_deposition_algo; long WarpX::field_gathering_algo; @@ -484,6 +485,7 @@ WarpX::ReadParameters () { ParmParse pp("algo"); + pp.query("use_picsar_deposition", use_picsar_deposition) current_deposition_algo = GetAlgorithmInteger(pp, "current_deposition"); charge_deposition_algo = GetAlgorithmInteger(pp, "charge_deposition"); field_gathering_algo = GetAlgorithmInteger(pp, "field_gathering"); -- cgit v1.2.3 From 57d4f470b58203a978f02af0fb44142a71d57fd5 Mon Sep 17 00:00:00 2001 From: MaxThevenet Date: Thu, 11 Jul 2019 14:26:38 -0700 Subject: option use_picsar_deposition --- Source/Particles/Deposition/CurrentDeposition.H | 3 ++- Source/Particles/PhysicalParticleContainer.cpp | 32 ++++++++++++++++++------- Source/Particles/WarpXParticleContainer.H | 15 ++++++++++++ Source/Particles/WarpXParticleContainer.cpp | 2 +- Source/WarpX.cpp | 6 ++--- 5 files changed, 44 insertions(+), 14 deletions(-) (limited to 'Source/Particles/WarpXParticleContainer.cpp') diff --git a/Source/Particles/Deposition/CurrentDeposition.H b/Source/Particles/Deposition/CurrentDeposition.H index 006ff05fe..66db003a3 100644 --- a/Source/Particles/Deposition/CurrentDeposition.H +++ b/Source/Particles/Deposition/CurrentDeposition.H @@ -109,7 +109,7 @@ void doDepositionShapeN(Real* xp, Real* yp, Real* zp, const amrex::Real q) { BL_PROFILE("PICSAR::CurrentDeposition"); - + Real dxi = 1.0/dx[0]; Real dzi = 1.0/dx[2]; Real dts2dx = 0.5*dt*dxi; @@ -130,6 +130,7 @@ void doDepositionShapeN(Real* xp, Real* yp, Real* zp, compute_shape_factor_lo(shape_lo); Real clightsq = 1.0/PhysConst::c/PhysConst::c; + // Loop over particles and deposit into jx_arr, jy_arr and jz_arr ParallelFor( np_to_depose, [=] AMREX_GPU_DEVICE (long ip) { diff --git a/Source/Particles/PhysicalParticleContainer.cpp b/Source/Particles/PhysicalParticleContainer.cpp index df08d77a7..86cd0827f 100644 --- a/Source/Particles/PhysicalParticleContainer.cpp +++ b/Source/Particles/PhysicalParticleContainer.cpp @@ -1529,16 +1529,30 @@ PhysicalParticleContainer::Evolve (int lev, // // Current Deposition // - // Deposit inside domains - DepositCurrent(pti, wp, uxp, uyp, uzp, &jx, &jy, &jz, - 0, np_current, thread_num, - lev, lev, dt); - if (has_buffer){ - // Deposit in buffers - DepositCurrent(pti, wp, uxp, uyp, uzp, cjx, cjy, cjz, - np_current, np-np_current, thread_num, - lev, lev-1, dt); + if (WarpX::use_picsar_deposition) { + // Deposit inside domains + DepositCurrentFortran(pti, wp, uxp, uyp, uzp, &jx, &jy, &jz, + 0, np_current, thread_num, + lev, lev, dt); + if (has_buffer){ + // Deposit in buffers + DepositCurrentFortran(pti, wp, uxp, uyp, uzp, cjx, cjy, cjz, + np_current, np-np_current, thread_num, + lev, lev-1, dt); + } + } else { + // Deposit inside domains + DepositCurrent(pti, wp, uxp, uyp, uzp, &jx, &jy, &jz, + 0, np_current, thread_num, + lev, lev, dt); + if (has_buffer){ + // Deposit in buffers + DepositCurrent(pti, wp, uxp, uyp, uzp, cjx, cjy, cjz, + np_current, np-np_current, thread_num, + lev, lev-1, dt); + } } + // // copy particle data back // diff --git a/Source/Particles/WarpXParticleContainer.H b/Source/Particles/WarpXParticleContainer.H index 6182ec5bd..662b2e1b8 100644 --- a/Source/Particles/WarpXParticleContainer.H +++ b/Source/Particles/WarpXParticleContainer.H @@ -189,6 +189,21 @@ public: int depos_lev, amrex::Real dt); + virtual void DepositCurrentFortran(WarpXParIter& pti, + RealVector& wp, + RealVector& uxp, + RealVector& uyp, + RealVector& uzp, + amrex::MultiFab* jx, + amrex::MultiFab* jy, + amrex::MultiFab* jz, + const long offset, + const long np_to_depose, + int thread_num, + int lev, + int depos_lev, + amrex::Real dt); + // If particles start outside of the domain, ContinuousInjection // makes sure that they are initialized when they enter the domain, and // NOT before. Virtual function, overriden by derived classes. diff --git a/Source/Particles/WarpXParticleContainer.cpp b/Source/Particles/WarpXParticleContainer.cpp index 42b99efcb..ac123f9b6 100644 --- a/Source/Particles/WarpXParticleContainer.cpp +++ b/Source/Particles/WarpXParticleContainer.cpp @@ -435,7 +435,7 @@ WarpXParticleContainer::DepositCurrent(WarpXParIter& pti, const long offset, const long np_to_depose, int thread_num, int lev, int depos_lev, Real dt) -{ +{ AMREX_ALWAYS_ASSERT_WITH_MESSAGE((depos_lev==(lev-1)) || (depos_lev==(lev )), "Deposition buffers only work for lev-1"); diff --git a/Source/WarpX.cpp b/Source/WarpX.cpp index 24011fb2f..fd4840b8d 100644 --- a/Source/WarpX.cpp +++ b/Source/WarpX.cpp @@ -485,11 +485,11 @@ WarpX::ReadParameters () { ParmParse pp("algo"); - pp.query("use_picsar_deposition", use_picsar_deposition) + pp.query("use_picsar_deposition", use_picsar_deposition); current_deposition_algo = GetAlgorithmInteger(pp, "current_deposition"); - if (!use_picsar_algo){ + if (!use_picsar_deposition){ AMREX_ALWAYS_ASSERT_WITH_MESSAGE( current_deposition_algo >= 2, - "if not use_picsar_deposition, cannot use Esirkepov deposition.") + "if not use_picsar_deposition, cannot use Esirkepov deposition."); } charge_deposition_algo = GetAlgorithmInteger(pp, "charge_deposition"); field_gathering_algo = GetAlgorithmInteger(pp, "field_gathering"); -- cgit v1.2.3 From 91ada99c4a2aaabf8c6789e60b37c83b9d2ba9f4 Mon Sep 17 00:00:00 2001 From: MaxThevenet Date: Thu, 11 Jul 2019 18:50:22 -0700 Subject: avoid compilation error when USE_RZ=TRUE --- Source/Particles/WarpXParticleContainer.cpp | 9 --------- 1 file changed, 9 deletions(-) (limited to 'Source/Particles/WarpXParticleContainer.cpp') diff --git a/Source/Particles/WarpXParticleContainer.cpp b/Source/Particles/WarpXParticleContainer.cpp index ac123f9b6..7316dcc95 100644 --- a/Source/Particles/WarpXParticleContainer.cpp +++ b/Source/Particles/WarpXParticleContainer.cpp @@ -527,15 +527,6 @@ WarpXParticleContainer::DepositCurrent(WarpXParIter& pti, xyzmin, lo, stagger_shift, q); } -#ifdef WARPX_RZ - // Rescale current in r-z mode - warpx_current_deposition_rz_volume_scaling( - jx_ptr, &ngJ, jxntot.getVect(), - jy_ptr, &ngJ, jyntot.getVect(), - jz_ptr, &ngJ, jzntot.getVect(), - &xyzmin[0], &dx[0]); -#endif - #ifndef AMREX_USE_GPU BL_PROFILE_VAR_START(blp_accumulate); // CPU, tiling: atomicAdd local_jx into jx -- cgit v1.2.3