From 1ce5957cb32a3b7ae39ad68b528e5a370854c58f Mon Sep 17 00:00:00 2001 From: MaxThevenet Date: Sun, 7 Jul 2019 11:14:31 -0700 Subject: avoid duplication in current deposition when using buffers --- Source/Particles/WarpXParticleContainer.cpp | 283 ++++++++++++---------------- 1 file changed, 116 insertions(+), 167 deletions(-) (limited to 'Source/Particles/WarpXParticleContainer.cpp') diff --git a/Source/Particles/WarpXParticleContainer.cpp b/Source/Particles/WarpXParticleContainer.cpp index ac532912d..eb03059f6 100644 --- a/Source/Particles/WarpXParticleContainer.cpp +++ b/Source/Particles/WarpXParticleContainer.cpp @@ -279,187 +279,136 @@ WarpXParticleContainer::AddNParticles (int lev, Redistribute(); } - +/* \brief Current Deposition for thread thread_num + * \param pti : Particle iterator + * \param wp : Array of particle weights + * \param uxp uyp uzp : Array of particle + * \param jx jy jz : Full array of current density + * \param offset : Index of first particle for which current is deposited + * \param np_to_depose: Number of particles for which current is deposited. + Particles [offset,offset+np_tp_depose] deposit their + current + * \param thread_num : Thread number (if tiling) + * \param lev : Level of box that contains particles + * \param depos_lev : Level on which particles deposit (if buffers are used) + * \param dt : Time step for particle level + * \param ngJ : Number of ghosts cells (of lev) + */ void WarpXParticleContainer::DepositCurrent(WarpXParIter& pti, RealVector& wp, RealVector& uxp, RealVector& uyp, RealVector& uzp, - MultiFab& jx, MultiFab& jy, MultiFab& jz, - MultiFab* cjx, MultiFab* cjy, MultiFab* cjz, - const long np_current, const long np, - int thread_num, int lev, Real dt ) + MultiFab* jx, MultiFab* jy, MultiFab* jz, + const long offset, const long np_to_depose, + int thread_num, int lev, int depos_lev, + Real dt, const long ngJ) { - Real *jx_ptr, *jy_ptr, *jz_ptr; - const std::array& xyzmin_tile = WarpX::LowerCorner(pti.tilebox(), lev); - const std::array& dx = WarpX::CellSize(lev); - const std::array& cdx = WarpX::CellSize(std::max(lev-1,0)); - const std::array& xyzmin = xyzmin_tile; - const long lvect = 8; - - BL_PROFILE_VAR_NS("PICSAR::CurrentDeposition", blp_pxr_cd); - BL_PROFILE_VAR_NS("PPC::Evolve::Accumulate", blp_accumulate); - - Box tbx = convert(pti.tilebox(), WarpX::jx_nodal_flag); - Box tby = convert(pti.tilebox(), WarpX::jy_nodal_flag); - Box tbz = convert(pti.tilebox(), WarpX::jz_nodal_flag); - - // WarpX assumes the same number of guard cells for Jx, Jy, Jz - long ngJ = jx.nGrow(); - - int j_is_nodal = jx.is_nodal() and jy.is_nodal() and jz.is_nodal(); - - // Deposit charge for particles that are not in the current buffers - if (np_current > 0) - { -#ifdef AMREX_USE_GPU - jx_ptr = jx[pti].dataPtr(); - jy_ptr = jy[pti].dataPtr(); - jz_ptr = jz[pti].dataPtr(); - - auto jxntot = jx[pti].length(); - auto jyntot = jy[pti].length(); - auto jzntot = jz[pti].length(); -#else - tbx.grow(ngJ); - tby.grow(ngJ); - tbz.grow(ngJ); - - local_jx[thread_num].resize(tbx); - local_jy[thread_num].resize(tby); - local_jz[thread_num].resize(tbz); - - jx_ptr = local_jx[thread_num].dataPtr(); - jy_ptr = local_jy[thread_num].dataPtr(); - jz_ptr = local_jz[thread_num].dataPtr(); - - local_jx[thread_num].setVal(0.0); - local_jy[thread_num].setVal(0.0); - local_jz[thread_num].setVal(0.0); - - auto jxntot = local_jx[thread_num].length(); - auto jyntot = local_jy[thread_num].length(); - auto jzntot = local_jz[thread_num].length(); -#endif - - BL_PROFILE_VAR_START(blp_pxr_cd); - warpx_current_deposition( - jx_ptr, &ngJ, jxntot.getVect(), - jy_ptr, &ngJ, jyntot.getVect(), - jz_ptr, &ngJ, jzntot.getVect(), - &np_current, - m_xp[thread_num].dataPtr(), - m_yp[thread_num].dataPtr(), - m_zp[thread_num].dataPtr(), - uxp.dataPtr(), uyp.dataPtr(), uzp.dataPtr(), - m_giv[thread_num].dataPtr(), - wp.dataPtr(), &this->charge, - &xyzmin[0], &xyzmin[1], &xyzmin[2], - &dt, &dx[0], &dx[1], &dx[2], - &WarpX::nox,&WarpX::noy,&WarpX::noz, &j_is_nodal, - &lvect,&WarpX::current_deposition_algo); - -#ifdef WARPX_RZ - warpx_current_deposition_rz_volume_scaling( - jx_ptr, &ngJ, jxntot.getVect(), - jy_ptr, &ngJ, jyntot.getVect(), - jz_ptr, &ngJ, jzntot.getVect(), - &xyzmin[0], &dx[0]); -#endif - BL_PROFILE_VAR_STOP(blp_pxr_cd); - -#ifndef AMREX_USE_GPU - BL_PROFILE_VAR_START(blp_accumulate); - - jx[pti].atomicAdd(local_jx[thread_num], tbx, tbx, 0, 0, 1); - jy[pti].atomicAdd(local_jy[thread_num], tby, tby, 0, 0, 1); - jz[pti].atomicAdd(local_jz[thread_num], tbz, tbz, 0, 0, 1); - - BL_PROFILE_VAR_STOP(blp_accumulate); -#endif - } + AMREX_ALWAYS_ASSERT_WITH_MESSAGE((depos_lev==(lev-1)) || + (depos_lev==(lev )), + "Deposition buffers only work for lev-1"); + // If no particles, do not do anything + if (np_to_depose == 0) return; + + const std::array& dx = WarpX::CellSize(std::max(depos_lev,0)); + const long lvect = 8; + int j_is_nodal = jx->is_nodal() and jy->is_nodal() and jz->is_nodal(); + + BL_PROFILE_VAR_NS("PICSAR::CurrentDeposition", blp_pxr_cd); + BL_PROFILE_VAR_NS("PPC::Evolve::Accumulate", blp_accumulate); + + // Get tile box where current is deposited + Box tilebox; + if (lev == depos_lev) { + tilebox = pti.tilebox(); + } else { + const IntVect& ref_ratio = WarpX::RefRatio(depos_lev); + tilebox = amrex::coarsen(pti.tilebox(),ref_ratio); + } + + // Staggered tile boxes (different in each direction) + Box tbx = convert(tilebox, WarpX::jx_nodal_flag); + Box tby = convert(tilebox, WarpX::jy_nodal_flag); + Box tbz = convert(tilebox, WarpX::jz_nodal_flag); - // Deposit charge for particles that are in the current buffers - if (np_current < np) - { - const IntVect& ref_ratio = WarpX::RefRatio(lev-1); - const Box& ctilebox = amrex::coarsen(pti.tilebox(),ref_ratio); - const std::array& cxyzmin_tile = WarpX::LowerCorner(ctilebox, lev-1); + // Lower corner of tile box physical domain + const std::array& xyzmin_tile = WarpX::LowerCorner(tilebox, depos_lev); + const std::array& xyzmin = xyzmin_tile; #ifdef AMREX_USE_GPU - jx_ptr = (*cjx)[pti].dataPtr(); - jy_ptr = (*cjy)[pti].dataPtr(); - jz_ptr = (*cjz)[pti].dataPtr(); - - auto jxntot = jx[pti].length(); - auto jyntot = jy[pti].length(); - auto jzntot = jz[pti].length(); + // No tiling on GPU: jx_ptr points to the full + // jx array (same for jy_ptr and jz_ptr). + Real* jx_ptr = (*jx)[pti].dataPtr(); + Real* jy_ptr = (*jy)[pti].dataPtr(); + Real* jz_ptr = (*jz)[pti].dataPtr(); + + auto jxntot = jx[pti].length(); + auto jyntot = jy[pti].length(); + auto jzntot = jz[pti].length(); #else - - tbx = amrex::convert(ctilebox, WarpX::jx_nodal_flag); - tby = amrex::convert(ctilebox, WarpX::jy_nodal_flag); - tbz = amrex::convert(ctilebox, WarpX::jz_nodal_flag); - tbx.grow(ngJ); - tby.grow(ngJ); - tbz.grow(ngJ); - - local_jx[thread_num].resize(tbx); - local_jy[thread_num].resize(tby); - local_jz[thread_num].resize(tbz); - - jx_ptr = local_jx[thread_num].dataPtr(); - jy_ptr = local_jy[thread_num].dataPtr(); - jz_ptr = local_jz[thread_num].dataPtr(); - - local_jx[thread_num].setVal(0.0); - local_jy[thread_num].setVal(0.0); - local_jz[thread_num].setVal(0.0); - - auto jxntot = local_jx[thread_num].length(); - auto jyntot = local_jy[thread_num].length(); - auto jzntot = local_jz[thread_num].length(); -#endif - - long ncrse = np - np_current; - BL_PROFILE_VAR_START(blp_pxr_cd); - warpx_current_deposition( - jx_ptr, &ngJ, jxntot.getVect(), - jy_ptr, &ngJ, jyntot.getVect(), - jz_ptr, &ngJ, jzntot.getVect(), - &ncrse, - m_xp[thread_num].dataPtr() +np_current, - m_yp[thread_num].dataPtr() +np_current, - m_zp[thread_num].dataPtr() +np_current, - uxp.dataPtr()+np_current, - uyp.dataPtr()+np_current, - uzp.dataPtr()+np_current, - m_giv[thread_num].dataPtr()+np_current, - wp.dataPtr()+np_current, &this->charge, - &cxyzmin_tile[0], &cxyzmin_tile[1], &cxyzmin_tile[2], - &dt, &cdx[0], &cdx[1], &cdx[2], - &WarpX::nox,&WarpX::noy,&WarpX::noz, &j_is_nodal, - &lvect,&WarpX::current_deposition_algo); + // Tiling is on: jx_ptr points to local_jx[thread_num] + // (same for jy_ptr and jz_ptr) + tbx.grow(ngJ); + tby.grow(ngJ); + tbz.grow(ngJ); + + local_jx[thread_num].resize(tbx); + local_jy[thread_num].resize(tby); + local_jz[thread_num].resize(tbz); + + Real* jx_ptr = local_jx[thread_num].dataPtr(); + Real* jy_ptr = local_jy[thread_num].dataPtr(); + Real* jz_ptr = local_jz[thread_num].dataPtr(); + + // local_jx[thread_num] is set to zero + local_jx[thread_num].setVal(0.0); + local_jy[thread_num].setVal(0.0); + local_jz[thread_num].setVal(0.0); + + auto jxntot = local_jx[thread_num].length(); + auto jyntot = local_jy[thread_num].length(); + auto jzntot = local_jz[thread_num].length(); +#endif + // GPU, no tiling: deposit directly in jx + // CPU, tiling: deposit into local_jx + // (same for jx and jz) + BL_PROFILE_VAR_START(blp_pxr_cd); + warpx_current_deposition( + jx_ptr, &ngJ, jxntot.getVect(), + jy_ptr, &ngJ, jyntot.getVect(), + jz_ptr, &ngJ, jzntot.getVect(), + &np_to_depose, + m_xp[thread_num].dataPtr() + offset, + m_yp[thread_num].dataPtr() + offset, + m_zp[thread_num].dataPtr() + offset, + uxp.dataPtr() + offset, + uyp.dataPtr() + offset, + uzp.dataPtr() + offset, + m_giv[thread_num].dataPtr() + offset, + wp.dataPtr() + offset, &this->charge, + &xyzmin[0], &xyzmin[1], &xyzmin[2], + &dt, &dx[0], &dx[1], &dx[2], + &WarpX::nox,&WarpX::noy,&WarpX::noz, &j_is_nodal, + &lvect,&WarpX::current_deposition_algo); + #ifdef WARPX_RZ - warpx_current_deposition_rz_volume_scaling( - jx_ptr, &ngJ, jxntot.getVect(), - jy_ptr, &ngJ, jyntot.getVect(), - jz_ptr, &ngJ, jzntot.getVect(), - &xyzmin[0], &dx[0]); + warpx_current_deposition_rz_volume_scaling( + jx_ptr, &ngJ, jxntot.getVect(), + jy_ptr, &ngJ, jyntot.getVect(), + jz_ptr, &ngJ, jzntot.getVect(), + &xyzmin[0], &dx[0]); #endif - - BL_PROFILE_VAR_STOP(blp_pxr_cd); + BL_PROFILE_VAR_STOP(blp_pxr_cd); #ifndef AMREX_USE_GPU - BL_PROFILE_VAR_START(blp_accumulate); - - (*cjx)[pti].atomicAdd(local_jx[thread_num], tbx, tbx, 0, 0, 1); - (*cjy)[pti].atomicAdd(local_jy[thread_num], tby, tby, 0, 0, 1); - (*cjz)[pti].atomicAdd(local_jz[thread_num], tbz, tbz, 0, 0, 1); - - BL_PROFILE_VAR_STOP(blp_accumulate); + BL_PROFILE_VAR_START(blp_accumulate); + // CPU, tiling: atomicAdd local_jx into jx + // (same for jx and jz) + (*jx)[pti].atomicAdd(local_jx[thread_num], tbx, tbx, 0, 0, 1); + (*jy)[pti].atomicAdd(local_jy[thread_num], tby, tby, 0, 0, 1); + (*jz)[pti].atomicAdd(local_jz[thread_num], tbz, tbz, 0, 0, 1); + BL_PROFILE_VAR_STOP(blp_accumulate); #endif - } -}; - +} void WarpXParticleContainer::DepositCharge ( WarpXParIter& pti, RealVector& wp, -- cgit v1.2.3 From 044344d7dd0fe44a9ed9ba06f4edffbd64a64b8c Mon Sep 17 00:00:00 2001 From: MaxThevenet Date: Mon, 8 Jul 2019 08:32:26 -0700 Subject: Test implementation for direct deposition order 1 --- Examples/Tests/SingleParticle/inputs | 8 +- Source/Particles/WarpXParticleContainer.cpp | 140 ++++++++++++++++++++++++---- 2 files changed, 127 insertions(+), 21 deletions(-) (limited to 'Source/Particles/WarpXParticleContainer.cpp') diff --git a/Examples/Tests/SingleParticle/inputs b/Examples/Tests/SingleParticle/inputs index 548848d79..c4bb19ccf 100644 --- a/Examples/Tests/SingleParticle/inputs +++ b/Examples/Tests/SingleParticle/inputs @@ -1,8 +1,8 @@ max_step = 1 -amr.n_cell = 16 16 +amr.n_cell = 8 8 amr.max_level = 0 amr.blocking_factor = 8 -amr.max_grid_size = 8 +amr.max_grid_size = 16 amr.plot_int = 1 geometry.coord_sys = 0 geometry.is_periodic = 0 0 @@ -16,8 +16,8 @@ warpx.cfl = 1.0 particles.nspecies = 1 particles.species_names = electron electron.charge = -q_e -electron.mass = m_e +electron.mass = 1.e10 electron.injection_style = "SingleParticle" electron.single_particle_pos = 0.0 0.0 0.0 -electron.single_particle_vel = 1.e20 0.0 0.0 # gamma*beta +electron.single_particle_vel = 1.e20 0.0 1.e20 # gamma*beta electron.single_particle_weight = 1.0 diff --git a/Source/Particles/WarpXParticleContainer.cpp b/Source/Particles/WarpXParticleContainer.cpp index 95e692c28..829efe53f 100644 --- a/Source/Particles/WarpXParticleContainer.cpp +++ b/Source/Particles/WarpXParticleContainer.cpp @@ -333,6 +333,8 @@ WarpXParticleContainer::DepositCurrent(WarpXParIter& pti, // Lower corner of tile box physical domain const std::array& xyzmin_tile = WarpX::LowerCorner(tilebox, depos_lev); const std::array& xyzmin = xyzmin_tile; + Print()<<" xyzmin_tile "<charge, - &xyzmin[0], &xyzmin[1], &xyzmin[2], - &dt, &dx[0], &dx[1], &dx[2], - &WarpX::nox,&WarpX::noy,&WarpX::noz, &j_is_nodal, - &lvect,&WarpX::current_deposition_algo); + bool use_new = true; + if (use_new){ + Real dxi = 1.0/dx[0]; + Real dzi = 1.0/dx[2]; + Real invvol = dxi*dzi; + Real dts2dx = 0.5*dt*dxi; + Real dts2dz = 0.5*dt*dzi; + Real clightsq = 1.0/PhysConst::c/PhysConst::c; + Real c = PhysConst::c; + + Real xmin = xyzmin[0]; + Real zmin = xyzmin[2]; + Real q = this->charge; + Real stagger_shift = j_is_nodal ? 0.0 : 0.5; + Print()<<"xmin "<array(pti); + auto jy_arr = jy->array(pti); + auto jz_arr = jz->array(pti); + + // auto const& jx_arr = jx->array(); + // auto const& jy_arr = jy->array(); + // auto const& jz_arr = jz->array(); + + // - momenta are stored as a struct of array, in `attribs` + auto& attribs = pti.GetAttribs(); + Real* AMREX_RESTRICT uxp = attribs[PIdx::ux].dataPtr(); + Real* AMREX_RESTRICT uyp = attribs[PIdx::uy].dataPtr(); + Real* AMREX_RESTRICT uzp = attribs[PIdx::uz].dataPtr(); + Real* AMREX_RESTRICT w = attribs[PIdx::w ].dataPtr(); + Cuda::ManagedDeviceVector Vx, Vy, Vz; + pti.GetPosition(Vx, Vy, Vz); + Real* AMREX_RESTRICT xp = Vx.dataPtr() + offset; + // Real* AMREX_RESTRICT yp = y.dataPtr(); + Real* AMREX_RESTRICT zp = Vz.dataPtr() + offset; + // Loop over the particles and convert momentum + // const long np = pti.numParticles(); + Print()<<"np_to_depose "<charge, + &xyzmin[0], &xyzmin[1], &xyzmin[2], + &dt, &dx[0], &dx[1], &dx[2], + &WarpX::nox,&WarpX::noy,&WarpX::noz, &j_is_nodal, + &lvect,&WarpX::current_deposition_algo); + } #ifdef WARPX_RZ warpx_current_deposition_rz_volume_scaling( jx_ptr, &ngJ, jxntot.getVect(), -- cgit v1.2.3 From 8bd48733e9ce659a227c9f19f6b4857596bf9896 Mon Sep 17 00:00:00 2001 From: MaxThevenet Date: Mon, 8 Jul 2019 11:19:47 -0700 Subject: fix position pointer. simple test gives correct result --- Examples/Tests/SingleParticle/inputs | 5 +++++ Source/Particles/WarpXParticleContainer.cpp | 26 +++++++++++++++++--------- 2 files changed, 22 insertions(+), 9 deletions(-) (limited to 'Source/Particles/WarpXParticleContainer.cpp') diff --git a/Examples/Tests/SingleParticle/inputs b/Examples/Tests/SingleParticle/inputs index c4bb19ccf..89e3eb1c6 100644 --- a/Examples/Tests/SingleParticle/inputs +++ b/Examples/Tests/SingleParticle/inputs @@ -13,6 +13,11 @@ algo.charge_deposition = standard algo.field_gathering = standard warpx.cfl = 1.0 +algo.current_deposition = direct +interpolation.nox = 1 +interpolation.noy = 1 +interpolation.noz = 1 + particles.nspecies = 1 particles.species_names = electron electron.charge = -q_e diff --git a/Source/Particles/WarpXParticleContainer.cpp b/Source/Particles/WarpXParticleContainer.cpp index 1841ea0ce..6acf7b1ea 100644 --- a/Source/Particles/WarpXParticleContainer.cpp +++ b/Source/Particles/WarpXParticleContainer.cpp @@ -408,21 +408,29 @@ WarpXParticleContainer::DepositCurrent(WarpXParIter& pti, Real* AMREX_RESTRICT uyp = attribs[PIdx::uy].dataPtr(); Real* AMREX_RESTRICT uzp = attribs[PIdx::uz].dataPtr(); Real* AMREX_RESTRICT w = attribs[PIdx::w ].dataPtr(); - Cuda::ManagedDeviceVector Vx, Vy, Vz; - pti.GetPosition(Vx, Vy, Vz); - Real* AMREX_RESTRICT xp = Vx.dataPtr() + offset; - // Real* AMREX_RESTRICT yp = y.dataPtr(); - Real* AMREX_RESTRICT zp = Vz.dataPtr() + offset; + + + //Cuda::ManagedDeviceVector Vx, Vy, Vz; + //pti.GetPosition(Vx, Vy, Vz); + //Real* AMREX_RESTRICT xp = Vx.dataPtr() + offset; + //Real* AMREX_RESTRICT zp = Vz.dataPtr() + offset; + + Real* AMREX_RESTRICT xp = m_xp[thread_num].dataPtr() + offset; + Real* AMREX_RESTRICT zp = m_zp[thread_num].dataPtr() + offset; + // Loop over the particles and convert momentum // const long np = pti.numParticles(); Print()<<"np_to_depose "< Date: Mon, 8 Jul 2019 16:01:37 -0700 Subject: bug when tiling on CPU --- Examples/Tests/SingleParticle/Untitled.ipynb | 114 +++++++++++++++++++++++++ Examples/Tests/SingleParticle/inputs | 10 +-- Source/Particles/PhysicalParticleContainer.cpp | 2 +- Source/Particles/WarpXParticleContainer.cpp | 66 +++++++++++--- 4 files changed, 172 insertions(+), 20 deletions(-) create mode 100644 Examples/Tests/SingleParticle/Untitled.ipynb (limited to 'Source/Particles/WarpXParticleContainer.cpp') diff --git a/Examples/Tests/SingleParticle/Untitled.ipynb b/Examples/Tests/SingleParticle/Untitled.ipynb new file mode 100644 index 000000000..7a98be71a --- /dev/null +++ b/Examples/Tests/SingleParticle/Untitled.ipynb @@ -0,0 +1,114 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "# Import statements\n", + "import yt ; yt.funcs.mylog.setLevel(50)\n", + "import numpy as np\n", + "import scipy.constants as scc\n", + "import matplotlib.pyplot as plt\n", + "%matplotlib notebook" + ] + }, + { + "cell_type": "code", + "execution_count": 82, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/mthevenet/anaconda3/lib/python3.7/site-packages/yt/units/yt_array.py:1394: RuntimeWarning: invalid value encountered in true_divide\n", + " out=out, **kwargs)\n" + ] + }, + { + "data": { + "text/html": [ + "
" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "ds = yt.load( './old_diags/plt00001' ) # Create a dataset object\n", + "sl = yt.SlicePlot(ds, 2, 'jz', aspect=.4) # Create a sliceplot object\n", + "sl.annotate_grids() # Show grids\n", + "sl.show() # Show the plot" + ] + }, + { + "cell_type": "code", + "execution_count": 83, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/mthevenet/anaconda3/lib/python3.7/site-packages/yt/units/yt_array.py:1394: RuntimeWarning: invalid value encountered in true_divide\n", + " out=out, **kwargs)\n" + ] + }, + { + "data": { + "text/html": [ + "
" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "ds = yt.load( './new_diags/plt00001' ) # Create a dataset object\n", + "sl = yt.SlicePlot(ds, 2, 'jx', aspect=.4) # Create a sliceplot object\n", + "sl.annotate_grids() # Show grids\n", + "sl.show() # Show the plot" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + " " + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.1" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/Examples/Tests/SingleParticle/inputs b/Examples/Tests/SingleParticle/inputs index 89e3eb1c6..feecb996c 100644 --- a/Examples/Tests/SingleParticle/inputs +++ b/Examples/Tests/SingleParticle/inputs @@ -1,11 +1,11 @@ -max_step = 1 -amr.n_cell = 8 8 +max_step = 10 +amr.n_cell = 8 16 amr.max_level = 0 amr.blocking_factor = 8 amr.max_grid_size = 16 amr.plot_int = 1 geometry.coord_sys = 0 -geometry.is_periodic = 0 0 +geometry.is_periodic = 1 1 geometry.prob_lo = -8 -12 geometry.prob_hi = 8 12 warpx.do_pml = 0 @@ -23,6 +23,6 @@ particles.species_names = electron electron.charge = -q_e electron.mass = 1.e10 electron.injection_style = "SingleParticle" -electron.single_particle_pos = 0.0 0.0 0.0 -electron.single_particle_vel = 1.e20 0.0 1.e20 # gamma*beta +electron.single_particle_pos = 4 0 6 +electron.single_particle_vel = -2 0 -5 # -2. 1. 4. electron.single_particle_weight = 1.0 diff --git a/Source/Particles/PhysicalParticleContainer.cpp b/Source/Particles/PhysicalParticleContainer.cpp index 43b46ec49..b78d36c87 100644 --- a/Source/Particles/PhysicalParticleContainer.cpp +++ b/Source/Particles/PhysicalParticleContainer.cpp @@ -1530,6 +1530,7 @@ PhysicalParticleContainer::Evolve (int lev, // Current Deposition // // Deposit inside domains + Print()<<"np_current "<& xyzmin_tile = WarpX::LowerCorner(tilebox, depos_lev); - const std::array& xyzmin = xyzmin_tile; - Print()<<" xyzmin_tile "<& xyzmin_tile = WarpX::LowerCorner(tilebox, depos_lev); + // const std::array& xyzmin = xyzmin_tile; + const std::array& xyzmin = WarpX::LowerCorner(tilebox, depos_lev);; + + // Print()<<" xyzmin_tile "<array(pti); auto jy_arr = jy->array(pti); auto jz_arr = jz->array(pti); - - // auto const& jx_arr = jx->array(); - // auto const& jy_arr = jy->array(); - // auto const& jz_arr = jz->array(); + */ + + // const auto& jx_arr = local_jx[thread_num].array(); + // const auto& jy_arr = local_jy[thread_num].array(); + // const auto& jz_arr = local_jz[thread_num].array(); + + //Array4 const& jx_arr = local_jx[thread_num].array(tbx); + //Array4 const& jy_arr = local_jy[thread_num].array(tby); + //Array4 const& jz_arr = local_jz[thread_num].array(tbz); + Array4 const& jx_arr = local_jx[thread_num].array(); + Array4 const& jy_arr = local_jy[thread_num].array(); + Array4 const& jz_arr = local_jz[thread_num].array(); + + Dim3 lox = lbound(jx_arr); + Dim3 loy = lbound(jy_arr); + Dim3 loz = lbound(jz_arr); + /* + Dim3 loy = lbound(local_jy[thread_num]); + Dim3 loz = lbound(local_jz[thread_num]); + Dim3 hix = hbound(local_jx[thread_num]); + Dim3 hiy = hbound(local_jy[thread_num]); + Dim3 hiz = hbound(local_jz[thread_num]); + */ + Print()<<" lox "< Vx, Vy, Vz; //pti.GetPosition(Vx, Vy, Vz); @@ -446,7 +474,7 @@ WarpXParticleContainer::DepositCurrent(WarpXParIter& pti, std::cout<<" vx "< Date: Tue, 9 Jul 2019 17:57:07 -0700 Subject: works for order 1, direct deposition 2D --- Examples/Tests/SingleParticle/Untitled.ipynb | 14 ++-- Examples/Tests/SingleParticle/inputs | 2 +- Source/Particles/WarpXParticleContainer.cpp | 115 +++++++++++++-------------- 3 files changed, 62 insertions(+), 69 deletions(-) (limited to 'Source/Particles/WarpXParticleContainer.cpp') diff --git a/Examples/Tests/SingleParticle/Untitled.ipynb b/Examples/Tests/SingleParticle/Untitled.ipynb index 7a98be71a..497210df7 100644 --- a/Examples/Tests/SingleParticle/Untitled.ipynb +++ b/Examples/Tests/SingleParticle/Untitled.ipynb @@ -16,7 +16,7 @@ }, { "cell_type": "code", - "execution_count": 82, + "execution_count": 116, "metadata": {}, "outputs": [ { @@ -30,10 +30,10 @@ { "data": { "text/html": [ - "
" + "
" ], "text/plain": [ - "" + "" ] }, "metadata": {}, @@ -42,14 +42,14 @@ ], "source": [ "ds = yt.load( './old_diags/plt00001' ) # Create a dataset object\n", - "sl = yt.SlicePlot(ds, 2, 'jz', aspect=.4) # Create a sliceplot object\n", + "sl = yt.SlicePlot(ds, 2, 'jx', aspect=.4) # Create a sliceplot object\n", "sl.annotate_grids() # Show grids\n", "sl.show() # Show the plot" ] }, { "cell_type": "code", - "execution_count": 83, + "execution_count": 117, "metadata": {}, "outputs": [ { @@ -63,10 +63,10 @@ { "data": { "text/html": [ - "
" + "
" ], "text/plain": [ - "" + "" ] }, "metadata": {}, diff --git a/Examples/Tests/SingleParticle/inputs b/Examples/Tests/SingleParticle/inputs index feecb996c..9f478cfa0 100644 --- a/Examples/Tests/SingleParticle/inputs +++ b/Examples/Tests/SingleParticle/inputs @@ -1,5 +1,5 @@ max_step = 10 -amr.n_cell = 8 16 +amr.n_cell = 24 24 amr.max_level = 0 amr.blocking_factor = 8 amr.max_grid_size = 16 diff --git a/Source/Particles/WarpXParticleContainer.cpp b/Source/Particles/WarpXParticleContainer.cpp index d383ab6f4..69f7aa3d0 100644 --- a/Source/Particles/WarpXParticleContainer.cpp +++ b/Source/Particles/WarpXParticleContainer.cpp @@ -292,7 +292,6 @@ WarpXParticleContainer::AddNParticles (int lev, * \param lev : Level of box that contains particles * \param depos_lev : Level on which particles deposit (if buffers are used) * \param dt : Time step for particle level - * \param ngJ : Number of ghosts cells (of lev) */ void WarpXParticleContainer::DepositCurrent(WarpXParIter& pti, @@ -336,10 +335,10 @@ WarpXParticleContainer::DepositCurrent(WarpXParIter& pti, // Lower corner of tile box physical domain // const std::array& xyzmin_tile = WarpX::LowerCorner(tilebox, depos_lev); // const std::array& xyzmin = xyzmin_tile; + tilebox.grow(ngJ); const std::array& xyzmin = WarpX::LowerCorner(tilebox, depos_lev);; // Print()<<" xyzmin_tile "<& xyzminx = WarpX::LowerCorner(tbx, depos_lev);; + const std::array& xyzminy = WarpX::LowerCorner(tby, depos_lev);; + const std::array& xyzminz = WarpX::LowerCorner(tbz, depos_lev);; + Print()<<"ngJ "<charge; Real stagger_shift = j_is_nodal ? 0.0 : 0.5; - Print()<<"xmin "<array(pti); */ - // const auto& jx_arr = local_jx[thread_num].array(); - // const auto& jy_arr = local_jy[thread_num].array(); - // const auto& jz_arr = local_jz[thread_num].array(); - - //Array4 const& jx_arr = local_jx[thread_num].array(tbx); - //Array4 const& jy_arr = local_jy[thread_num].array(tby); - //Array4 const& jz_arr = local_jz[thread_num].array(tbz); Array4 const& jx_arr = local_jx[thread_num].array(); Array4 const& jy_arr = local_jy[thread_num].array(); Array4 const& jz_arr = local_jz[thread_num].array(); - Dim3 lox = lbound(jx_arr); - Dim3 loy = lbound(jy_arr); - Dim3 loz = lbound(jz_arr); - /* - Dim3 loy = lbound(local_jy[thread_num]); - Dim3 loz = lbound(local_jz[thread_num]); - Dim3 hix = hbound(local_jx[thread_num]); - Dim3 hiy = hbound(local_jy[thread_num]); - Dim3 hiz = hbound(local_jz[thread_num]); - */ - Print()<<" lox "< Date: Tue, 9 Jul 2019 21:26:39 -0700 Subject: some cleaning --- Examples/Tests/SingleParticle/inputs | 4 +- Source/Diagnostics/WarpXIO.cpp | 1 - Source/Particles/WarpXParticleContainer.cpp | 248 +++++++++------------------- 3 files changed, 82 insertions(+), 171 deletions(-) (limited to 'Source/Particles/WarpXParticleContainer.cpp') diff --git a/Examples/Tests/SingleParticle/inputs b/Examples/Tests/SingleParticle/inputs index 9f478cfa0..7ec1a659f 100644 --- a/Examples/Tests/SingleParticle/inputs +++ b/Examples/Tests/SingleParticle/inputs @@ -1,5 +1,5 @@ max_step = 10 -amr.n_cell = 24 24 +amr.n_cell = 8 16 amr.max_level = 0 amr.blocking_factor = 8 amr.max_grid_size = 16 @@ -24,5 +24,5 @@ electron.charge = -q_e electron.mass = 1.e10 electron.injection_style = "SingleParticle" electron.single_particle_pos = 4 0 6 -electron.single_particle_vel = -2 0 -5 # -2. 1. 4. +electron.single_particle_vel = -2 0 -5000 # -2. 1. 4. electron.single_particle_weight = 1.0 diff --git a/Source/Diagnostics/WarpXIO.cpp b/Source/Diagnostics/WarpXIO.cpp index 24272c588..38399bf9e 100644 --- a/Source/Diagnostics/WarpXIO.cpp +++ b/Source/Diagnostics/WarpXIO.cpp @@ -755,7 +755,6 @@ WarpX::WriteSlicePlotFile () const // writing plotfile // const std::string& slice_plotfilename = amrex::Concatenate(slice_plot_file,istep[0]); amrex::Print() << " Writing slice plotfile " << slice_plotfilename << "\n"; - const int ngrow = 0; Vector rfs; VisMF::Header::Version current_version = VisMF::GetHeaderVersion(); diff --git a/Source/Particles/WarpXParticleContainer.cpp b/Source/Particles/WarpXParticleContainer.cpp index 69f7aa3d0..cd0c37bd8 100644 --- a/Source/Particles/WarpXParticleContainer.cpp +++ b/Source/Particles/WarpXParticleContainer.cpp @@ -310,7 +310,6 @@ WarpXParticleContainer::DepositCurrent(WarpXParIter& pti, const long ngJ = jx->nGrow(); const std::array& dx = WarpX::CellSize(std::max(depos_lev,0)); - const long lvect = 8; int j_is_nodal = jx->is_nodal() and jy->is_nodal() and jz->is_nodal(); BL_PROFILE_VAR_NS("PICSAR::CurrentDeposition", blp_pxr_cd); @@ -332,13 +331,7 @@ WarpXParticleContainer::DepositCurrent(WarpXParIter& pti, Box tby = convert(tilebox, WarpX::jy_nodal_flag); Box tbz = convert(tilebox, WarpX::jz_nodal_flag); - // Lower corner of tile box physical domain - // const std::array& xyzmin_tile = WarpX::LowerCorner(tilebox, depos_lev); - // const std::array& xyzmin = xyzmin_tile; tilebox.grow(ngJ); - const std::array& xyzmin = WarpX::LowerCorner(tilebox, depos_lev);; - - // Print()<<" xyzmin_tile "<& xyzminx = WarpX::LowerCorner(tbx, depos_lev);; - const std::array& xyzminy = WarpX::LowerCorner(tby, depos_lev);; - const std::array& xyzminz = WarpX::LowerCorner(tbz, depos_lev);; - - Print()<<"ngJ "<charge; - Real stagger_shift = j_is_nodal ? 0.0 : 0.5; - // Print()<<"xmin "<array(pti); - auto jy_arr = jy->array(pti); - auto jz_arr = jz->array(pti); - */ - - Array4 const& jx_arr = local_jx[thread_num].array(); - Array4 const& jy_arr = local_jy[thread_num].array(); - Array4 const& jz_arr = local_jz[thread_num].array(); - - Print()<<"tbx"< Vx, Vy, Vz; - //pti.GetPosition(Vx, Vy, Vz); - //Real* AMREX_RESTRICT xp = Vx.dataPtr() + offset; - //Real* AMREX_RESTRICT zp = Vz.dataPtr() + offset; - - Real* AMREX_RESTRICT xp = m_xp[thread_num].dataPtr() + offset; - Real* AMREX_RESTRICT zp = m_zp[thread_num].dataPtr() + offset; - - Print()<<"np_to_depose "<charge; - Real* jx_ptr = local_jx[thread_num].dataPtr(); - Real* jy_ptr = local_jy[thread_num].dataPtr(); - Real* jz_ptr = local_jz[thread_num].dataPtr(); - - auto jxntot = local_jx[thread_num].length(); - auto jyntot = local_jy[thread_num].length(); - auto jzntot = local_jz[thread_num].length(); - - warpx_current_deposition( - jx_ptr, &ngJ, jxntot.getVect(), - jy_ptr, &ngJ, jyntot.getVect(), - jz_ptr, &ngJ, jzntot.getVect(), - &np_to_depose, - m_xp[thread_num].dataPtr() + offset, - m_yp[thread_num].dataPtr() + offset, - m_zp[thread_num].dataPtr() + offset, - uxp.dataPtr() + offset, - uyp.dataPtr() + offset, - uzp.dataPtr() + offset, - m_giv[thread_num].dataPtr() + offset, - wp.dataPtr() + offset, &this->charge, - &xyzmin[0], &xyzmin[1], &xyzmin[2], - &dt, &dx[0], &dx[1], &dx[2], - &WarpX::nox,&WarpX::noy,&WarpX::noz, &j_is_nodal, - &lvect,&WarpX::current_deposition_algo); - } + // Lower corner of tile box physical domain + const std::array& xyzmin = WarpX::LowerCorner(tilebox, depos_lev);; + // xmin and zmin are built on pti.tilebox(), so it does + // not include staggering, so the stagger_shift has to be done by hand. + // Alternatively, we could define xminx from tbx (and the same for 3 + // directions and for jx, jy, jz). This way, sxo would not be needed. + // Better for memory? worth trying? + const Real xmin = xyzmin[0]; + const Real zmin = xyzmin[2]; + const Real stagger_shift = j_is_nodal ? 0.0 : 0.5; + + Array4 const& jx_arr = local_jx[thread_num].array(); + Array4 const& jy_arr = local_jy[thread_num].array(); + Array4 const& jz_arr = local_jz[thread_num].array(); + + const Dim3 lo = lbound(tilebox); + + Real* AMREX_RESTRICT xp = m_xp[thread_num].dataPtr() + offset; + Real* AMREX_RESTRICT zp = m_zp[thread_num].dataPtr() + offset; + + ParallelFor( np_to_depose, + [=] AMREX_GPU_DEVICE (long ip) { + Real gaminv = 1.0/std::sqrt(1.0 + uxp[ip]*uxp[ip]*clightsq + + uyp[ip]*uyp[ip]*clightsq + + uzp[ip]*uzp[ip]*clightsq); + + Real vx = uxp[ip]*gaminv; + Real vy = uyp[ip]*gaminv; + Real vz = uzp[ip]*gaminv; + + Real wq=q*wp[ip]; + Real wqx=wq*invvol*vx; + Real wqy=wq*invvol*vy; + Real wqz=wq*invvol*vz; + + Real x = (Real) (xp[ip]-xmin)*dxi; + Real z = (Real) (zp[ip]-zmin)*dzi; + Real xmid=x-dts2dx*vx; + Real zmid=z-dts2dz*vz; + + int j = (int) xmid; + int l = (int) zmid; + int j0= (int) (xmid-stagger_shift); + int l0= (int) (zmid-stagger_shift); + + Real xint = xmid-j; + Real zint = zmid-l; + + Real sx[2] = {1.0 - xint, xint}; + Real sz[2] = {1.0 - zint, zint}; + + xint = xmid-stagger_shift-j0; + zint = zmid-stagger_shift-l0; + + Real sx0[2] = {1.0 - xint, xint}; + Real sz0[2] = {1.0 - zint, zint}; + + jx_arr(lo.x+j0 ,lo.y+l ,0) += sx0[0]*sz[0]*wqx; + jx_arr(lo.x+j0+1,lo.y+l ,0) += sx0[1]*sz[0]*wqx; + jx_arr(lo.x+j0 ,lo.y+l+1,0) += sx0[0]*sz[1]*wqx; + jx_arr(lo.x+j0+1,lo.y+l+1,0) += sx0[1]*sz[1]*wqx; + + jy_arr(lo.x+j ,lo.y+l ,0) += sx[0]*sz[0]*wqy; + jy_arr(lo.x+j+1,lo.y+l ,0) += sx[1]*sz[0]*wqy; + jy_arr(lo.x+j ,lo.y+l+1,0) += sx[0]*sz[1]*wqy; + jy_arr(lo.x+j+1,lo.y+l+1,0) += sx[1]*sz[1]*wqy; + + jz_arr(lo.x+j ,lo.y+l0 ,0) += sx[0]*sz0[0]*wqz; + jz_arr(lo.x+j+1,lo.y+l0 ,0) += sx[1]*sz0[0]*wqz; + jz_arr(lo.x+j ,lo.y+l0+1,0) += sx[0]*sz0[1]*wqz; + jz_arr(lo.x+j+1,lo.y+l0+1,0) += sx[1]*sz0[1]*wqz; + } + ); #ifdef WARPX_RZ // Rescale current in r-z mode warpx_current_deposition_rz_volume_scaling( -- cgit v1.2.3 From ffe120809d57edf2049f6266454638fe60795501 Mon Sep 17 00:00:00 2001 From: MaxThevenet Date: Wed, 10 Jul 2019 10:02:45 -0700 Subject: 2d and 3d version, order 1, with comments --- .../Physics_applications/uniform_plasma/inputs.2d | 35 ------- .../Physics_applications/uniform_plasma/inputs.3d | 9 +- Source/Particles/WarpXParticleContainer.cpp | 109 ++++++++++++++------- 3 files changed, 83 insertions(+), 70 deletions(-) delete mode 100644 Examples/Physics_applications/uniform_plasma/inputs.2d (limited to 'Source/Particles/WarpXParticleContainer.cpp') diff --git a/Examples/Physics_applications/uniform_plasma/inputs.2d b/Examples/Physics_applications/uniform_plasma/inputs.2d deleted file mode 100644 index 4304638dd..000000000 --- a/Examples/Physics_applications/uniform_plasma/inputs.2d +++ /dev/null @@ -1,35 +0,0 @@ -################################# -####### GENERAL PARAMETERS ###### -################################# -max_step = 10 -amr.n_cell = 128 128 -amr.max_grid_size = 64 -amr.blocking_factor = 32 -amr.max_level = 0 -geometry.coord_sys = 0 # 0: Cartesian -geometry.is_periodic = 1 1 # Is periodic? -geometry.prob_lo = -20.e-6 -20.e-6 # physical domain -geometry.prob_hi = 20.e-6 20.e-6 - -################################# -############ NUMERICS ########### -################################# -warpx.serialize_ics = 1 -warpx.verbose = 1 -warpx.cfl = 1.0 -amr.plot_int = 10 - -################################# -############ PLASMA ############# -################################# -particles.nspecies = 1 -particles.species_names = electrons - -electrons.charge = -q_e -electrons.mass = m_e -electrons.injection_style = "NUniformPerCell" -electrons.num_particles_per_cell_each_dim = 2 2 -electrons.profile = constant -electrons.density = 1.e25 # number of electrons per m^3 -electrons.momentum_distribution_type = "gaussian" -electrons.u_th = 0.01 # uth the std of the (unitless) momentum diff --git a/Examples/Physics_applications/uniform_plasma/inputs.3d b/Examples/Physics_applications/uniform_plasma/inputs.3d index 6792dac2c..20e47b0a2 100644 --- a/Examples/Physics_applications/uniform_plasma/inputs.3d +++ b/Examples/Physics_applications/uniform_plasma/inputs.3d @@ -19,6 +19,11 @@ warpx.verbose = 1 warpx.cfl = 1.0 amr.plot_int = 10 +algo.current_deposition = direct +interpolation.nox = 1 +interpolation.noy = 1 +interpolation.noz = 1 + ################################# ############ PLASMA ############# ################################# @@ -32,4 +37,6 @@ electrons.num_particles_per_cell_each_dim = 2 2 2 electrons.profile = constant electrons.density = 1.e25 # number of electrons per m^3 electrons.momentum_distribution_type = "gaussian" -electrons.u_th = 0.01 # uth the std of the (unitless) momentum +electrons.ux_th = 0.01 +electrons.uy_th = 0.01 +electrons.uz_th = 0.01 diff --git a/Source/Particles/WarpXParticleContainer.cpp b/Source/Particles/WarpXParticleContainer.cpp index cd0c37bd8..99596f8c3 100644 --- a/Source/Particles/WarpXParticleContainer.cpp +++ b/Source/Particles/WarpXParticleContainer.cpp @@ -366,9 +366,15 @@ WarpXParticleContainer::DepositCurrent(WarpXParIter& pti, BL_PROFILE_VAR_START(blp_pxr_cd); Real dxi = 1.0/dx[0]; Real dzi = 1.0/dx[2]; - Real invvol = dxi*dzi; Real dts2dx = 0.5*dt*dxi; Real dts2dz = 0.5*dt*dzi; +#if (AMREX_SPACEDIM == 2) + Real invvol = dxi*dzi; +#else // (AMREX_SPACEDIM == 3) + Real dyi = 1.0/dx[1]; + Real dts2dy = 0.5*dt*dyi; + Real invvol = dxi*dyi*dzi; +#endif Real clightsq = 1.0/PhysConst::c/PhysConst::c; Real q = this->charge; @@ -392,59 +398,94 @@ WarpXParticleContainer::DepositCurrent(WarpXParIter& pti, Real* AMREX_RESTRICT xp = m_xp[thread_num].dataPtr() + offset; Real* AMREX_RESTRICT zp = m_zp[thread_num].dataPtr() + offset; +#if (AMREX_SPACEDIM == 3) + Real* AMREX_RESTRICT yp = m_yp[thread_num].dataPtr() + offset; + const Real ymin = xyzmin[1]; +#endif + ParallelFor( np_to_depose, [=] AMREX_GPU_DEVICE (long ip) { + + // macro-particle current in all 3D (even for 2D runs) + // wqx, wqy and wqz are the particle current in each + // direction Real gaminv = 1.0/std::sqrt(1.0 + uxp[ip]*uxp[ip]*clightsq + uyp[ip]*uyp[ip]*clightsq + uzp[ip]*uzp[ip]*clightsq); - + Real wq = q*wp[ip]; Real vx = uxp[ip]*gaminv; + Real wqx = wq*invvol*vx; Real vy = uyp[ip]*gaminv; + Real wqy = wq*invvol*vy; Real vz = uzp[ip]*gaminv; + Real wqz = wq*invvol*vz; + + // --- x direction + // particle position in cell unit (1/2 timestep back) + // Real x = (Real) (xp[ip]-xmin)*dxi; + // particle position in cell unit + // Real xmid=x-dts2dx*vx; + Real xmid= (Real) (xp[ip]-xmin)*dxi-dts2dx*vx; + // x index of cell containing particle (cell-centered) + int j = (int) xmid; + // x index of cell containing particle (node-centered) + int j0= (int) (xmid-stagger_shift); + // for the cell-centered quantities, compute current on + // each point + Real xint = xmid-j; + Real sx[2] = {1.0 - xint, xint}; + // for the node-centered quantities, compute current on + // each point + xint = xmid-stagger_shift-j0; + Real sx0[2] = {1.0 - xint, xint}; - Real wq=q*wp[ip]; - Real wqx=wq*invvol*vx; - Real wqy=wq*invvol*vy; - Real wqz=wq*invvol*vz; - - Real x = (Real) (xp[ip]-xmin)*dxi; +#if (AMREX_SPACEDIM == 3) + // --- y direction + Real y = (Real) (yp[ip]-ymin)*dyi; + Real ymid=y-dts2dy*vy; + int k = (int) ymid; + int k0= (int) (ymid-stagger_shift); + Real yint = ymid-k; + Real sy[2] = {1.0 - yint, yint}; + yint = ymid-stagger_shift-k0; + Real sy0[2] = {1.0 - yint, yint}; +#endif + // --- z direction Real z = (Real) (zp[ip]-zmin)*dzi; - Real xmid=x-dts2dx*vx; Real zmid=z-dts2dz*vz; - - int j = (int) xmid; int l = (int) zmid; - int j0= (int) (xmid-stagger_shift); int l0= (int) (zmid-stagger_shift); - - Real xint = xmid-j; Real zint = zmid-l; - - Real sx[2] = {1.0 - xint, xint}; Real sz[2] = {1.0 - zint, zint}; - - xint = xmid-stagger_shift-j0; zint = zmid-stagger_shift-l0; - - Real sx0[2] = {1.0 - xint, xint}; Real sz0[2] = {1.0 - zint, zint}; - jx_arr(lo.x+j0 ,lo.y+l ,0) += sx0[0]*sz[0]*wqx; - jx_arr(lo.x+j0+1,lo.y+l ,0) += sx0[1]*sz[0]*wqx; - jx_arr(lo.x+j0 ,lo.y+l+1,0) += sx0[0]*sz[1]*wqx; - jx_arr(lo.x+j0+1,lo.y+l+1,0) += sx0[1]*sz[1]*wqx; - - jy_arr(lo.x+j ,lo.y+l ,0) += sx[0]*sz[0]*wqy; - jy_arr(lo.x+j+1,lo.y+l ,0) += sx[1]*sz[0]*wqy; - jy_arr(lo.x+j ,lo.y+l+1,0) += sx[0]*sz[1]*wqy; - jy_arr(lo.x+j+1,lo.y+l+1,0) += sx[1]*sz[1]*wqy; - - jz_arr(lo.x+j ,lo.y+l0 ,0) += sx[0]*sz0[0]*wqz; - jz_arr(lo.x+j+1,lo.y+l0 ,0) += sx[1]*sz0[0]*wqz; - jz_arr(lo.x+j ,lo.y+l0+1,0) += sx[0]*sz0[1]*wqz; - jz_arr(lo.x+j+1,lo.y+l0+1,0) += sx[1]*sz0[1]*wqz; + // Real xbounds[2], ybounds[2], zbounds[2]; + // compute_shape_factor(xbounds, sx, xint); + // deposit_current() + +#if (AMREX_SPACEDIM == 2) + for (int iz=0; iz<2; iz++){ + for (int ix=0; ix<2; ix++){ + jx_arr(lo.x+j0+ix ,lo.y+l +iz ,0) += sx0[ix]*sz [iz]*wqx; + jy_arr(lo.x+j +ix ,lo.y+l +iz ,0) += sx [ix]*sz [iz]*wqy; + jz_arr(lo.x+j +ix ,lo.y+l0+iz ,0) += sx [ix]*sz0[iz]*wqz; + } + } +#else // (AMREX_SPACEDIM == 3) + for (int iz=0; iz<2; iz++){ + for (int iy=0; iy<2; iy++){ + for (int ix=0; ix<2; ix++){ + jx_arr(lo.x+j0+ix ,lo.y+k +iy ,lo.z+l +iz ) += sx0[ix]*sy [iy]*sz [iz]*wqx; + jy_arr(lo.x+j +ix ,lo.y+k0+iy ,lo.z+l +iz ) += sx [ix]*sy0[iy]*sz [iz]*wqy; + jz_arr(lo.x+j +ix ,lo.y+k +iy ,lo.z+l0+iz ) += sx [ix]*sy [iy]*sz0[iz]*wqz; + } + } + } +#endif } ); + #ifdef WARPX_RZ // Rescale current in r-z mode warpx_current_deposition_rz_volume_scaling( -- cgit v1.2.3 From c81d4af3bfcc4e2df4e012f8259fc26ecdb00f3a Mon Sep 17 00:00:00 2001 From: MaxThevenet Date: Wed, 10 Jul 2019 18:48:00 -0700 Subject: generic solution OK for 2/3D order 1 and 3. Issues with order 2 --- Source/Particles/Deposition/ComputeShapeFactors.H | 232 ++++++++++++++++++++++ Source/Particles/Deposition/Make.package | 3 + Source/Particles/Make.package | 1 + Source/Particles/WarpXParticleContainer.H | 4 +- Source/Particles/WarpXParticleContainer.cpp | 171 +++++----------- 5 files changed, 286 insertions(+), 125 deletions(-) create mode 100644 Source/Particles/Deposition/ComputeShapeFactors.H create mode 100644 Source/Particles/Deposition/Make.package (limited to 'Source/Particles/WarpXParticleContainer.cpp') diff --git a/Source/Particles/Deposition/ComputeShapeFactors.H b/Source/Particles/Deposition/ComputeShapeFactors.H new file mode 100644 index 000000000..22e6fb6a5 --- /dev/null +++ b/Source/Particles/Deposition/ComputeShapeFactors.H @@ -0,0 +1,232 @@ +#ifndef COMPUTESHAPEFACTORS_H_ +#define COMPUTESHAPEFACTORS_H_ + +//#include +//#include +//#include +//#include +//#include +//#include +//#include +//#include + +//#include +//#include +//#include +//#include +//#include + +//#include +//#include +//#include +//#include +//#include +// #include +// #include +//#include "WarpXParticleContainer.H" +//#include +//#include + +using namespace amrex; + + +template +AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE +void compute_shape_factor_lo(amrex::Dim3& shape_lo) {}; + +template <> +AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE +void compute_shape_factor_lo <1> (amrex::Dim3& shape_lo) { + shape_lo = {0, 0, 0}; +} + +template <> +AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE +void compute_shape_factor_lo <2> (amrex::Dim3& shape_lo) { + shape_lo = {-1, -1, -1}; +} + +template <> +AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE +void compute_shape_factor_lo <3> (amrex::Dim3& shape_lo) { + shape_lo = {-1, -1, -1}; +} + +template +AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE +void compute_shape_factor(Real* sx, Real xint) {}; + +template <> +AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE +void compute_shape_factor <1> (Real* sx, Real xint){ + sx[0] = 1.0 - xint; + sx[1] = xint; +} + +template <> +AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE +void compute_shape_factor <2> (Real* sx, Real xint){ + Print()<<"compute_shape_factor <2>\n"; + sx[0] = 0.5*(0.5-xint)*(0.5-xint); + sx[1] = 0.75-xint*xint; + sx[2] = 0.5*(0.5+xint)*(0.5+xint); +} + +template <> +AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE +void compute_shape_factor <3> (Real* sx, Real xint){ + Print()<<"compute_shape_factor <2>\n"; + sx[0] = 1.0/6.0*(1.0-xint)*(1.0-xint)*(1.0-xint); + sx[1] = 2.0/3.0-xint*xint*(1-xint/2.0); + sx[2] = 2.0/3.0-(1-xint)*(1-xint)*(1.0-0.5*(1-xint)); + sx[3] = 1.0/6.0*xint*xint*xint; +} + +template +void doDepositionShapeN(Real* xp, Real* yp, Real* zp, + Real* wp, Real* uxp, + Real* uyp, Real* uzp, + const amrex::Array4& jx_arr, + const amrex::Array4& jy_arr, + const amrex::Array4& jz_arr, + const long offset, const long np_to_depose, + const amrex::Real dt, const std::array& dx, + const std::array xyzmin, + const Dim3 lo, + const amrex::Real stagger_shift, + const amrex::Real q) +{ + Real dxi = 1.0/dx[0]; + Real dzi = 1.0/dx[2]; + Real dts2dx = 0.5*dt*dxi; + Real dts2dz = 0.5*dt*dzi; +#if (AMREX_SPACEDIM == 2) + Real invvol = dxi*dzi; +#else // (AMREX_SPACEDIM == 3) + Real dyi = 1.0/dx[1]; + Real dts2dy = 0.5*dt*dyi; + Real invvol = dxi*dyi*dzi; +#endif + + const Real xmin = xyzmin[0]; + const Real ymin = xyzmin[1]; + const Real zmin = xyzmin[2]; + + Print()<<"toto\n"; + + Dim3 shape_lo; + compute_shape_factor_lo(shape_lo); + Real clightsq = 1.0/PhysConst::c/PhysConst::c; + ParallelFor( np_to_depose, + [=] AMREX_GPU_DEVICE (long ip) { + + // macro-particle current in all 3D (even for 2D runs) + // wqx, wqy and wqz are the particle current in each + // direction + Real gaminv = 1.0/std::sqrt(1.0 + uxp[ip]*uxp[ip]*clightsq + + uyp[ip]*uyp[ip]*clightsq + + uzp[ip]*uzp[ip]*clightsq); + Real wq = q*wp[ip]; + Real vx = uxp[ip]*gaminv; + Real wqx = wq*invvol*vx; + Real vy = uyp[ip]*gaminv; + Real wqy = wq*invvol*vy; + Real vz = uzp[ip]*gaminv; + Real wqz = wq*invvol*vz; + + // --- x direction + // particle position in cell unit (1/2 timestep back) + // Real x = (Real) (xp[ip]-xmin)*dxi; + // particle position in cell unit + // Real xmid=x-dts2dx*vx; + Real xmid= (Real) (xp[ip]-xmin)*dxi-dts2dx*vx; + // x index of cell containing particle (cell-centered) + int j = (int) xmid; + // x index of cell containing particle (node-centered) + int j0= (int) (xmid-stagger_shift); + // for the cell-centered quantities, compute current on + // each point + Real xint = xmid-j; + Real sx[depos_order + 1], sx0[depos_order + 1]; + // Real sx[2] = {1.0 - xint, xint}; + compute_shape_factor(sx, xint); + // for the node-centered quantities, compute current on + // each point + xint = xmid-stagger_shift-j0; + compute_shape_factor(sx0, xint); + // Real sx0[2] = {1.0 - xint, xint}; + // compute_shape_factor(sx0, xint); + +#if (AMREX_SPACEDIM == 3) + // --- y direction + Real y = (Real) (yp[ip]-ymin)*dyi; + Real ymid=y-dts2dy*vy; + int k = (int) ymid; + int k0= (int) (ymid-stagger_shift); + Real yint = ymid-k; + Real sy[depos_order + 1], sy0[depos_order + 1]; + compute_shape_factor(sy, yint); + // Real sy[2] = {1.0 - yint, yint}; + yint = ymid-stagger_shift-k0; + compute_shape_factor(sy0, yint); + //Real sy0[2] = {1.0 - yint, yint}; + // compute_shape_factor(shape_lo, shape_hi, sy, yint); +#endif + // --- z direction + Real z = (Real) (zp[ip]-zmin)*dzi; + Real zmid=z-dts2dz*vz; + int l = (int) zmid; + int l0= (int) (zmid-stagger_shift); + Real zint = zmid-l; + Real sz[depos_order + 1], sz0[depos_order + 1]; + compute_shape_factor(sz, zint); + // Real sz[2] = {1.0 - zint, zint}; + zint = zmid-stagger_shift-l0; + compute_shape_factor(sz0, zint); + // Real sz0[2] = {1.0 - zint, zint}; + // compute_shape_factor(shape_lo, shape_hi, sz, zint); +/* + #if (AMREX_SPACEDIM == 2) + for (int iz=shape_lo.y; iz<=shape_hi.y; iz++){ + for (int ix=shape_lo.x; ix<=shape_hi.x; ix++){ + jx_arr(lo.x+j0+ix ,lo.y+l +iz ,0) += sx0[ix]*sz [iz]*wqx; + jy_arr(lo.x+j +ix ,lo.y+l +iz ,0) += sx [ix]*sz [iz]*wqy; + jz_arr(lo.x+j +ix ,lo.y+l0+iz ,0) += sx [ix]*sz0[iz]*wqz; + } + } + #else // (AMREX_SPACEDIM == 3) + for (int iz=shape_lo.z; iz<=shape_hi.z; iz++){ + for (int iy=shape_lo.y; iy<=shape_hi.y; iy++){ + for (int ix=shape_lo.x; ix<=shape_hi.x; ix++){ + jx_arr(lo.x+j0+ix ,lo.y+k +iy ,lo.z+l +iz ) += sx0[ix]*sy [iy]*sz [iz]*wqx; + jy_arr(lo.x+j +ix ,lo.y+k0+iy ,lo.z+l +iz ) += sx [ix]*sy0[iy]*sz [iz]*wqy; + jz_arr(lo.x+j +ix ,lo.y+k +iy ,lo.z+l0+iz ) += sx [ix]*sy [iy]*sz0[iz]*wqz; + } + } + } + #endif +*/ +#if (AMREX_SPACEDIM == 2) + for (int iz=0; iz<=depos_order; iz++){ + for (int ix=0; ix<=depos_order; ix++){ + jx_arr(lo.x+j0+ix+shape_lo.x, lo.y+l +iz+shape_lo.y, 0) += sx0[ix]*sz [iz]*wqx; + jy_arr(lo.x+j +ix+shape_lo.x, lo.y+l +iz+shape_lo.y, 0) += sx [ix]*sz [iz]*wqy; + jz_arr(lo.x+j +ix+shape_lo.x, lo.y+l0+iz+shape_lo.y, 0) += sx [ix]*sz0[iz]*wqz; + } + } +#else // (AMREX_SPACEDIM == 3) + for (int iz=0; iz<=depos_order; iz++){ + for (int iy=0; iy<=depos_order; iy++){ + for (int ix=0; ix<=depos_order; ix++){ + jx_arr(lo.x+j0+ix+shape_lo.x, lo.y+k +iy+shape_lo.y, lo.z+l +iz+shape_lo.z) += sx0[ix]*sy [iy]*sz [iz]*wqx; + jy_arr(lo.x+j +ix+shape_lo.x, lo.y+k0+iy+shape_lo.y, lo.z+l +iz+shape_lo.z) += sx [ix]*sy0[iy]*sz [iz]*wqy; + jz_arr(lo.x+j +ix+shape_lo.x, lo.y+k +iy+shape_lo.y, lo.z+l0+iz+shape_lo.z) += sx [ix]*sy [iy]*sz0[iz]*wqz; + } + } + } +#endif + } + ); +} + +#endif // COMPUTESHAPEFACTORS_H_ diff --git a/Source/Particles/Deposition/Make.package b/Source/Particles/Deposition/Make.package new file mode 100644 index 000000000..7eb4a5660 --- /dev/null +++ b/Source/Particles/Deposition/Make.package @@ -0,0 +1,3 @@ +CEXE_headers += ComputeShapeFactors.H +INCLUDE_LOCATIONS += $(WARPX_HOME)/Source/Particles/Deposition +VPATH_LOCATIONS += $(WARPX_HOME)/Source/Particles/Deposition diff --git a/Source/Particles/Make.package b/Source/Particles/Make.package index f33095738..2038472a1 100644 --- a/Source/Particles/Make.package +++ b/Source/Particles/Make.package @@ -11,6 +11,7 @@ CEXE_headers += RigidInjectedParticleContainer.H CEXE_headers += PhysicalParticleContainer.H include $(WARPX_HOME)/Source/Particles/Pusher/Make.package +include $(WARPX_HOME)/Source/Particles/Deposition/Make.package INCLUDE_LOCATIONS += $(WARPX_HOME)/Source/Particles VPATH_LOCATIONS += $(WARPX_HOME)/Source/Particles diff --git a/Source/Particles/WarpXParticleContainer.H b/Source/Particles/WarpXParticleContainer.H index 0e800bf1d..6182ec5bd 100644 --- a/Source/Particles/WarpXParticleContainer.H +++ b/Source/Particles/WarpXParticleContainer.H @@ -253,8 +253,8 @@ public: AddIntComp(comm); } - int DoBoostedFrameDiags () const { return do_boosted_frame_diags; } - + int DoBoostedFrameDiags () const { return do_boosted_frame_diags; } + protected: std::map particle_comps; diff --git a/Source/Particles/WarpXParticleContainer.cpp b/Source/Particles/WarpXParticleContainer.cpp index 99596f8c3..93940b4f0 100644 --- a/Source/Particles/WarpXParticleContainer.cpp +++ b/Source/Particles/WarpXParticleContainer.cpp @@ -10,6 +10,7 @@ // Import low-level single-particle kernels #include #include +#include using namespace amrex; @@ -279,6 +280,22 @@ WarpXParticleContainer::AddNParticles (int lev, Redistribute(); } +//doDepositionShapeN<1>(wp, uxp, uyp, uzp, jx_arr, jy_arr, jz_arr, +// offset, np_to_depose, dt, dx, +// tilebox, stagger_shift, q); + +// template +/* +void WarpXParticleContainer::doDepositionShapeN(RealVector& wp, RealVector& uxp, + RealVector& uyp, RealVector& uzp, + Array4& jx_arr, + Array4& jy_arr, + Array4& jz_arr, + const long offset, const long np_to_depose, + const Real dt, const std::array& dx, + const Box tilebox, const Real stagger_shift, + const Real q) +*/ /* \brief Current Deposition for thread thread_num * \param pti : Particle iterator * \param wp : Array of particle weights @@ -311,6 +328,8 @@ WarpXParticleContainer::DepositCurrent(WarpXParIter& pti, const long ngJ = jx->nGrow(); const std::array& dx = WarpX::CellSize(std::max(depos_lev,0)); int j_is_nodal = jx->is_nodal() and jy->is_nodal() and jz->is_nodal(); + Real q = this->charge; + const Real stagger_shift = j_is_nodal ? 0.0 : 0.5; BL_PROFILE_VAR_NS("PICSAR::CurrentDeposition", blp_pxr_cd); BL_PROFILE_VAR_NS("PPC::Evolve::Accumulate", blp_accumulate); @@ -330,19 +349,14 @@ WarpXParticleContainer::DepositCurrent(WarpXParIter& pti, Box tbx = convert(tilebox, WarpX::jx_nodal_flag); Box tby = convert(tilebox, WarpX::jy_nodal_flag); Box tbz = convert(tilebox, WarpX::jz_nodal_flag); - tilebox.grow(ngJ); #ifdef AMREX_USE_GPU // No tiling on GPU: jx_ptr points to the full // jx array (same for jy_ptr and jz_ptr). - Real* jx_ptr = (*jx)[pti].dataPtr(); - Real* jy_ptr = (*jy)[pti].dataPtr(); - Real* jz_ptr = (*jz)[pti].dataPtr(); - - auto jxntot = jx[pti].length(); - auto jyntot = jy[pti].length(); - auto jzntot = jz[pti].length(); + Array4 const& jx_arr = jx.array(pti); + Array4 const& jy_arr = jy.array(pti); + Array4 const& jz_arr = jz.array(pti); #else // Tiling is on: jx_ptr points to local_jx[thread_num] // (same for jy_ptr and jz_ptr) @@ -359,132 +373,41 @@ WarpXParticleContainer::DepositCurrent(WarpXParIter& pti, local_jy[thread_num].setVal(0.0); local_jz[thread_num].setVal(0.0); + Array4 const& jx_arr = local_jx[thread_num].array(); + Array4 const& jy_arr = local_jy[thread_num].array(); + Array4 const& jz_arr = local_jz[thread_num].array(); #endif // GPU, no tiling: deposit directly in jx // CPU, tiling: deposit into local_jx // (same for jx and jz) BL_PROFILE_VAR_START(blp_pxr_cd); - Real dxi = 1.0/dx[0]; - Real dzi = 1.0/dx[2]; - Real dts2dx = 0.5*dt*dxi; - Real dts2dz = 0.5*dt*dzi; -#if (AMREX_SPACEDIM == 2) - Real invvol = dxi*dzi; -#else // (AMREX_SPACEDIM == 3) - Real dyi = 1.0/dx[1]; - Real dts2dy = 0.5*dt*dyi; - Real invvol = dxi*dyi*dzi; -#endif - Real clightsq = 1.0/PhysConst::c/PhysConst::c; - Real q = this->charge; - - // Lower corner of tile box physical domain - const std::array& xyzmin = WarpX::LowerCorner(tilebox, depos_lev);; - // xmin and zmin are built on pti.tilebox(), so it does - // not include staggering, so the stagger_shift has to be done by hand. - // Alternatively, we could define xminx from tbx (and the same for 3 - // directions and for jx, jy, jz). This way, sxo would not be needed. - // Better for memory? worth trying? - const Real xmin = xyzmin[0]; - const Real zmin = xyzmin[2]; - const Real stagger_shift = j_is_nodal ? 0.0 : 0.5; - - Array4 const& jx_arr = local_jx[thread_num].array(); - Array4 const& jy_arr = local_jy[thread_num].array(); - Array4 const& jz_arr = local_jz[thread_num].array(); - - const Dim3 lo = lbound(tilebox); Real* AMREX_RESTRICT xp = m_xp[thread_num].dataPtr() + offset; Real* AMREX_RESTRICT zp = m_zp[thread_num].dataPtr() + offset; - -#if (AMREX_SPACEDIM == 3) Real* AMREX_RESTRICT yp = m_yp[thread_num].dataPtr() + offset; - const Real ymin = xyzmin[1]; -#endif - ParallelFor( np_to_depose, - [=] AMREX_GPU_DEVICE (long ip) { - - // macro-particle current in all 3D (even for 2D runs) - // wqx, wqy and wqz are the particle current in each - // direction - Real gaminv = 1.0/std::sqrt(1.0 + uxp[ip]*uxp[ip]*clightsq - + uyp[ip]*uyp[ip]*clightsq - + uzp[ip]*uzp[ip]*clightsq); - Real wq = q*wp[ip]; - Real vx = uxp[ip]*gaminv; - Real wqx = wq*invvol*vx; - Real vy = uyp[ip]*gaminv; - Real wqy = wq*invvol*vy; - Real vz = uzp[ip]*gaminv; - Real wqz = wq*invvol*vz; - - // --- x direction - // particle position in cell unit (1/2 timestep back) - // Real x = (Real) (xp[ip]-xmin)*dxi; - // particle position in cell unit - // Real xmid=x-dts2dx*vx; - Real xmid= (Real) (xp[ip]-xmin)*dxi-dts2dx*vx; - // x index of cell containing particle (cell-centered) - int j = (int) xmid; - // x index of cell containing particle (node-centered) - int j0= (int) (xmid-stagger_shift); - // for the cell-centered quantities, compute current on - // each point - Real xint = xmid-j; - Real sx[2] = {1.0 - xint, xint}; - // for the node-centered quantities, compute current on - // each point - xint = xmid-stagger_shift-j0; - Real sx0[2] = {1.0 - xint, xint}; + // Lower corner of tile box physical domain + const std::array& xyzmin = WarpX::LowerCorner(tilebox, depos_lev);; + // xyzmin is built on pti.tilebox(), so it does + // not include staggering, so the stagger_shift has to be done by hand. + // Alternatively, we could define xyzminx from tbx (and the same for 3 + // directions and for jx, jy, jz). This way, sx0 would not be needed. + // Better for memory? worth trying? + const Dim3 lo = lbound(tilebox); -#if (AMREX_SPACEDIM == 3) - // --- y direction - Real y = (Real) (yp[ip]-ymin)*dyi; - Real ymid=y-dts2dy*vy; - int k = (int) ymid; - int k0= (int) (ymid-stagger_shift); - Real yint = ymid-k; - Real sy[2] = {1.0 - yint, yint}; - yint = ymid-stagger_shift-k0; - Real sy0[2] = {1.0 - yint, yint}; -#endif - // --- z direction - Real z = (Real) (zp[ip]-zmin)*dzi; - Real zmid=z-dts2dz*vz; - int l = (int) zmid; - int l0= (int) (zmid-stagger_shift); - Real zint = zmid-l; - Real sz[2] = {1.0 - zint, zint}; - zint = zmid-stagger_shift-l0; - Real sz0[2] = {1.0 - zint, zint}; - - // Real xbounds[2], ybounds[2], zbounds[2]; - // compute_shape_factor(xbounds, sx, xint); - // deposit_current() - -#if (AMREX_SPACEDIM == 2) - for (int iz=0; iz<2; iz++){ - for (int ix=0; ix<2; ix++){ - jx_arr(lo.x+j0+ix ,lo.y+l +iz ,0) += sx0[ix]*sz [iz]*wqx; - jy_arr(lo.x+j +ix ,lo.y+l +iz ,0) += sx [ix]*sz [iz]*wqy; - jz_arr(lo.x+j +ix ,lo.y+l0+iz ,0) += sx [ix]*sz0[iz]*wqz; - } - } -#else // (AMREX_SPACEDIM == 3) - for (int iz=0; iz<2; iz++){ - for (int iy=0; iy<2; iy++){ - for (int ix=0; ix<2; ix++){ - jx_arr(lo.x+j0+ix ,lo.y+k +iy ,lo.z+l +iz ) += sx0[ix]*sy [iy]*sz [iz]*wqx; - jy_arr(lo.x+j +ix ,lo.y+k0+iy ,lo.z+l +iz ) += sx [ix]*sy0[iy]*sz [iz]*wqy; - jz_arr(lo.x+j +ix ,lo.y+k +iy ,lo.z+l0+iz ) += sx [ix]*sy [iy]*sz0[iz]*wqz; - } - } - } -#endif - } - ); + if (WarpX::nox == 1){ + doDepositionShapeN<1>(xp, yp, zp, wp.dataPtr(), uxp.dataPtr(), uyp.dataPtr(), uzp.dataPtr(), jx_arr, jy_arr, jz_arr, + offset, np_to_depose, dt, dx, + xyzmin, lo, stagger_shift, q); + } else if (WarpX::nox == 2){ + doDepositionShapeN<2>(xp, yp, zp, wp.dataPtr(), uxp.dataPtr(), uyp.dataPtr(), uzp.dataPtr(), jx_arr, jy_arr, jz_arr, + offset, np_to_depose, dt, dx, + xyzmin, lo, stagger_shift, q); + } else if (WarpX::nox == 3){ + doDepositionShapeN<3>(xp, yp, zp, wp.dataPtr(), uxp.dataPtr(), uyp.dataPtr(), uzp.dataPtr(), jx_arr, jy_arr, jz_arr, + offset, np_to_depose, dt, dx, + xyzmin, lo, stagger_shift, q); + } #ifdef WARPX_RZ // Rescale current in r-z mode @@ -1029,3 +952,5 @@ WarpXParticleContainer::particlePostLocate(ParticleType& p, if (pld.m_lev == lev-1){ } } + + -- cgit v1.2.3 From f00c210bcf2e5c7a44f5cbe15f8767b3ec9973e0 Mon Sep 17 00:00:00 2001 From: MaxThevenet Date: Wed, 10 Jul 2019 18:57:42 -0700 Subject: rename kernel file --- Source/Particles/Deposition/ComputeShapeFactors.H | 195 ---------------------- Source/Particles/Deposition/CurrentDeposition.H | 195 ++++++++++++++++++++++ Source/Particles/WarpXParticleContainer.cpp | 2 +- 3 files changed, 196 insertions(+), 196 deletions(-) delete mode 100644 Source/Particles/Deposition/ComputeShapeFactors.H create mode 100644 Source/Particles/Deposition/CurrentDeposition.H (limited to 'Source/Particles/WarpXParticleContainer.cpp') diff --git a/Source/Particles/Deposition/ComputeShapeFactors.H b/Source/Particles/Deposition/ComputeShapeFactors.H deleted file mode 100644 index 283fc5160..000000000 --- a/Source/Particles/Deposition/ComputeShapeFactors.H +++ /dev/null @@ -1,195 +0,0 @@ -#ifndef COMPUTESHAPEFACTORS_H_ -#define COMPUTESHAPEFACTORS_H_ - -//#include -//#include -//#include -//#include -//#include -//#include -//#include -//#include - -//#include -//#include -//#include -//#include -//#include - -//#include -//#include -//#include -//#include -//#include -// #include -// #include -//#include "WarpXParticleContainer.H" -//#include -//#include - -using namespace amrex; - - -template -AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE -void compute_shape_factor_lo(amrex::Dim3& shape_lo) {}; - -template <> -AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE -void compute_shape_factor_lo <1> (amrex::Dim3& shape_lo) { - shape_lo = {0, 0, 0}; -} - -template <> -AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE -void compute_shape_factor_lo <2> (amrex::Dim3& shape_lo) { - shape_lo = {-1, -1, -1}; -} - -template <> -AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE -void compute_shape_factor_lo <3> (amrex::Dim3& shape_lo) { - shape_lo = {-1, -1, -1}; -} - -template -AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE -void compute_shape_factor(Real* sx, Real xint) {}; - -template <> -AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE -void compute_shape_factor <1> (Real* sx, Real xint){ - sx[0] = 1.0 - xint; - sx[1] = xint; -} - -template <> -AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE -void compute_shape_factor <2> (Real* sx, Real xint){ - Print()<<"compute_shape_factor <2>\n"; - sx[0] = 0.5*(0.5-xint)*(0.5-xint); - sx[1] = 0.75-xint*xint; - sx[2] = 0.5*(0.5+xint)*(0.5+xint); -} - -template <> -AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE -void compute_shape_factor <3> (Real* sx, Real xint){ - Print()<<"compute_shape_factor <2>\n"; - sx[0] = 1.0/6.0*(1.0-xint)*(1.0-xint)*(1.0-xint); - sx[1] = 2.0/3.0-xint*xint*(1-xint/2.0); - sx[2] = 2.0/3.0-(1-xint)*(1-xint)*(1.0-0.5*(1-xint)); - sx[3] = 1.0/6.0*xint*xint*xint; -} - -template -void doDepositionShapeN(Real* xp, Real* yp, Real* zp, - Real* wp, Real* uxp, - Real* uyp, Real* uzp, - const amrex::Array4& jx_arr, - const amrex::Array4& jy_arr, - const amrex::Array4& jz_arr, - const long offset, const long np_to_depose, - const amrex::Real dt, const std::array& dx, - const std::array xyzmin, - const Dim3 lo, - const amrex::Real stagger_shift, - const amrex::Real q) -{ - Real dxi = 1.0/dx[0]; - Real dzi = 1.0/dx[2]; - Real dts2dx = 0.5*dt*dxi; - Real dts2dz = 0.5*dt*dzi; -#if (AMREX_SPACEDIM == 2) - Real invvol = dxi*dzi; -#else // (AMREX_SPACEDIM == 3) - Real dyi = 1.0/dx[1]; - Real dts2dy = 0.5*dt*dyi; - Real invvol = dxi*dyi*dzi; -#endif - - const Real xmin = xyzmin[0]; - const Real ymin = xyzmin[1]; - const Real zmin = xyzmin[2]; - - Dim3 shape_lo; - compute_shape_factor_lo(shape_lo); - Real clightsq = 1.0/PhysConst::c/PhysConst::c; - - ParallelFor( np_to_depose, - [=] AMREX_GPU_DEVICE (long ip) { - - // macro-particle current in all 3D (even for 2D runs) - // wqx, wqy and wqz are the particle current in each - // direction - Real gaminv = 1.0/std::sqrt(1.0 + uxp[ip]*uxp[ip]*clightsq - + uyp[ip]*uyp[ip]*clightsq - + uzp[ip]*uzp[ip]*clightsq); - Real wq = q*wp[ip]; - Real vx = uxp[ip]*gaminv; - Real wqx = wq*invvol*vx; - Real vy = uyp[ip]*gaminv; - Real wqy = wq*invvol*vy; - Real vz = uzp[ip]*gaminv; - Real wqz = wq*invvol*vz; - - // --- x direction - // Get particle position after 1/2 push back in position - Real xmid= (Real) (xp[ip]-xmin)*dxi-dts2dx*vx; - // Initialize arrays for node-centered and cell-centered - // shape factors - Real sx[depos_order + 1], sx0[depos_order + 1]; - // Compute shape factors for node-centered quantities - int j = (int) xmid; - Real xint = xmid-j; - compute_shape_factor(sx, xint); - // Compute shape factors for cell-centered quantities - int j0= (int) (xmid-stagger_shift); - xint = xmid-stagger_shift-j0; - compute_shape_factor(sx0, xint); - -#if (AMREX_SPACEDIM == 3) - // --- y direction - Real ymid= (Real) (yp[ip]-ymin)*dyi-dts2dy*vy; - Real sy[depos_order + 1], sy0[depos_order + 1]; - int k = (int) ymid; - Real yint = ymid-k; - compute_shape_factor(sy, yint); - int k0= (int) (ymid-stagger_shift); - yint = ymid-stagger_shift-k0; - compute_shape_factor(sy0, yint); -#endif - // --- z direction - Real zmid=(Real) (zp[ip]-zmin)*dzi-dts2dz*vz; - Real sz[depos_order + 1], sz0[depos_order + 1]; - int l = (int) zmid; - Real zint = zmid-l; - compute_shape_factor(sz, zint); - int l0= (int) (zmid-stagger_shift); - zint = zmid-stagger_shift-l0; - compute_shape_factor(sz0, zint); - -#if (AMREX_SPACEDIM == 2) - for (int iz=0; iz<=depos_order; iz++){ - for (int ix=0; ix<=depos_order; ix++){ - jx_arr(lo.x+j0+ix+shape_lo.x, lo.y+l +iz+shape_lo.y, 0) += sx0[ix]*sz [iz]*wqx; - jy_arr(lo.x+j +ix+shape_lo.x, lo.y+l +iz+shape_lo.y, 0) += sx [ix]*sz [iz]*wqy; - jz_arr(lo.x+j +ix+shape_lo.x, lo.y+l0+iz+shape_lo.y, 0) += sx [ix]*sz0[iz]*wqz; - } - } -#else // (AMREX_SPACEDIM == 3) - for (int iz=0; iz<=depos_order; iz++){ - for (int iy=0; iy<=depos_order; iy++){ - for (int ix=0; ix<=depos_order; ix++){ - jx_arr(lo.x+j0+ix+shape_lo.x, lo.y+k +iy+shape_lo.y, lo.z+l +iz+shape_lo.z) += sx0[ix]*sy [iy]*sz [iz]*wqx; - jy_arr(lo.x+j +ix+shape_lo.x, lo.y+k0+iy+shape_lo.y, lo.z+l +iz+shape_lo.z) += sx [ix]*sy0[iy]*sz [iz]*wqy; - jz_arr(lo.x+j +ix+shape_lo.x, lo.y+k +iy+shape_lo.y, lo.z+l0+iz+shape_lo.z) += sx [ix]*sy [iy]*sz0[iz]*wqz; - } - } - } -#endif - } - ); -} - -#endif // COMPUTESHAPEFACTORS_H_ diff --git a/Source/Particles/Deposition/CurrentDeposition.H b/Source/Particles/Deposition/CurrentDeposition.H new file mode 100644 index 000000000..5e73dd9e0 --- /dev/null +++ b/Source/Particles/Deposition/CurrentDeposition.H @@ -0,0 +1,195 @@ +#ifndef CURRENTDEPOSITION_H_ +#define CURRENTDEPOSITION_H_ + +//#include +//#include +//#include +//#include +//#include +//#include +//#include +//#include + +//#include +//#include +//#include +//#include +//#include + +//#include +//#include +//#include +//#include +//#include +// #include +// #include +//#include "WarpXParticleContainer.H" +//#include +//#include + +using namespace amrex; + + +template +AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE +void compute_shape_factor_lo(amrex::Dim3& shape_lo) {}; + +template <> +AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE +void compute_shape_factor_lo <1> (amrex::Dim3& shape_lo) { + shape_lo = {0, 0, 0}; +} + +template <> +AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE +void compute_shape_factor_lo <2> (amrex::Dim3& shape_lo) { + shape_lo = {-1, -1, -1}; +} + +template <> +AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE +void compute_shape_factor_lo <3> (amrex::Dim3& shape_lo) { + shape_lo = {-1, -1, -1}; +} + +template +AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE +void compute_shape_factor(Real* sx, Real xint) {}; + +template <> +AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE +void compute_shape_factor <1> (Real* sx, Real xint){ + sx[0] = 1.0 - xint; + sx[1] = xint; +} + +template <> +AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE +void compute_shape_factor <2> (Real* sx, Real xint){ + Print()<<"compute_shape_factor <2>\n"; + sx[0] = 0.5*(0.5-xint)*(0.5-xint); + sx[1] = 0.75-xint*xint; + sx[2] = 0.5*(0.5+xint)*(0.5+xint); +} + +template <> +AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE +void compute_shape_factor <3> (Real* sx, Real xint){ + Print()<<"compute_shape_factor <2>\n"; + sx[0] = 1.0/6.0*(1.0-xint)*(1.0-xint)*(1.0-xint); + sx[1] = 2.0/3.0-xint*xint*(1-xint/2.0); + sx[2] = 2.0/3.0-(1-xint)*(1-xint)*(1.0-0.5*(1-xint)); + sx[3] = 1.0/6.0*xint*xint*xint; +} + +template +void doDepositionShapeN(Real* xp, Real* yp, Real* zp, + Real* wp, Real* uxp, + Real* uyp, Real* uzp, + const amrex::Array4& jx_arr, + const amrex::Array4& jy_arr, + const amrex::Array4& jz_arr, + const long offset, const long np_to_depose, + const amrex::Real dt, const std::array& dx, + const std::array xyzmin, + const Dim3 lo, + const amrex::Real stagger_shift, + const amrex::Real q) +{ + Real dxi = 1.0/dx[0]; + Real dzi = 1.0/dx[2]; + Real dts2dx = 0.5*dt*dxi; + Real dts2dz = 0.5*dt*dzi; +#if (AMREX_SPACEDIM == 2) + Real invvol = dxi*dzi; +#else // (AMREX_SPACEDIM == 3) + Real dyi = 1.0/dx[1]; + Real dts2dy = 0.5*dt*dyi; + Real invvol = dxi*dyi*dzi; +#endif + + const Real xmin = xyzmin[0]; + const Real ymin = xyzmin[1]; + const Real zmin = xyzmin[2]; + + Dim3 shape_lo; + compute_shape_factor_lo(shape_lo); + Real clightsq = 1.0/PhysConst::c/PhysConst::c; + + ParallelFor( np_to_depose, + [=] AMREX_GPU_DEVICE (long ip) { + + // macro-particle current in all 3D (even for 2D runs) + // wqx, wqy and wqz are the particle current in each + // direction + Real gaminv = 1.0/std::sqrt(1.0 + uxp[ip]*uxp[ip]*clightsq + + uyp[ip]*uyp[ip]*clightsq + + uzp[ip]*uzp[ip]*clightsq); + Real wq = q*wp[ip]; + Real vx = uxp[ip]*gaminv; + Real wqx = wq*invvol*vx; + Real vy = uyp[ip]*gaminv; + Real wqy = wq*invvol*vy; + Real vz = uzp[ip]*gaminv; + Real wqz = wq*invvol*vz; + + // --- x direction + // Get particle position after 1/2 push back in position + Real xmid= (Real) (xp[ip]-xmin)*dxi-dts2dx*vx; + // Initialize arrays for node-centered and cell-centered + // shape factors + Real sx[depos_order + 1], sx0[depos_order + 1]; + // Compute shape factors for node-centered quantities + int j = (int) xmid; + Real xint = xmid-j; + compute_shape_factor(sx, xint); + // Compute shape factors for cell-centered quantities + int j0= (int) (xmid-stagger_shift); + xint = xmid-stagger_shift-j0; + compute_shape_factor(sx0, xint); + +#if (AMREX_SPACEDIM == 3) + // --- y direction + Real ymid= (Real) (yp[ip]-ymin)*dyi-dts2dy*vy; + Real sy[depos_order + 1], sy0[depos_order + 1]; + int k = (int) ymid; + Real yint = ymid-k; + compute_shape_factor(sy, yint); + int k0= (int) (ymid-stagger_shift); + yint = ymid-stagger_shift-k0; + compute_shape_factor(sy0, yint); +#endif + // --- z direction + Real zmid=(Real) (zp[ip]-zmin)*dzi-dts2dz*vz; + Real sz[depos_order + 1], sz0[depos_order + 1]; + int l = (int) zmid; + Real zint = zmid-l; + compute_shape_factor(sz, zint); + int l0= (int) (zmid-stagger_shift); + zint = zmid-stagger_shift-l0; + compute_shape_factor(sz0, zint); + +#if (AMREX_SPACEDIM == 2) + for (int iz=0; iz<=depos_order; iz++){ + for (int ix=0; ix<=depos_order; ix++){ + jx_arr(lo.x+j0+ix+shape_lo.x, lo.y+l +iz+shape_lo.y, 0) += sx0[ix]*sz [iz]*wqx; + jy_arr(lo.x+j +ix+shape_lo.x, lo.y+l +iz+shape_lo.y, 0) += sx [ix]*sz [iz]*wqy; + jz_arr(lo.x+j +ix+shape_lo.x, lo.y+l0+iz+shape_lo.y, 0) += sx [ix]*sz0[iz]*wqz; + } + } +#else // (AMREX_SPACEDIM == 3) + for (int iz=0; iz<=depos_order; iz++){ + for (int iy=0; iy<=depos_order; iy++){ + for (int ix=0; ix<=depos_order; ix++){ + jx_arr(lo.x+j0+ix+shape_lo.x, lo.y+k +iy+shape_lo.y, lo.z+l +iz+shape_lo.z) += sx0[ix]*sy [iy]*sz [iz]*wqx; + jy_arr(lo.x+j +ix+shape_lo.x, lo.y+k0+iy+shape_lo.y, lo.z+l +iz+shape_lo.z) += sx [ix]*sy0[iy]*sz [iz]*wqy; + jz_arr(lo.x+j +ix+shape_lo.x, lo.y+k +iy+shape_lo.y, lo.z+l0+iz+shape_lo.z) += sx [ix]*sy [iy]*sz0[iz]*wqz; + } + } + } +#endif + } + ); +} + +#endif // CURRENTDEPOSITION_H_ diff --git a/Source/Particles/WarpXParticleContainer.cpp b/Source/Particles/WarpXParticleContainer.cpp index 93940b4f0..eec633a37 100644 --- a/Source/Particles/WarpXParticleContainer.cpp +++ b/Source/Particles/WarpXParticleContainer.cpp @@ -10,7 +10,7 @@ // Import low-level single-particle kernels #include #include -#include +#include using namespace amrex; -- cgit v1.2.3 From 5baf28e5115f450d4f2fd821c33df5a37054792d Mon Sep 17 00:00:00 2001 From: MaxThevenet Date: Thu, 11 Jul 2019 09:07:26 -0700 Subject: remove print statements --- Source/Particles/PhysicalParticleContainer.cpp | 1 - Source/Particles/WarpXParticleContainer.cpp | 4 ---- 2 files changed, 5 deletions(-) (limited to 'Source/Particles/WarpXParticleContainer.cpp') diff --git a/Source/Particles/PhysicalParticleContainer.cpp b/Source/Particles/PhysicalParticleContainer.cpp index b78d36c87..df08d77a7 100644 --- a/Source/Particles/PhysicalParticleContainer.cpp +++ b/Source/Particles/PhysicalParticleContainer.cpp @@ -1530,7 +1530,6 @@ PhysicalParticleContainer::Evolve (int lev, // Current Deposition // // Deposit inside domains - Print()<<"np_current "< Date: Thu, 11 Jul 2019 16:05:16 -0400 Subject: fix code for GPU. OK to machine precision for o1-2-3 --- Source/Particles/Deposition/CurrentDeposition.H | 12 ++++++--- Source/Particles/WarpXParticleContainer.cpp | 12 ++++----- Source/WarpX.H | 34 ++++++++++++------------- 3 files changed, 32 insertions(+), 26 deletions(-) (limited to 'Source/Particles/WarpXParticleContainer.cpp') diff --git a/Source/Particles/Deposition/CurrentDeposition.H b/Source/Particles/Deposition/CurrentDeposition.H index 416cecd7b..cf6fe0d32 100644 --- a/Source/Particles/Deposition/CurrentDeposition.H +++ b/Source/Particles/Deposition/CurrentDeposition.H @@ -77,6 +77,8 @@ void doDepositionShapeN(Real* xp, Real* yp, Real* zp, const amrex::Real stagger_shift, const amrex::Real q) { + BL_PROFILE("PICSAR::CurrentDeposition"); + Real dxi = 1.0/dx[0]; Real dzi = 1.0/dx[2]; Real dts2dx = 0.5*dt*dxi; @@ -150,9 +152,13 @@ void doDepositionShapeN(Real* xp, Real* yp, Real* zp, for (int iz=0; iz<=depos_order; iz++){ for (int iy=0; iy<=depos_order; iy++){ for (int ix=0; ix<=depos_order; ix++){ - jx_arr(lo.x+j0+ix+shape_lo.x, lo.y+k +iy+shape_lo.y, lo.z+l +iz+shape_lo.z) += sx0[ix]*sy [iy]*sz [iz]*wqx; - jy_arr(lo.x+j +ix+shape_lo.x, lo.y+k0+iy+shape_lo.y, lo.z+l +iz+shape_lo.z) += sx [ix]*sy0[iy]*sz [iz]*wqy; - jz_arr(lo.x+j +ix+shape_lo.x, lo.y+k +iy+shape_lo.y, lo.z+l0+iz+shape_lo.z) += sx [ix]*sy [iy]*sz0[iz]*wqz; + amrex::Gpu::Atomic::Add(&jx_arr(lo.x+j0+ix+shape_lo.x, lo.y+k +iy+shape_lo.y, lo.z+l +iz+shape_lo.z), sx0[ix]*sy [iy]*sz [iz]*wqx); + amrex::Gpu::Atomic::Add(&jy_arr(lo.x+j +ix+shape_lo.x, lo.y+k0+iy+shape_lo.y, lo.z+l +iz+shape_lo.z), sx [ix]*sy0[iy]*sz [iz]*wqy); + amrex::Gpu::Atomic::Add(&jz_arr(lo.x+j +ix+shape_lo.x, lo.y+k +iy+shape_lo.y, lo.z+l0+iz+shape_lo.z), sx [ix]*sy [iy]*sz0[iz]*wqz); + // amrex::Gpu::Atomic::Add(&jx(j0+joff,k +koff,l +loff), sx0[joff]*sy [koff]*sz [loff]*wqx); + // jx_arr(lo.x+j0+ix+shape_lo.x, lo.y+k +iy+shape_lo.y, lo.z+l +iz+shape_lo.z) += sx0[ix]*sy [iy]*sz [iz]*wqx; + // jy_arr(lo.x+j +ix+shape_lo.x, lo.y+k0+iy+shape_lo.y, lo.z+l +iz+shape_lo.z) += sx [ix]*sy0[iy]*sz [iz]*wqy; + // jz_arr(lo.x+j +ix+shape_lo.x, lo.y+k +iy+shape_lo.y, lo.z+l0+iz+shape_lo.z) += sx [ix]*sy [iy]*sz0[iz]*wqz; } } } diff --git a/Source/Particles/WarpXParticleContainer.cpp b/Source/Particles/WarpXParticleContainer.cpp index 8f13b1b3e..6ea36c841 100644 --- a/Source/Particles/WarpXParticleContainer.cpp +++ b/Source/Particles/WarpXParticleContainer.cpp @@ -331,7 +331,6 @@ WarpXParticleContainer::DepositCurrent(WarpXParIter& pti, Real q = this->charge; const Real stagger_shift = j_is_nodal ? 0.0 : 0.5; - BL_PROFILE_VAR_NS("PICSAR::CurrentDeposition", blp_pxr_cd); BL_PROFILE_VAR_NS("PPC::Evolve::Accumulate", blp_accumulate); // Get tile box where current is deposited. @@ -354,9 +353,12 @@ WarpXParticleContainer::DepositCurrent(WarpXParIter& pti, #ifdef AMREX_USE_GPU // No tiling on GPU: jx_ptr points to the full // jx array (same for jy_ptr and jz_ptr). - Array4 const& jx_arr = jx.array(pti); - Array4 const& jy_arr = jy.array(pti); - Array4 const& jz_arr = jz.array(pti); + Array4 const& jx_arr = jx->array(pti); + Array4 const& jy_arr = jy->array(pti); + Array4 const& jz_arr = jz->array(pti); + //auto const& jx_arr = jx->array(pti); + //auto const& jy_arr = jy->array(pti); + //auto const& jz_arr = jz->array(pti); #else // Tiling is on: jx_ptr points to local_jx[thread_num] // (same for jy_ptr and jz_ptr) @@ -380,7 +382,6 @@ WarpXParticleContainer::DepositCurrent(WarpXParIter& pti, // GPU, no tiling: deposit directly in jx // CPU, tiling: deposit into local_jx // (same for jx and jz) - BL_PROFILE_VAR_START(blp_pxr_cd); Real* AMREX_RESTRICT xp = m_xp[thread_num].dataPtr() + offset; Real* AMREX_RESTRICT zp = m_zp[thread_num].dataPtr() + offset; @@ -417,7 +418,6 @@ WarpXParticleContainer::DepositCurrent(WarpXParIter& pti, jz_ptr, &ngJ, jzntot.getVect(), &xyzmin[0], &dx[0]); #endif - BL_PROFILE_VAR_STOP(blp_pxr_cd); #ifndef AMREX_USE_GPU BL_PROFILE_VAR_START(blp_accumulate); diff --git a/Source/WarpX.H b/Source/WarpX.H index 251270e21..97a69340a 100644 --- a/Source/WarpX.H +++ b/Source/WarpX.H @@ -251,6 +251,23 @@ public: void WriteSlicePlotFile () const; void ClearSliceMultiFabs (); + + static void ComputeDivB (amrex::MultiFab& divB, int dcomp, + const std::array& B, + const std::array& dx); + + static void ComputeDivB (amrex::MultiFab& divB, int dcomp, + const std::array& B, + const std::array& dx, int ngrow); + + static void ComputeDivE (amrex::MultiFab& divE, int dcomp, + const std::array& B, + const std::array& dx); + + static void ComputeDivE (amrex::MultiFab& divE, int dcomp, + const std::array& B, + const std::array& dx, int ngrow); + protected: //! Tagging cells for refinement @@ -383,23 +400,6 @@ private: std::array, 3> getInterpolatedE(int lev) const; std::array, 3> getInterpolatedB(int lev) const; - - static void ComputeDivB (amrex::MultiFab& divB, int dcomp, - const std::array& B, - const std::array& dx); - - static void ComputeDivB (amrex::MultiFab& divB, int dcomp, - const std::array& B, - const std::array& dx, int ngrow); - - static void ComputeDivE (amrex::MultiFab& divE, int dcomp, - const std::array& B, - const std::array& dx); - - static void ComputeDivE (amrex::MultiFab& divE, int dcomp, - const std::array& B, - const std::array& dx, int ngrow); - void SyncCurrent (const std::array& fine, const std::array< amrex::MultiFab*,3>& crse, int ref_ratio); -- cgit v1.2.3 From a944bdd6c5a4612e32aaefc894c232b958129bea Mon Sep 17 00:00:00 2001 From: MaxThevenet Date: Thu, 11 Jul 2019 14:07:54 -0700 Subject: keep option to use PICSAR current deposition --- Source/Particles/Deposition/CurrentDeposition.H | 108 +++++++++++---- Source/Particles/WarpXParticleContainer.cpp | 171 ++++++++++++++++++++---- Source/WarpX.H | 1 + Source/WarpX.cpp | 2 + 4 files changed, 226 insertions(+), 56 deletions(-) (limited to 'Source/Particles/WarpXParticleContainer.cpp') diff --git a/Source/Particles/Deposition/CurrentDeposition.H b/Source/Particles/Deposition/CurrentDeposition.H index cf6fe0d32..006ff05fe 100644 --- a/Source/Particles/Deposition/CurrentDeposition.H +++ b/Source/Particles/Deposition/CurrentDeposition.H @@ -3,33 +3,45 @@ using namespace amrex; - +// Compute lower bound for current deposition shape factor: +// order 1: 0 +// order 2: -1 +// order 3: -1 +// non-type template parameter depos_order is the order for current deposition +// shape factor. Specialized templates are defined below for orders 1, 2 and 3. +// Currently, the shape factor is the same in all directions. template AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE void compute_shape_factor_lo(amrex::Dim3& shape_lo) {}; +// Compute lower bound for order 1. template <> AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE void compute_shape_factor_lo <1> (amrex::Dim3& shape_lo) { shape_lo = {0, 0, 0}; } +// Compute lower bound for order 2. template <> AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE void compute_shape_factor_lo <2> (amrex::Dim3& shape_lo) { shape_lo = {-1, -1, -1}; } +// Compute lower bound for order 3. template <> AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE void compute_shape_factor_lo <3> (amrex::Dim3& shape_lo) { shape_lo = {-1, -1, -1}; } +// Compute shape factor and return index of cell containing particle. +// Specialized templates are defined below for orders 1, 2 and 3. template AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE int compute_shape_factor(Real* sx, Real xint) {return 0;}; +// Compute shape factor for order 1. template <> AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE int compute_shape_factor <1> (Real* sx, Real xmid){ @@ -40,6 +52,7 @@ int compute_shape_factor <1> (Real* sx, Real xmid){ return j; } +// Compute shape factor for order 2. template <> AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE int compute_shape_factor <2> (Real* sx, Real xmid){ @@ -51,6 +64,7 @@ int compute_shape_factor <2> (Real* sx, Real xmid){ return j; } +// Compute shape factor for order 3. template <> AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE int compute_shape_factor <3> (Real* sx, Real xmid){ @@ -63,6 +77,23 @@ int compute_shape_factor <3> (Real* sx, Real xmid){ return j; } +/* \brief Current Deposition for thread thread_num + * /param xp, yp, zp : Pointer to arrays of particle positions. + * \param wp : Pointer to array of particle weights. + * \param uxp uyp uzp : Pointer to arrays of particle momentum. + * \param jx_arr : Array4 of current density, either full array or tile. + * \param jy_arr : Array4 of current density, either full array or tile. + * \param jz_arr : Array4 of current density, either full array or tile. + * \param offset : Index of first particle for which current is deposited + * \param np_to_depose : Number of particles for which current is deposited. + Particles [offset,offset+np_tp_depose] deposit current. + * \param dt : Time step for particle level + * \param dx : 3D cell size + * \param xyzmin : Physical lower bounds of domain. + * \param lo : Index lower bounds of domain. + * \param stagger_shift: 0 if nodal, 0.5 if staggered. + * /param q : species charge. + */ template void doDepositionShapeN(Real* xp, Real* yp, Real* zp, Real* wp, Real* uxp, @@ -102,63 +133,82 @@ void doDepositionShapeN(Real* xp, Real* yp, Real* zp, ParallelFor( np_to_depose, [=] AMREX_GPU_DEVICE (long ip) { - // macro-particle current in all 3D (even for 2D runs) - // wqx, wqy and wqz are the particle current in each - // direction Real gaminv = 1.0/std::sqrt(1.0 + uxp[ip]*uxp[ip]*clightsq - + uyp[ip]*uyp[ip]*clightsq - + uzp[ip]*uzp[ip]*clightsq); - Real wq = q*wp[ip]; - Real vx = uxp[ip]*gaminv; + + uyp[ip]*uyp[ip]*clightsq + + uzp[ip]*uzp[ip]*clightsq); + Real wq = q*wp[ip]; + Real vx = uxp[ip]*gaminv; + Real vy = uyp[ip]*gaminv; + Real vz = uzp[ip]*gaminv; + // wqx, wqy wqz are particle current in each direction Real wqx = wq*invvol*vx; - Real vy = uyp[ip]*gaminv; Real wqy = wq*invvol*vy; - Real vz = uzp[ip]*gaminv; Real wqz = wq*invvol*vz; // --- x direction // Get particle position after 1/2 push back in position - Real xmid= (Real) (xp[ip]-xmin)*dxi-dts2dx*vx; - // Initialize arrays for node-centered and cell-centered - // shape factors - Real sx[depos_order + 1], sx0[depos_order + 1]; + Real xmid = (Real) (xp[ip]-xmin)*dxi-dts2dx*vx; // Compute shape factors for node-centered quantities - int j = compute_shape_factor(sx, xmid); + Real sx [depos_order + 1]; + int j = compute_shape_factor(sx, xmid); // Compute shape factors for cell-centered quantities + Real sx0[depos_order + 1]; int j0 = compute_shape_factor(sx0, xmid-stagger_shift); #if (AMREX_SPACEDIM == 3) // --- y direction Real ymid= (Real) (yp[ip]-ymin)*dyi-dts2dy*vy; - Real sy[depos_order + 1], sy0[depos_order + 1]; - int k = compute_shape_factor(sy, ymid); + Real sy [depos_order + 1]; + int k = compute_shape_factor(sy, ymid); + Real sy0[depos_order + 1]; int k0 = compute_shape_factor(sy0, ymid-stagger_shift); #endif // --- z direction Real zmid=(Real) (zp[ip]-zmin)*dzi-dts2dz*vz; - Real sz[depos_order + 1], sz0[depos_order + 1]; - int l = compute_shape_factor(sz, zmid); + Real sz [depos_order + 1]; + int l = compute_shape_factor(sz, zmid); + Real sz0[depos_order + 1]; int l0 = compute_shape_factor(sz0, zmid-stagger_shift); #if (AMREX_SPACEDIM == 2) for (int iz=0; iz<=depos_order; iz++){ for (int ix=0; ix<=depos_order; ix++){ - jx_arr(lo.x+j0+ix+shape_lo.x, lo.y+l +iz+shape_lo.y, 0) += sx0[ix]*sz [iz]*wqx; - jy_arr(lo.x+j +ix+shape_lo.x, lo.y+l +iz+shape_lo.y, 0) += sx [ix]*sz [iz]*wqy; - jz_arr(lo.x+j +ix+shape_lo.x, lo.y+l0+iz+shape_lo.y, 0) += sx [ix]*sz0[iz]*wqz; + amrex::Gpu::Atomic::Add( + &jx_arr(lo.x+j0+ix+shape_lo.x, + lo.y+l +iz+shape_lo.y, + 0), + sx0[ix]*sz [iz]*wqx); + amrex::Gpu::Atomic::Add( + &jy_arr(lo.x+j +ix+shape_lo.x, + lo.y+l +iz+shape_lo.y, + 0), + sx [ix]*sz [iz]*wqy); + amrex::Gpu::Atomic::Add( + &jz_arr(lo.x+j +ix+shape_lo.x, + lo.y+l0+iz+shape_lo.y, + 0), + sx [ix]*sz0[iz]*wqz); } } #else // (AMREX_SPACEDIM == 3) for (int iz=0; iz<=depos_order; iz++){ for (int iy=0; iy<=depos_order; iy++){ for (int ix=0; ix<=depos_order; ix++){ - amrex::Gpu::Atomic::Add(&jx_arr(lo.x+j0+ix+shape_lo.x, lo.y+k +iy+shape_lo.y, lo.z+l +iz+shape_lo.z), sx0[ix]*sy [iy]*sz [iz]*wqx); - amrex::Gpu::Atomic::Add(&jy_arr(lo.x+j +ix+shape_lo.x, lo.y+k0+iy+shape_lo.y, lo.z+l +iz+shape_lo.z), sx [ix]*sy0[iy]*sz [iz]*wqy); - amrex::Gpu::Atomic::Add(&jz_arr(lo.x+j +ix+shape_lo.x, lo.y+k +iy+shape_lo.y, lo.z+l0+iz+shape_lo.z), sx [ix]*sy [iy]*sz0[iz]*wqz); - // amrex::Gpu::Atomic::Add(&jx(j0+joff,k +koff,l +loff), sx0[joff]*sy [koff]*sz [loff]*wqx); - // jx_arr(lo.x+j0+ix+shape_lo.x, lo.y+k +iy+shape_lo.y, lo.z+l +iz+shape_lo.z) += sx0[ix]*sy [iy]*sz [iz]*wqx; - // jy_arr(lo.x+j +ix+shape_lo.x, lo.y+k0+iy+shape_lo.y, lo.z+l +iz+shape_lo.z) += sx [ix]*sy0[iy]*sz [iz]*wqy; - // jz_arr(lo.x+j +ix+shape_lo.x, lo.y+k +iy+shape_lo.y, lo.z+l0+iz+shape_lo.z) += sx [ix]*sy [iy]*sz0[iz]*wqz; + amrex::Gpu::Atomic::Add( + &jx_arr(lo.x+j0+ix+shape_lo.x, + lo.y+k +iy+shape_lo.y, + lo.z+l +iz+shape_lo.z), + sx0[ix]*sy [iy]*sz [iz]*wqx); + amrex::Gpu::Atomic::Add( + &jy_arr(lo.x+j +ix+shape_lo.x, + lo.y+k0+iy+shape_lo.y, + lo.z+l +iz+shape_lo.z), + sx [ix]*sy0[iy]*sz [iz]*wqy); + amrex::Gpu::Atomic::Add( + &jz_arr(lo.x+j +ix+shape_lo.x, + lo.y+k +iy+shape_lo.y, + lo.z+l0+iz+shape_lo.z), + sx [ix]*sy [iy]*sz0[iz]*wqz); } } } diff --git a/Source/Particles/WarpXParticleContainer.cpp b/Source/Particles/WarpXParticleContainer.cpp index 6ea36c841..42b99efcb 100644 --- a/Source/Particles/WarpXParticleContainer.cpp +++ b/Source/Particles/WarpXParticleContainer.cpp @@ -280,23 +280,7 @@ WarpXParticleContainer::AddNParticles (int lev, Redistribute(); } -//doDepositionShapeN<1>(wp, uxp, uyp, uzp, jx_arr, jy_arr, jz_arr, -// offset, np_to_depose, dt, dx, -// tilebox, stagger_shift, q); - -// template -/* -void WarpXParticleContainer::doDepositionShapeN(RealVector& wp, RealVector& uxp, - RealVector& uyp, RealVector& uzp, - Array4& jx_arr, - Array4& jy_arr, - Array4& jz_arr, - const long offset, const long np_to_depose, - const Real dt, const std::array& dx, - const Box tilebox, const Real stagger_shift, - const Real q) -*/ -/* \brief Current Deposition for thread thread_num +/* \brief Current Deposition for thread thread_num using PICSAR * \param pti : Particle iterator * \param wp : Array of particle weights * \param uxp uyp uzp : Array of particle @@ -311,6 +295,139 @@ void WarpXParticleContainer::doDepositionShapeN(RealVector& wp, RealVector& uxp, * \param dt : Time step for particle level */ void +WarpXParticleContainer::DepositCurrentFortran(WarpXParIter& pti, + RealVector& wp, RealVector& uxp, + RealVector& uyp, RealVector& uzp, + MultiFab* jx, MultiFab* jy, MultiFab* jz, + const long offset, const long np_to_depose, + int thread_num, int lev, int depos_lev, + Real dt) +{ + AMREX_ALWAYS_ASSERT_WITH_MESSAGE((depos_lev==(lev-1)) || + (depos_lev==(lev )), + "Deposition buffers only work for lev-1"); + // If no particles, do not do anything + if (np_to_depose == 0) return; + + const long ngJ = jx->nGrow(); + const std::array& dx = WarpX::CellSize(std::max(depos_lev,0)); + const long lvect = 8; + int j_is_nodal = jx->is_nodal() and jy->is_nodal() and jz->is_nodal(); + + BL_PROFILE_VAR_NS("PICSAR::CurrentDeposition", blp_pxr_cd); + BL_PROFILE_VAR_NS("PPC::Evolve::Accumulate", blp_accumulate); + + // Get tile box where current is deposited. + // The tile box is different when depositing in the buffers (depos_lev& xyzmin_tile = WarpX::LowerCorner(tilebox, depos_lev); + const std::array& xyzmin = xyzmin_tile; + +#ifdef AMREX_USE_GPU + // No tiling on GPU: jx_ptr points to the full + // jx array (same for jy_ptr and jz_ptr). + Real* jx_ptr = (*jx)[pti].dataPtr(); + Real* jy_ptr = (*jy)[pti].dataPtr(); + Real* jz_ptr = (*jz)[pti].dataPtr(); + + auto jxntot = (*jx)[pti].length(); + auto jyntot = (*jy)[pti].length(); + auto jzntot = (*jz)[pti].length(); +#else + // Tiling is on: jx_ptr points to local_jx[thread_num] + // (same for jy_ptr and jz_ptr) + tbx.grow(ngJ); + tby.grow(ngJ); + tbz.grow(ngJ); + + local_jx[thread_num].resize(tbx); + local_jy[thread_num].resize(tby); + local_jz[thread_num].resize(tbz); + + Real* jx_ptr = local_jx[thread_num].dataPtr(); + Real* jy_ptr = local_jy[thread_num].dataPtr(); + Real* jz_ptr = local_jz[thread_num].dataPtr(); + + // local_jx[thread_num] is set to zero + local_jx[thread_num].setVal(0.0); + local_jy[thread_num].setVal(0.0); + local_jz[thread_num].setVal(0.0); + + auto jxntot = local_jx[thread_num].length(); + auto jyntot = local_jy[thread_num].length(); + auto jzntot = local_jz[thread_num].length(); +#endif + // GPU, no tiling: deposit directly in jx + // CPU, tiling: deposit into local_jx + // (same for jx and jz) + BL_PROFILE_VAR_START(blp_pxr_cd); + warpx_current_deposition( + jx_ptr, &ngJ, jxntot.getVect(), + jy_ptr, &ngJ, jyntot.getVect(), + jz_ptr, &ngJ, jzntot.getVect(), + &np_to_depose, + m_xp[thread_num].dataPtr() + offset, + m_yp[thread_num].dataPtr() + offset, + m_zp[thread_num].dataPtr() + offset, + uxp.dataPtr() + offset, + uyp.dataPtr() + offset, + uzp.dataPtr() + offset, + m_giv[thread_num].dataPtr() + offset, + wp.dataPtr() + offset, &this->charge, + &xyzmin[0], &xyzmin[1], &xyzmin[2], + &dt, &dx[0], &dx[1], &dx[2], + &WarpX::nox,&WarpX::noy,&WarpX::noz, &j_is_nodal, + &lvect,&WarpX::current_deposition_algo); + +#ifdef WARPX_RZ + // Rescale current in r-z mode + warpx_current_deposition_rz_volume_scaling( + jx_ptr, &ngJ, jxntot.getVect(), + jy_ptr, &ngJ, jyntot.getVect(), + jz_ptr, &ngJ, jzntot.getVect(), + &xyzmin[0], &dx[0]); +#endif + BL_PROFILE_VAR_STOP(blp_pxr_cd); + +#ifndef AMREX_USE_GPU + BL_PROFILE_VAR_START(blp_accumulate); + // CPU, tiling: atomicAdd local_jx into jx + // (same for jx and jz) + (*jx)[pti].atomicAdd(local_jx[thread_num], tbx, tbx, 0, 0, 1); + (*jy)[pti].atomicAdd(local_jy[thread_num], tby, tby, 0, 0, 1); + (*jz)[pti].atomicAdd(local_jz[thread_num], tbz, tbz, 0, 0, 1); + BL_PROFILE_VAR_STOP(blp_accumulate); +#endif +} + +/* \brief Current Deposition for thread thread_num + * \param pti : Particle iterator + * \param wp : Array of particle weights + * \param uxp uyp uzp : Array of particle + * \param jx jy jz : Full array of current density + * \param offset : Index of first particle for which current is deposited + * \param np_to_depose: Number of particles for which current is deposited. + Particles [offset,offset+np_tp_depose] deposit current + * \param thread_num : Thread number (if tiling) + * \param lev : Level of box that contains particles + * \param depos_lev : Level on which particles deposit (if buffers are used) + * \param dt : Time step for particle level + */ +void WarpXParticleContainer::DepositCurrent(WarpXParIter& pti, RealVector& wp, RealVector& uxp, RealVector& uyp, RealVector& uzp, @@ -356,9 +473,6 @@ WarpXParticleContainer::DepositCurrent(WarpXParIter& pti, Array4 const& jx_arr = jx->array(pti); Array4 const& jy_arr = jy->array(pti); Array4 const& jz_arr = jz->array(pti); - //auto const& jx_arr = jx->array(pti); - //auto const& jy_arr = jy->array(pti); - //auto const& jz_arr = jz->array(pti); #else // Tiling is on: jx_ptr points to local_jx[thread_num] // (same for jy_ptr and jz_ptr) @@ -396,17 +510,20 @@ WarpXParticleContainer::DepositCurrent(WarpXParIter& pti, // Better for memory? worth trying? const Dim3 lo = lbound(tilebox); - if (WarpX::nox == 1){ - doDepositionShapeN<1>(xp, yp, zp, wp.dataPtr(), uxp.dataPtr(), uyp.dataPtr(), uzp.dataPtr(), jx_arr, jy_arr, jz_arr, - offset, np_to_depose, dt, dx, + if (WarpX::nox == 1){ + doDepositionShapeN<1>(xp, yp, zp, wp.dataPtr(), uxp.dataPtr(), + uyp.dataPtr(), uzp.dataPtr(), jx_arr, jy_arr, + jz_arr, offset, np_to_depose, dt, dx, xyzmin, lo, stagger_shift, q); } else if (WarpX::nox == 2){ - doDepositionShapeN<2>(xp, yp, zp, wp.dataPtr(), uxp.dataPtr(), uyp.dataPtr(), uzp.dataPtr(), jx_arr, jy_arr, jz_arr, - offset, np_to_depose, dt, dx, + doDepositionShapeN<2>(xp, yp, zp, wp.dataPtr(), uxp.dataPtr(), + uyp.dataPtr(), uzp.dataPtr(), jx_arr, jy_arr, + jz_arr, offset, np_to_depose, dt, dx, xyzmin, lo, stagger_shift, q); } else if (WarpX::nox == 3){ - doDepositionShapeN<3>(xp, yp, zp, wp.dataPtr(), uxp.dataPtr(), uyp.dataPtr(), uzp.dataPtr(), jx_arr, jy_arr, jz_arr, - offset, np_to_depose, dt, dx, + doDepositionShapeN<3>(xp, yp, zp, wp.dataPtr(), uxp.dataPtr(), + uyp.dataPtr(), uzp.dataPtr(), jx_arr, jy_arr, + jz_arr, offset, np_to_depose, dt, dx, xyzmin, lo, stagger_shift, q); } diff --git a/Source/WarpX.H b/Source/WarpX.H index 97a69340a..d611aa39a 100644 --- a/Source/WarpX.H +++ b/Source/WarpX.H @@ -81,6 +81,7 @@ public: static amrex::Vector B_external; // Algorithms + static long use_picsar_deposition; static long current_deposition_algo; static long charge_deposition_algo; static long field_gathering_algo; diff --git a/Source/WarpX.cpp b/Source/WarpX.cpp index 877882037..a24476312 100644 --- a/Source/WarpX.cpp +++ b/Source/WarpX.cpp @@ -37,6 +37,7 @@ Vector WarpX::boost_direction = {0,0,0}; int WarpX::do_compute_max_step_from_zmax = 0; Real WarpX::zmax_plasma_to_compute_max_step = 0.; +long WarpX::use_picsar_deposition = 1; long WarpX::current_deposition_algo; long WarpX::charge_deposition_algo; long WarpX::field_gathering_algo; @@ -484,6 +485,7 @@ WarpX::ReadParameters () { ParmParse pp("algo"); + pp.query("use_picsar_deposition", use_picsar_deposition) current_deposition_algo = GetAlgorithmInteger(pp, "current_deposition"); charge_deposition_algo = GetAlgorithmInteger(pp, "charge_deposition"); field_gathering_algo = GetAlgorithmInteger(pp, "field_gathering"); -- cgit v1.2.3 From 57d4f470b58203a978f02af0fb44142a71d57fd5 Mon Sep 17 00:00:00 2001 From: MaxThevenet Date: Thu, 11 Jul 2019 14:26:38 -0700 Subject: option use_picsar_deposition --- Source/Particles/Deposition/CurrentDeposition.H | 3 ++- Source/Particles/PhysicalParticleContainer.cpp | 32 ++++++++++++++++++------- Source/Particles/WarpXParticleContainer.H | 15 ++++++++++++ Source/Particles/WarpXParticleContainer.cpp | 2 +- Source/WarpX.cpp | 6 ++--- 5 files changed, 44 insertions(+), 14 deletions(-) (limited to 'Source/Particles/WarpXParticleContainer.cpp') diff --git a/Source/Particles/Deposition/CurrentDeposition.H b/Source/Particles/Deposition/CurrentDeposition.H index 006ff05fe..66db003a3 100644 --- a/Source/Particles/Deposition/CurrentDeposition.H +++ b/Source/Particles/Deposition/CurrentDeposition.H @@ -109,7 +109,7 @@ void doDepositionShapeN(Real* xp, Real* yp, Real* zp, const amrex::Real q) { BL_PROFILE("PICSAR::CurrentDeposition"); - + Real dxi = 1.0/dx[0]; Real dzi = 1.0/dx[2]; Real dts2dx = 0.5*dt*dxi; @@ -130,6 +130,7 @@ void doDepositionShapeN(Real* xp, Real* yp, Real* zp, compute_shape_factor_lo(shape_lo); Real clightsq = 1.0/PhysConst::c/PhysConst::c; + // Loop over particles and deposit into jx_arr, jy_arr and jz_arr ParallelFor( np_to_depose, [=] AMREX_GPU_DEVICE (long ip) { diff --git a/Source/Particles/PhysicalParticleContainer.cpp b/Source/Particles/PhysicalParticleContainer.cpp index df08d77a7..86cd0827f 100644 --- a/Source/Particles/PhysicalParticleContainer.cpp +++ b/Source/Particles/PhysicalParticleContainer.cpp @@ -1529,16 +1529,30 @@ PhysicalParticleContainer::Evolve (int lev, // // Current Deposition // - // Deposit inside domains - DepositCurrent(pti, wp, uxp, uyp, uzp, &jx, &jy, &jz, - 0, np_current, thread_num, - lev, lev, dt); - if (has_buffer){ - // Deposit in buffers - DepositCurrent(pti, wp, uxp, uyp, uzp, cjx, cjy, cjz, - np_current, np-np_current, thread_num, - lev, lev-1, dt); + if (WarpX::use_picsar_deposition) { + // Deposit inside domains + DepositCurrentFortran(pti, wp, uxp, uyp, uzp, &jx, &jy, &jz, + 0, np_current, thread_num, + lev, lev, dt); + if (has_buffer){ + // Deposit in buffers + DepositCurrentFortran(pti, wp, uxp, uyp, uzp, cjx, cjy, cjz, + np_current, np-np_current, thread_num, + lev, lev-1, dt); + } + } else { + // Deposit inside domains + DepositCurrent(pti, wp, uxp, uyp, uzp, &jx, &jy, &jz, + 0, np_current, thread_num, + lev, lev, dt); + if (has_buffer){ + // Deposit in buffers + DepositCurrent(pti, wp, uxp, uyp, uzp, cjx, cjy, cjz, + np_current, np-np_current, thread_num, + lev, lev-1, dt); + } } + // // copy particle data back // diff --git a/Source/Particles/WarpXParticleContainer.H b/Source/Particles/WarpXParticleContainer.H index 6182ec5bd..662b2e1b8 100644 --- a/Source/Particles/WarpXParticleContainer.H +++ b/Source/Particles/WarpXParticleContainer.H @@ -189,6 +189,21 @@ public: int depos_lev, amrex::Real dt); + virtual void DepositCurrentFortran(WarpXParIter& pti, + RealVector& wp, + RealVector& uxp, + RealVector& uyp, + RealVector& uzp, + amrex::MultiFab* jx, + amrex::MultiFab* jy, + amrex::MultiFab* jz, + const long offset, + const long np_to_depose, + int thread_num, + int lev, + int depos_lev, + amrex::Real dt); + // If particles start outside of the domain, ContinuousInjection // makes sure that they are initialized when they enter the domain, and // NOT before. Virtual function, overriden by derived classes. diff --git a/Source/Particles/WarpXParticleContainer.cpp b/Source/Particles/WarpXParticleContainer.cpp index 42b99efcb..ac123f9b6 100644 --- a/Source/Particles/WarpXParticleContainer.cpp +++ b/Source/Particles/WarpXParticleContainer.cpp @@ -435,7 +435,7 @@ WarpXParticleContainer::DepositCurrent(WarpXParIter& pti, const long offset, const long np_to_depose, int thread_num, int lev, int depos_lev, Real dt) -{ +{ AMREX_ALWAYS_ASSERT_WITH_MESSAGE((depos_lev==(lev-1)) || (depos_lev==(lev )), "Deposition buffers only work for lev-1"); diff --git a/Source/WarpX.cpp b/Source/WarpX.cpp index 24011fb2f..fd4840b8d 100644 --- a/Source/WarpX.cpp +++ b/Source/WarpX.cpp @@ -485,11 +485,11 @@ WarpX::ReadParameters () { ParmParse pp("algo"); - pp.query("use_picsar_deposition", use_picsar_deposition) + pp.query("use_picsar_deposition", use_picsar_deposition); current_deposition_algo = GetAlgorithmInteger(pp, "current_deposition"); - if (!use_picsar_algo){ + if (!use_picsar_deposition){ AMREX_ALWAYS_ASSERT_WITH_MESSAGE( current_deposition_algo >= 2, - "if not use_picsar_deposition, cannot use Esirkepov deposition.") + "if not use_picsar_deposition, cannot use Esirkepov deposition."); } charge_deposition_algo = GetAlgorithmInteger(pp, "charge_deposition"); field_gathering_algo = GetAlgorithmInteger(pp, "field_gathering"); -- cgit v1.2.3 From 91ada99c4a2aaabf8c6789e60b37c83b9d2ba9f4 Mon Sep 17 00:00:00 2001 From: MaxThevenet Date: Thu, 11 Jul 2019 18:50:22 -0700 Subject: avoid compilation error when USE_RZ=TRUE --- Source/Particles/WarpXParticleContainer.cpp | 9 --------- 1 file changed, 9 deletions(-) (limited to 'Source/Particles/WarpXParticleContainer.cpp') diff --git a/Source/Particles/WarpXParticleContainer.cpp b/Source/Particles/WarpXParticleContainer.cpp index ac123f9b6..7316dcc95 100644 --- a/Source/Particles/WarpXParticleContainer.cpp +++ b/Source/Particles/WarpXParticleContainer.cpp @@ -527,15 +527,6 @@ WarpXParticleContainer::DepositCurrent(WarpXParIter& pti, xyzmin, lo, stagger_shift, q); } -#ifdef WARPX_RZ - // Rescale current in r-z mode - warpx_current_deposition_rz_volume_scaling( - jx_ptr, &ngJ, jxntot.getVect(), - jy_ptr, &ngJ, jyntot.getVect(), - jz_ptr, &ngJ, jzntot.getVect(), - &xyzmin[0], &dx[0]); -#endif - #ifndef AMREX_USE_GPU BL_PROFILE_VAR_START(blp_accumulate); // CPU, tiling: atomicAdd local_jx into jx -- cgit v1.2.3 From 70714d9712cb0c4e5f2bc842fb1fc6defb5d76cf Mon Sep 17 00:00:00 2001 From: Dave Grote Date: Thu, 18 Jul 2019 08:46:52 -0700 Subject: Initial conversion of Esirkepov deposition to c++ --- Source/Particles/Deposition/CurrentDeposition.H | 187 ++++++++++++++++++++++++ Source/Particles/WarpXParticleContainer.cpp | 50 +++++-- Source/WarpX.cpp | 4 - 3 files changed, 222 insertions(+), 19 deletions(-) (limited to 'Source/Particles/WarpXParticleContainer.cpp') diff --git a/Source/Particles/Deposition/CurrentDeposition.H b/Source/Particles/Deposition/CurrentDeposition.H index efda97892..20493d8c6 100644 --- a/Source/Particles/Deposition/CurrentDeposition.H +++ b/Source/Particles/Deposition/CurrentDeposition.H @@ -176,4 +176,191 @@ void doDepositionShapeN(const Real * const xp, const Real * const yp, const Real ); } +// Compute shape factor and return index of leftmost cell where +// particle writes. +// Specialized templates are defined below for orders 1, 2 and 3. +template +AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE +int compute_shifted_shape_factor(Real* const sx, Real x_old, int j_now) {return 0;}; + +// Compute shape factor for order 1. +template <> +AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE +int compute_shifted_shape_factor <1> (Real* const sx, Real x_old, int j_now){ + int j = (int) x_old; + int i_shift = j - j_now; + Real xint = x_old - j; + sx[1+i_shift] = 1.0 - xint; + sx[2+i_shift] = xint; + return j; +} + +// Compute shape factor for order 2. +template <> +AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE +int compute_shifted_shape_factor <2> (Real* const sx, Real x_old, int j_now){ + int j = (int) (x_old+0.5); + int i_shift = j - (j_now + 1); + Real xint = x_old - j; + sx[1+i_shift] = 0.5*(0.5-xint)*(0.5-xint); + sx[2+i_shift] = 0.75-xint*xint; + sx[3+i_shift] = 0.5*(0.5+xint)*(0.5+xint); + // index of the leftmost cell where particle deposits + return j-1; +} + +// Compute shape factor for order 3. +template <> +AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE +int compute_shifted_shape_factor <3> (Real* const sx, Real x_old, int j_now){ + int j = (int) x_old; + int i_shift = j - (j_now + 1); + Real xint = x_old - j; + sx[1+i_shift] = 1.0/6.0*(1.0-xint)*(1.0-xint)*(1.0-xint); + sx[2+i_shift] = 2.0/3.0-xint*xint*(1-xint/2.0); + sx[3+i_shift] = 2.0/3.0-(1-xint)*(1-xint)*(1.0-0.5*(1-xint)); + sx[4+i_shift] = 1.0/6.0*xint*xint*xint; + // index of the leftmost cell where particle deposits + return j-1; +} + +/* \brief Esirkepov Current Deposition for thread thread_num + * /param xp, yp, zp : Pointer to arrays of particle positions. + * \param wp : Pointer to array of particle weights. + * \param uxp uyp uzp : Pointer to arrays of particle momentum. + * \param jx_arr : Array4 of current density, either full array or tile. + * \param jy_arr : Array4 of current density, either full array or tile. + * \param jz_arr : Array4 of current density, either full array or tile. + * \param offset : Index of first particle for which current is deposited + * \param np_to_depose : Number of particles for which current is deposited. + Particles [offset,offset+np_tp_depose] deposit current. + * \param dt : Time step for particle level + * \param dx : 3D cell size + * \param xyzmin : Physical lower bounds of domain. + * \param lo : Index lower bounds of domain. + * /param q : species charge. + */ +template +void doEsirkepovDepositionShapeN(const Real * const xp, const Real * const yp, const Real * const zp, + const Real * const wp, const Real * const uxp, + const Real * const uyp, const Real * const uzp, + const amrex::Array4& jx_arr, + const amrex::Array4& jy_arr, + const amrex::Array4& jz_arr, + const long offset, const long np_to_depose, + const amrex::Real dt, const std::array& dx, + const std::array xyzmin, + const Dim3 lo, + const amrex::Real q) +{ + const Real dxi = 1.0/dx[0]; + const Real dyi = 1.0/dx[1]; + const Real dzi = 1.0/dx[2]; + const Real dtsdx0 = dt*dxi; + const Real dtsdy0 = dt*dyi; + const Real dtsdz0 = dt*dzi; + const Real invdtdx = 1.0/(dt*dx[1]*dx[2]); + const Real invdtdy = 1.0/(dt*dx[0]*dx[2]); + const Real invdtdz = 1.0/(dt*dx[0]*dx[1]); + + const Real xmin = xyzmin[0]; + const Real ymin = xyzmin[1]; + const Real zmin = xyzmin[2]; + const Real clightsq = 1.0/PhysConst::c/PhysConst::c; + + // Loop over particles and deposit into jx_arr, jy_arr and jz_arr + ParallelFor( np_to_depose, + [=] AMREX_GPU_DEVICE (long ip) { + // --- Get particle quantities + const Real gaminv = 1.0/std::sqrt(1.0 + uxp[ip]*uxp[ip]*clightsq + + uyp[ip]*uyp[ip]*clightsq + + uzp[ip]*uzp[ip]*clightsq); + + // wqx, wqy wqz are particle current in each direction + const Real wq = q*wp[ip]; + const Real wqx = wq*invdtdx; + const Real wqy = wq*invdtdy; + const Real wqz = wq*invdtdz; + + // computes current position in grid units + const Real x_now = (xp[ip] - xmin)*dxi; + const Real y_now = (yp[ip] - ymin)*dyi; + const Real z_now = (zp[ip] - zmin)*dzi; + + // computes old position in grid units + const Real x_old = x_now - dtsdx0*uxp[ip]*gaminv; + const Real y_old = y_now - dtsdy0*uyp[ip]*gaminv; + const Real z_old = z_now - dtsdz0*uzp[ip]*gaminv; + + // Shape factor arrays + // Note that there are extra values above and below + // to possibly hold the factor for the old particle + // which can be at a different grid location. + Real AMREX_RESTRICT sx_now[depos_order + 3]; + Real AMREX_RESTRICT sy_now[depos_order + 3]; + Real AMREX_RESTRICT sz_now[depos_order + 3]; + Real AMREX_RESTRICT sx_old[depos_order + 3]; + Real AMREX_RESTRICT sy_old[depos_order + 3]; + Real AMREX_RESTRICT sz_old[depos_order + 3]; + + // --- Compute shape factors + // Compute shape factors for position as they are now and at old positions + // [jkl]_now: leftmost grid point that the particle touches + const int j_now = compute_shape_factor(sx_now+1, x_now); + const int k_now = compute_shape_factor(sy_now+1, y_now); + const int l_now = compute_shape_factor(sz_now+1, z_now); + + const int j_old = compute_shifted_shape_factor(sx_old, x_old, j_now); + const int k_old = compute_shifted_shape_factor(sy_old, y_old, k_now); + const int l_old = compute_shifted_shape_factor(sz_old, z_old, l_now); + + // computes min/max positions of current contributions + int djs = 1, dje = 1; + if (j_old < j_now) djs = 0; + if (j_old > j_now) dje = 0; + int dks = 1, dke = 1; + if (k_old < k_now) dks = 0; + if (k_old > k_now) dke = 0; + int dls = 1, dle = 1; + if (l_old < l_now) dls = 0; + if (l_old > l_now) dle = 0; + + std::cout << "JJJ " << depos_order << " " << lo.z << " " << l_now-1 << " " << zp[ip] << " " << zmin << " " << dx[2] << "\n"; + for (int l=dls; l<=depos_order+2-dle; l++){ + for (int k=dks; k<=depos_order+2-dke; k++){ + Real sdxi = 0.; + for (int j=djs; j<=depos_order+1-dje; j++){ + sdxi += wqx*(sx_old[j] - sx_now[j])*((sy_now[k] + 0.5*(sy_old[k] - sy_now[k]))*sz_now[l] + + (0.5*sy_now[k] + 1./3.*(sy_old[k] - sy_now[k]))*(sz_old[l] - sz_now[l])); + amrex::Gpu::Atomic::Add( &jx_arr(lo.x+j_now-1+j, lo.y+k_now-1+k, lo.z+l_now-1+l), sdxi); + } + } + } + + for (int l=dls; l<=depos_order+2-dle; l++){ + for (int j=djs; j<=depos_order+2-dje; j++){ + Real sdyj = 0.; + for (int k=dks; k<=depos_order+1-dke; k++){ + sdyj += wqy*(sy_old[k] - sy_now[k])*((sz_now[l] + 0.5*(sz_old[l] - sz_now[l]))*sx_now[l] + + (0.5*sz_now[l] + 1./3.*(sz_old[l] - sz_now[l]))*(sx_old[l] - sx_now[l])); + amrex::Gpu::Atomic::Add( &jy_arr(lo.x+j_now-1+j, lo.y+k_now-1+k, lo.z+l_now-1+l), sdyj); + } + } + } + + for (int k=dks; k<=depos_order+2-dke; k++){ + for (int j=djs; j<=depos_order+2-dje; j++){ + Real sdzk = 0.; + for (int l=dls; l<=depos_order+1-dle; l++){ + sdzk += wqz*(sz_old[l] - sz_now[l])*((sx_now[l] + 0.5*(sx_old[l] - sx_now[l]))*sy_now[k] + + (0.5*sx_now[l] + 1./3.*(sx_old[l] - sx_now[l]))*(sy_old[k] - sy_now[k])); + amrex::Gpu::Atomic::Add( &jz_arr(lo.x+j_now-1+j, lo.y+k_now-1+k, lo.z+l_now-1+l), sdzk); + } + } + } + } + ); + +} + #endif // CURRENTDEPOSITION_H_ diff --git a/Source/Particles/WarpXParticleContainer.cpp b/Source/Particles/WarpXParticleContainer.cpp index 7316dcc95..5a3f9242f 100644 --- a/Source/Particles/WarpXParticleContainer.cpp +++ b/Source/Particles/WarpXParticleContainer.cpp @@ -6,6 +6,7 @@ #include #include #include +#include // Import low-level single-particle kernels #include @@ -510,21 +511,40 @@ WarpXParticleContainer::DepositCurrent(WarpXParIter& pti, // Better for memory? worth trying? const Dim3 lo = lbound(tilebox); - if (WarpX::nox == 1){ - doDepositionShapeN<1>(xp, yp, zp, wp.dataPtr(), uxp.dataPtr(), - uyp.dataPtr(), uzp.dataPtr(), jx_arr, jy_arr, - jz_arr, offset, np_to_depose, dt, dx, - xyzmin, lo, stagger_shift, q); - } else if (WarpX::nox == 2){ - doDepositionShapeN<2>(xp, yp, zp, wp.dataPtr(), uxp.dataPtr(), - uyp.dataPtr(), uzp.dataPtr(), jx_arr, jy_arr, - jz_arr, offset, np_to_depose, dt, dx, - xyzmin, lo, stagger_shift, q); - } else if (WarpX::nox == 3){ - doDepositionShapeN<3>(xp, yp, zp, wp.dataPtr(), uxp.dataPtr(), - uyp.dataPtr(), uzp.dataPtr(), jx_arr, jy_arr, - jz_arr, offset, np_to_depose, dt, dx, - xyzmin, lo, stagger_shift, q); + if (WarpX::current_deposition_algo == CurrentDepositionAlgo::Esirkepov) { + if (WarpX::nox == 1){ + doEsirkepovDepositionShapeN<1>(xp, yp, zp, wp.dataPtr(), uxp.dataPtr(), + uyp.dataPtr(), uzp.dataPtr(), jx_arr, jy_arr, + jz_arr, offset, np_to_depose, dt, dx, + xyzmin, lo, q); + } else if (WarpX::nox == 2){ + doEsirkepovDepositionShapeN<2>(xp, yp, zp, wp.dataPtr(), uxp.dataPtr(), + uyp.dataPtr(), uzp.dataPtr(), jx_arr, jy_arr, + jz_arr, offset, np_to_depose, dt, dx, + xyzmin, lo, q); + } else if (WarpX::nox == 3){ + doEsirkepovDepositionShapeN<3>(xp, yp, zp, wp.dataPtr(), uxp.dataPtr(), + uyp.dataPtr(), uzp.dataPtr(), jx_arr, jy_arr, + jz_arr, offset, np_to_depose, dt, dx, + xyzmin, lo, q); + } + } else { + if (WarpX::nox == 1){ + doDepositionShapeN<1>(xp, yp, zp, wp.dataPtr(), uxp.dataPtr(), + uyp.dataPtr(), uzp.dataPtr(), jx_arr, jy_arr, + jz_arr, offset, np_to_depose, dt, dx, + xyzmin, lo, stagger_shift, q); + } else if (WarpX::nox == 2){ + doDepositionShapeN<2>(xp, yp, zp, wp.dataPtr(), uxp.dataPtr(), + uyp.dataPtr(), uzp.dataPtr(), jx_arr, jy_arr, + jz_arr, offset, np_to_depose, dt, dx, + xyzmin, lo, stagger_shift, q); + } else if (WarpX::nox == 3){ + doDepositionShapeN<3>(xp, yp, zp, wp.dataPtr(), uxp.dataPtr(), + uyp.dataPtr(), uzp.dataPtr(), jx_arr, jy_arr, + jz_arr, offset, np_to_depose, dt, dx, + xyzmin, lo, stagger_shift, q); + } } #ifndef AMREX_USE_GPU diff --git a/Source/WarpX.cpp b/Source/WarpX.cpp index 08948227c..18d76c07e 100644 --- a/Source/WarpX.cpp +++ b/Source/WarpX.cpp @@ -492,10 +492,6 @@ WarpX::ReadParameters () pp.query("use_picsar_deposition", use_picsar_deposition); #endif current_deposition_algo = GetAlgorithmInteger(pp, "current_deposition"); - if (!use_picsar_deposition){ - AMREX_ALWAYS_ASSERT_WITH_MESSAGE( current_deposition_algo >= 2, - "if not use_picsar_deposition, cannot use Esirkepov deposition."); - } charge_deposition_algo = GetAlgorithmInteger(pp, "charge_deposition"); field_gathering_algo = GetAlgorithmInteger(pp, "field_gathering"); particle_pusher_algo = GetAlgorithmInteger(pp, "particle_pusher"); -- cgit v1.2.3 From a23454b5b3e8af636d0c44f0616150095c1a546e Mon Sep 17 00:00:00 2001 From: grote Date: Thu, 18 Jul 2019 17:26:15 -0700 Subject: conversion of Esirkepov deposition to c++ minor fix --- Source/Particles/Deposition/CurrentDeposition.H | 4 +--- Source/Particles/WarpXParticleContainer.cpp | 6 +++--- 2 files changed, 4 insertions(+), 6 deletions(-) (limited to 'Source/Particles/WarpXParticleContainer.cpp') diff --git a/Source/Particles/Deposition/CurrentDeposition.H b/Source/Particles/Deposition/CurrentDeposition.H index d9f709567..a994eb158 100644 --- a/Source/Particles/Deposition/CurrentDeposition.H +++ b/Source/Particles/Deposition/CurrentDeposition.H @@ -231,9 +231,7 @@ int compute_shifted_shape_factor <3> (Real* const sx, Real x_old, int i_new){ * \param Jx_arr : Array4 of current density, either full array or tile. * \param Jy_arr : Array4 of current density, either full array or tile. * \param Jz_arr : Array4 of current density, either full array or tile. - * \param offset : Index of first particle for which current is deposited * \param np_to_depose : Number of particles for which current is deposited. - Particles [offset,offset+np_tp_depose] deposit current. * \param dt : Time step for particle level * \param dx : 3D cell size * \param xyzmin : Physical lower bounds of domain. @@ -247,7 +245,7 @@ void doEsirkepovDepositionShapeN (const Real * const xp, const Real * const yp, const amrex::Array4& Jx_arr, const amrex::Array4& Jy_arr, const amrex::Array4& Jz_arr, - const long offset, const long np_to_depose, + const long np_to_depose, const amrex::Real dt, const std::array& dx, const std::array xyzmin, const Dim3 lo, diff --git a/Source/Particles/WarpXParticleContainer.cpp b/Source/Particles/WarpXParticleContainer.cpp index 5a3f9242f..a20f0035e 100644 --- a/Source/Particles/WarpXParticleContainer.cpp +++ b/Source/Particles/WarpXParticleContainer.cpp @@ -515,17 +515,17 @@ WarpXParticleContainer::DepositCurrent(WarpXParIter& pti, if (WarpX::nox == 1){ doEsirkepovDepositionShapeN<1>(xp, yp, zp, wp.dataPtr(), uxp.dataPtr(), uyp.dataPtr(), uzp.dataPtr(), jx_arr, jy_arr, - jz_arr, offset, np_to_depose, dt, dx, + jz_arr, np_to_depose, dt, dx, xyzmin, lo, q); } else if (WarpX::nox == 2){ doEsirkepovDepositionShapeN<2>(xp, yp, zp, wp.dataPtr(), uxp.dataPtr(), uyp.dataPtr(), uzp.dataPtr(), jx_arr, jy_arr, - jz_arr, offset, np_to_depose, dt, dx, + jz_arr, np_to_depose, dt, dx, xyzmin, lo, q); } else if (WarpX::nox == 3){ doEsirkepovDepositionShapeN<3>(xp, yp, zp, wp.dataPtr(), uxp.dataPtr(), uyp.dataPtr(), uzp.dataPtr(), jx_arr, jy_arr, - jz_arr, offset, np_to_depose, dt, dx, + jz_arr, np_to_depose, dt, dx, xyzmin, lo, q); } } else { -- cgit v1.2.3 From a1b2a770f24c6469e0f773b7cd1d5231c7ed82a4 Mon Sep 17 00:00:00 2001 From: MaxThevenet Date: Tue, 30 Jul 2019 09:28:39 -0700 Subject: some cleaning in current deposition --- Source/Particles/Deposition/CurrentDeposition.H | 543 ++++++++++-------------- Source/Particles/Make.package | 1 + Source/Particles/WarpXParticleContainer.cpp | 6 +- 3 files changed, 232 insertions(+), 318 deletions(-) (limited to 'Source/Particles/WarpXParticleContainer.cpp') diff --git a/Source/Particles/Deposition/CurrentDeposition.H b/Source/Particles/Deposition/CurrentDeposition.H index 97bc53c20..9b9853902 100644 --- a/Source/Particles/Deposition/CurrentDeposition.H +++ b/Source/Particles/Deposition/CurrentDeposition.H @@ -1,52 +1,7 @@ #ifndef CURRENTDEPOSITION_H_ #define CURRENTDEPOSITION_H_ -using namespace amrex; - -// Compute shape factor and return index of leftmost cell where -// particle writes. -// Specialized templates are defined below for orders 1, 2 and 3. -template -AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE -int compute_shape_factor(Real* const sx, Real xint); - -// Compute shape factor for order 1. -template <> -AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE -int compute_shape_factor <1> (Real* const sx, Real xmid){ - int j = (int) xmid; - Real xint = xmid-j; - sx[0] = 1.0 - xint; - sx[1] = xint; - return j; -} - -// Compute shape factor for order 2. -template <> -AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE -int compute_shape_factor <2> (Real* const sx, Real xmid){ - int j = (int) (xmid+0.5); - Real xint = xmid-j; - sx[0] = 0.5*(0.5-xint)*(0.5-xint); - sx[1] = 0.75-xint*xint; - sx[2] = 0.5*(0.5+xint)*(0.5+xint); - // index of the leftmost cell where particle deposits - return j-1; -} - -// Compute shape factor for order 3. -template <> -AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE -int compute_shape_factor <3> (Real* const sx, Real xmid){ - int j = (int) xmid; - Real xint = xmid-j; - sx[0] = 1.0/6.0*(1.0-xint)*(1.0-xint)*(1.0-xint); - sx[1] = 2.0/3.0-xint*xint*(1-xint/2.0); - sx[2] = 2.0/3.0-(1-xint)*(1-xint)*(1.0-0.5*(1-xint)); - sx[3] = 1.0/6.0*xint*xint*xint; - // index of the leftmost cell where particle deposits - return j-1; -} +#include "ShapeFactors.H" /* \brief Current Deposition for thread thread_num * /param xp, yp, zp : Pointer to arrays of particle positions. @@ -55,9 +10,7 @@ int compute_shape_factor <3> (Real* const sx, Real xmid){ * \param jx_arr : Array4 of current density, either full array or tile. * \param jy_arr : Array4 of current density, either full array or tile. * \param jz_arr : Array4 of current density, either full array or tile. - * \param offset : Index of first particle for which current is deposited * \param np_to_depose : Number of particles for which current is deposited. - Particles [offset,offset+np_tp_depose] deposit current. * \param dt : Time step for particle level * \param dx : 3D cell size * \param xyzmin : Physical lower bounds of domain. @@ -66,164 +19,121 @@ int compute_shape_factor <3> (Real* const sx, Real xmid){ * /param q : species charge. */ template -void doDepositionShapeN(const Real * const xp, const Real * const yp, const Real * const zp, - const Real * const wp, const Real * const uxp, - const Real * const uyp, const Real * const uzp, - const amrex::Array4& jx_arr, - const amrex::Array4& jy_arr, +void doDepositionShapeN(const amrex::Real * const xp, + const amrex::Real * const yp, + const amrex::Real * const zp, + const amrex::Real * const wp, + const amrex::Real * const uxp, + const amrex::Real * const uyp, + const amrex::Real * const uzp, + const amrex::Array4& jx_arr, + const amrex::Array4& jy_arr, const amrex::Array4& jz_arr, - const long offset, const long np_to_depose, - const amrex::Real dt, const std::array& dx, - const std::array xyzmin, - const Dim3 lo, + const long np_to_depose, const amrex::Real dt, + const std::array& dx, + const std::array xyzmin, + const amrex::Dim3 lo, const amrex::Real stagger_shift, const amrex::Real q) { - const Real dxi = 1.0/dx[0]; - const Real dzi = 1.0/dx[2]; - const Real dts2dx = 0.5*dt*dxi; - const Real dts2dz = 0.5*dt*dzi; + const amrex::Real dxi = 1.0/dx[0]; + const amrex::Real dzi = 1.0/dx[2]; + const amrex::Real dts2dx = 0.5*dt*dxi; + const amrex::Real dts2dz = 0.5*dt*dzi; #if (AMREX_SPACEDIM == 2) - const Real invvol = dxi*dzi; + const amrex::Real invvol = dxi*dzi; #else // (AMREX_SPACEDIM == 3) - const Real dyi = 1.0/dx[1]; - const Real dts2dy = 0.5*dt*dyi; - const Real invvol = dxi*dyi*dzi; + const amrex::Real dyi = 1.0/dx[1]; + const amrex::Real dts2dy = 0.5*dt*dyi; + const amrex::Real invvol = dxi*dyi*dzi; #endif - const Real xmin = xyzmin[0]; - const Real ymin = xyzmin[1]; - const Real zmin = xyzmin[2]; - const Real clightsq = 1.0/PhysConst::c/PhysConst::c; + const amrex::Real xmin = xyzmin[0]; + const amrex::Real ymin = xyzmin[1]; + const amrex::Real zmin = xyzmin[2]; + const amrex::Real clightsq = 1.0/PhysConst::c/PhysConst::c; // Loop over particles and deposit into jx_arr, jy_arr and jz_arr - ParallelFor( np_to_depose, - [=] AMREX_GPU_DEVICE (long ip) { - // --- Get particle quantities - const Real gaminv = 1.0/std::sqrt(1.0 + uxp[ip]*uxp[ip]*clightsq - + uyp[ip]*uyp[ip]*clightsq - + uzp[ip]*uzp[ip]*clightsq); - const Real wq = q*wp[ip]; - const Real vx = uxp[ip]*gaminv; - const Real vy = uyp[ip]*gaminv; - const Real vz = uzp[ip]*gaminv; - // wqx, wqy wqz are particle current in each direction - const Real wqx = wq*invvol*vx; - const Real wqy = wq*invvol*vy; - const Real wqz = wq*invvol*vz; - - // --- Compute shape factors - // x direction - // Get particle position after 1/2 push back in position - const Real xmid = (xp[ip]-xmin)*dxi-dts2dx*vx; - // Compute shape factors for node-centered quantities - Real AMREX_RESTRICT sx [depos_order + 1]; - // j: leftmost grid point (node-centered) that the particle touches - const int j = compute_shape_factor(sx, xmid); - // Compute shape factors for cell-centered quantities - Real AMREX_RESTRICT sx0[depos_order + 1]; - // j0: leftmost grid point (cell-centered) that the particle touches - const int j0 = compute_shape_factor(sx0, xmid-stagger_shift); + amrex::ParallelFor( + np_to_depose, + [=] AMREX_GPU_DEVICE (long ip) { + // --- Get particle quantities + const amrex::Real gaminv = 1.0/std::sqrt(1.0 + uxp[ip]*uxp[ip]*clightsq + + uyp[ip]*uyp[ip]*clightsq + + uzp[ip]*uzp[ip]*clightsq); + const amrex::Real wq = q*wp[ip]; + const amrex::Real vx = uxp[ip]*gaminv; + const amrex::Real vy = uyp[ip]*gaminv; + const amrex::Real vz = uzp[ip]*gaminv; + // wqx, wqy wqz are particle current in each direction + const amrex::Real wqx = wq*invvol*vx; + const amrex::Real wqy = wq*invvol*vy; + const amrex::Real wqz = wq*invvol*vz; + + // --- Compute shape factors + // x direction + // Get particle position after 1/2 push back in position + const amrex::Real xmid = (xp[ip]-xmin)*dxi-dts2dx*vx; + // Compute shape factors for node-centered quantities + amrex::Real AMREX_RESTRICT sx [depos_order + 1]; + // j: leftmost grid point (node-centered) that the particle touches + const int j = compute_shape_factor(sx, xmid); + // Compute shape factors for cell-centered quantities + amrex::Real AMREX_RESTRICT sx0[depos_order + 1]; + // j0: leftmost grid point (cell-centered) that the particle touches + const int j0 = compute_shape_factor(sx0, xmid-stagger_shift); #if (AMREX_SPACEDIM == 3) - // y direction - const Real ymid= (yp[ip]-ymin)*dyi-dts2dy*vy; - Real AMREX_RESTRICT sy [depos_order + 1]; - const int k = compute_shape_factor(sy, ymid); - Real AMREX_RESTRICT sy0[depos_order + 1]; - const int k0 = compute_shape_factor(sy0, ymid-stagger_shift); + // y direction + const amrex::Real ymid= (yp[ip]-ymin)*dyi-dts2dy*vy; + amrex::Real AMREX_RESTRICT sy [depos_order + 1]; + const int k = compute_shape_factor(sy, ymid); + amrex::Real AMREX_RESTRICT sy0[depos_order + 1]; + const int k0 = compute_shape_factor(sy0, ymid-stagger_shift); #endif - // z direction - const Real zmid= (zp[ip]-zmin)*dzi-dts2dz*vz; - Real AMREX_RESTRICT sz [depos_order + 1]; - const int l = compute_shape_factor(sz, zmid); - Real AMREX_RESTRICT sz0[depos_order + 1]; - const int l0 = compute_shape_factor(sz0, zmid-stagger_shift); - - // Deposit current into jx_arr, jy_arr and jz_arr + // z direction + const amrex::Real zmid= (zp[ip]-zmin)*dzi-dts2dz*vz; + amrex::Real AMREX_RESTRICT sz [depos_order + 1]; + const int l = compute_shape_factor(sz, zmid); + amrex::Real AMREX_RESTRICT sz0[depos_order + 1]; + const int l0 = compute_shape_factor(sz0, zmid-stagger_shift); + + // Deposit current into jx_arr, jy_arr and jz_arr #if (AMREX_SPACEDIM == 2) - for (int iz=0; iz<=depos_order; iz++){ - for (int ix=0; ix<=depos_order; ix++){ - amrex::Gpu::Atomic::Add( - &jx_arr(lo.x+j0+ix, lo.y+l +iz, 0), - sx0[ix]*sz [iz]*wqx); - amrex::Gpu::Atomic::Add( - &jy_arr(lo.x+j +ix, lo.y+l +iz, 0), - sx [ix]*sz [iz]*wqy); - amrex::Gpu::Atomic::Add( - &jz_arr(lo.x+j +ix, lo.y+l0+iz, 0), - sx [ix]*sz0[iz]*wqz); - } - } + for (int iz=0; iz<=depos_order; iz++){ + for (int ix=0; ix<=depos_order; ix++){ + amrex::Gpu::Atomic::Add( + &jx_arr(lo.x+j0+ix, lo.y+l +iz, 0), + sx0[ix]*sz [iz]*wqx); + amrex::Gpu::Atomic::Add( + &jy_arr(lo.x+j +ix, lo.y+l +iz, 0), + sx [ix]*sz [iz]*wqy); + amrex::Gpu::Atomic::Add( + &jz_arr(lo.x+j +ix, lo.y+l0+iz, 0), + sx [ix]*sz0[iz]*wqz); + } + } #else // (AMREX_SPACEDIM == 3) - for (int iz=0; iz<=depos_order; iz++){ - for (int iy=0; iy<=depos_order; iy++){ - for (int ix=0; ix<=depos_order; ix++){ - amrex::Gpu::Atomic::Add( - &jx_arr(lo.x+j0+ix, lo.y+k +iy, lo.z+l +iz), - sx0[ix]*sy [iy]*sz [iz]*wqx); - amrex::Gpu::Atomic::Add( - &jy_arr(lo.x+j +ix, lo.y+k0+iy, lo.z+l +iz), - sx [ix]*sy0[iy]*sz [iz]*wqy); - amrex::Gpu::Atomic::Add( - &jz_arr(lo.x+j +ix, lo.y+k +iy, lo.z+l0+iz), - sx [ix]*sy [iy]*sz0[iz]*wqz); - } - } - } + for (int iz=0; iz<=depos_order; iz++){ + for (int iy=0; iy<=depos_order; iy++){ + for (int ix=0; ix<=depos_order; ix++){ + amrex::Gpu::Atomic::Add( + &jx_arr(lo.x+j0+ix, lo.y+k +iy, lo.z+l +iz), + sx0[ix]*sy [iy]*sz [iz]*wqx); + amrex::Gpu::Atomic::Add( + &jy_arr(lo.x+j +ix, lo.y+k0+iy, lo.z+l +iz), + sx [ix]*sy0[iy]*sz [iz]*wqy); + amrex::Gpu::Atomic::Add( + &jz_arr(lo.x+j +ix, lo.y+k +iy, lo.z+l0+iz), + sx [ix]*sy [iy]*sz0[iz]*wqz); + } + } + } #endif - } + } ); } -// Compute shape factor and return index of leftmost cell where -// particle writes. -// Specialized templates are defined below for orders 1, 2 and 3. -template -AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE -int compute_shifted_shape_factor (Real* const sx, const Real x_old, const int i_new); - -// Compute shape factor for order 1. -template <> -AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE -int compute_shifted_shape_factor <1> (Real* const sx, const Real x_old, const int i_new){ - const int i = (int) x_old; - const int i_shift = i - i_new; - const Real xint = x_old - i; - sx[1+i_shift] = 1.0 - xint; - sx[2+i_shift] = xint; - return i; -} - -// Compute shape factor for order 2. -template <> -AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE -int compute_shifted_shape_factor <2> (Real* const sx, const Real x_old, const int i_new){ - const int i = (int) (x_old+0.5); - const int i_shift = i - (i_new + 1); - const Real xint = x_old - i; - sx[1+i_shift] = 0.5*(0.5-xint)*(0.5-xint); - sx[2+i_shift] = 0.75-xint*xint; - sx[3+i_shift] = 0.5*(0.5+xint)*(0.5+xint); - // index of the leftmost cell where particle deposits - return i-1; -} - -// Compute shape factor for order 3. -template <> -AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE -int compute_shifted_shape_factor <3> (Real* const sx, const Real x_old, const int i_new){ - const int i = (int) x_old; - const int i_shift = i - (i_new + 1); - const Real xint = x_old - i; - sx[1+i_shift] = 1.0/6.0*(1.0-xint)*(1.0-xint)*(1.0-xint); - sx[2+i_shift] = 2.0/3.0-xint*xint*(1-xint/2.0); - sx[3+i_shift] = 2.0/3.0-(1-xint)*(1-xint)*(1.0-0.5*(1-xint)); - sx[4+i_shift] = 1.0/6.0*xint*xint*xint; - // index of the leftmost cell where particle deposits - return i-1; -} - /* \brief Esirkepov Current Deposition for thread thread_num * /param xp, yp, zp : Pointer to arrays of particle positions. * \param wp : Pointer to array of particle weights. @@ -239,170 +149,173 @@ int compute_shifted_shape_factor <3> (Real* const sx, const Real x_old, const in * /param q : species charge. */ template -void doEsirkepovDepositionShapeN (const Real * const xp, const Real * const yp, const Real * const zp, - const Real * const wp, const Real * const uxp, - const Real * const uyp, const Real * const uzp, +void doEsirkepovDepositionShapeN (const amrex::Real * const xp, + const amrex::Real * const yp, + const amrex::Real * const zp, + const amrex::Real * const wp, + const amrex::Real * const uxp, + const amrex::Real * const uyp, + const amrex::Real * const uzp, const amrex::Array4& Jx_arr, const amrex::Array4& Jy_arr, const amrex::Array4& Jz_arr, const long np_to_depose, - const amrex::Real dt, const std::array& dx, - const std::array xyzmin, - const Dim3 lo, + const amrex::Real dt, + const std::array& dx, + const std::array xyzmin, + const amrex::Dim3 lo, const amrex::Real q) { - const Real dxi = 1.0/dx[0]; - const Real dtsdx0 = dt*dxi; - const Real xmin = xyzmin[0]; + const amrex::Real dxi = 1.0/dx[0]; + const amrex::Real dtsdx0 = dt*dxi; + const amrex::Real xmin = xyzmin[0]; #if (AMREX_SPACEDIM == 3) - const Real dyi = 1.0/dx[1]; - const Real dtsdy0 = dt*dyi; - const Real ymin = xyzmin[1]; + const amrex::Real dyi = 1.0/dx[1]; + const amrex::Real dtsdy0 = dt*dyi; + const amrex::Real ymin = xyzmin[1]; #endif - const Real dzi = 1.0/dx[2]; - const Real dtsdz0 = dt*dzi; - const Real zmin = xyzmin[2]; + const amrex::Real dzi = 1.0/dx[2]; + const amrex::Real dtsdz0 = dt*dzi; + const amrex::Real zmin = xyzmin[2]; #if (AMREX_SPACEDIM == 3) - const Real invdtdx = 1.0/(dt*dx[1]*dx[2]); - const Real invdtdy = 1.0/(dt*dx[0]*dx[2]); - const Real invdtdz = 1.0/(dt*dx[0]*dx[1]); + const amrex::Real invdtdx = 1.0/(dt*dx[1]*dx[2]); + const amrex::Real invdtdy = 1.0/(dt*dx[0]*dx[2]); + const amrex::Real invdtdz = 1.0/(dt*dx[0]*dx[1]); #elif (AMREX_SPACEDIM == 2) - const Real invdtdx = 1.0/(dt*dx[2]); - const Real invdtdz = 1.0/(dt*dx[0]); - const Real invvol = 1.0/(dx[0]*dx[2]); + const amrex::Real invdtdx = 1.0/(dt*dx[2]); + const amrex::Real invdtdz = 1.0/(dt*dx[0]); + const amrex::Real invvol = 1.0/(dx[0]*dx[2]); #endif - const Real clightsq = 1.0/PhysConst::c/PhysConst::c; + const amrex::Real clightsq = 1.0/PhysConst::c/PhysConst::c; // Loop over particles and deposit into Jx_arr, Jy_arr and Jz_arr - ParallelFor( np_to_depose, - [=] AMREX_GPU_DEVICE (long ip) { - - // --- Get particle quantities - const Real gaminv = 1.0/std::sqrt(1.0 + uxp[ip]*uxp[ip]*clightsq - + uyp[ip]*uyp[ip]*clightsq - + uzp[ip]*uzp[ip]*clightsq); - - // wqx, wqy wqz are particle current in each direction - const Real wq = q*wp[ip]; - const Real wqx = wq*invdtdx; + amrex::ParallelFor( + np_to_depose, + [=] AMREX_GPU_DEVICE (long ip) { + + // --- Get particle quantities + const amrex::Real gaminv = 1.0/std::sqrt(1.0 + uxp[ip]*uxp[ip]*clightsq + + uyp[ip]*uyp[ip]*clightsq + + uzp[ip]*uzp[ip]*clightsq); + + // wqx, wqy wqz are particle current in each direction + const amrex::Real wq = q*wp[ip]; + const amrex::Real wqx = wq*invdtdx; #if (AMREX_SPACEDIM == 3) - const Real wqy = wq*invdtdy; + const amrex::Real wqy = wq*invdtdy; #endif - const Real wqz = wq*invdtdz; + const amrex::Real wqz = wq*invdtdz; - // computes current and old position in grid units - const Real x_new = (xp[ip] - xmin)*dxi; - const Real x_old = x_new - dtsdx0*uxp[ip]*gaminv; + // computes current and old position in grid units + const amrex::Real x_new = (xp[ip] - xmin)*dxi; + const amrex::Real x_old = x_new - dtsdx0*uxp[ip]*gaminv; #if (AMREX_SPACEDIM == 3) - const Real y_new = (yp[ip] - ymin)*dyi; - const Real y_old = y_new - dtsdy0*uyp[ip]*gaminv; + const amrex::Real y_new = (yp[ip] - ymin)*dyi; + const amrex::Real y_old = y_new - dtsdy0*uyp[ip]*gaminv; #endif - const Real z_new = (zp[ip] - zmin)*dzi; - const Real z_old = z_new - dtsdz0*uzp[ip]*gaminv; - - // Shape factor arrays - // Note that there are extra values above and below - // to possibly hold the factor for the old particle - // which can be at a different grid location. - Real AMREX_RESTRICT sx_new[depos_order + 3] = {0.}; - Real AMREX_RESTRICT sx_old[depos_order + 3] = {0.}; + const amrex::Real z_new = (zp[ip] - zmin)*dzi; + const amrex::Real z_old = z_new - dtsdz0*uzp[ip]*gaminv; + + // Shape factor arrays + // Note that there are extra values above and below + // to possibly hold the factor for the old particle + // which can be at a different grid location. + amrex::Real AMREX_RESTRICT sx_new[depos_order + 3] = {0.}; + amrex::Real AMREX_RESTRICT sx_old[depos_order + 3] = {0.}; #if (AMREX_SPACEDIM == 3) - Real AMREX_RESTRICT sy_new[depos_order + 3] = {0.}; - Real AMREX_RESTRICT sy_old[depos_order + 3] = {0.}; + amrex::Real AMREX_RESTRICT sy_new[depos_order + 3] = {0.}; + amrex::Real AMREX_RESTRICT sy_old[depos_order + 3] = {0.}; #endif - Real AMREX_RESTRICT sz_new[depos_order + 3] = {0.}; - Real AMREX_RESTRICT sz_old[depos_order + 3] = {0.}; - - // --- Compute shape factors - // Compute shape factors for position as they are now and at old positions - // [ijk]_new: leftmost grid point that the particle touches - const int i_new = compute_shape_factor(sx_new+1, x_new); - const int i_old = compute_shifted_shape_factor(sx_old, x_old, i_new); + amrex::Real AMREX_RESTRICT sz_new[depos_order + 3] = {0.}; + amrex::Real AMREX_RESTRICT sz_old[depos_order + 3] = {0.}; + + // --- Compute shape factors + // Compute shape factors for position as they are now and at old positions + // [ijk]_new: leftmost grid point that the particle touches + const int i_new = compute_shape_factor(sx_new+1, x_new); + const int i_old = compute_shifted_shape_factor(sx_old, x_old, i_new); #if (AMREX_SPACEDIM == 3) - const int j_new = compute_shape_factor(sy_new+1, y_new); - const int j_old = compute_shifted_shape_factor(sy_old, y_old, j_new); + const int j_new = compute_shape_factor(sy_new+1, y_new); + const int j_old = compute_shifted_shape_factor(sy_old, y_old, j_new); #endif - const int k_new = compute_shape_factor(sz_new+1, z_new); - const int k_old = compute_shifted_shape_factor(sz_old, z_old, k_new); + const int k_new = compute_shape_factor(sz_new+1, z_new); + const int k_old = compute_shifted_shape_factor(sz_old, z_old, k_new); - // computes min/max positions of current contributions - int dil = 1, diu = 1; - if (i_old < i_new) dil = 0; - if (i_old > i_new) diu = 0; + // computes min/max positions of current contributions + int dil = 1, diu = 1; + if (i_old < i_new) dil = 0; + if (i_old > i_new) diu = 0; #if (AMREX_SPACEDIM == 3) - int djl = 1, dju = 1; - if (j_old < j_new) djl = 0; - if (j_old > j_new) dju = 0; + int djl = 1, dju = 1; + if (j_old < j_new) djl = 0; + if (j_old > j_new) dju = 0; #endif - int dkl = 1, dku = 1; - if (k_old < k_new) dkl = 0; - if (k_old > k_new) dku = 0; + int dkl = 1, dku = 1; + if (k_old < k_new) dkl = 0; + if (k_old > k_new) dku = 0; #if (AMREX_SPACEDIM == 3) - for (int k=dkl; k<=depos_order+2-dku; k++) { - for (int j=djl; j<=depos_order+2-dju; j++) { - Real sdxi = 0.; - for (int i=dil; i<=depos_order+1-diu; i++) { - sdxi += wqx*(sx_old[i] - sx_new[i])*((sy_new[j] + 0.5*(sy_old[j] - sy_new[j]))*sz_new[k] + - (0.5*sy_new[j] + 1./3.*(sy_old[j] - sy_new[j]))*(sz_old[k] - sz_new[k])); - amrex::Gpu::Atomic::Add( &Jx_arr(lo.x+i_new-1+i, lo.y+j_new-1+j, lo.z+k_new-1+k), sdxi); - } - } - } - for (int k=dkl; k<=depos_order+2-dku; k++) { - for (int i=dil; i<=depos_order+2-diu; i++) { - Real sdyj = 0.; - for (int j=djl; j<=depos_order+1-dju; j++) { - sdyj += wqy*(sy_old[j] - sy_new[j])*((sz_new[k] + 0.5*(sz_old[k] - sz_new[k]))*sx_new[i] + - (0.5*sz_new[k] + 1./3.*(sz_old[k] - sz_new[k]))*(sx_old[i] - sx_new[i])); - amrex::Gpu::Atomic::Add( &Jy_arr(lo.x+i_new-1+i, lo.y+j_new-1+j, lo.z+k_new-1+k), sdyj); - } - } - } - for (int j=djl; j<=depos_order+2-dju; j++) { - for (int i=dil; i<=depos_order+2-diu; i++) { - Real sdzk = 0.; - for (int k=dkl; k<=depos_order+1-dku; k++) { - sdzk += wqz*(sz_old[k] - sz_new[k])*((sx_new[i] + 0.5*(sx_old[i] - sx_new[i]))*sy_new[j] + - (0.5*sx_new[i] + 1./3.*(sx_old[i] - sx_new[i]))*(sy_old[j] - sy_new[j])); - amrex::Gpu::Atomic::Add( &Jz_arr(lo.x+i_new-1+i, lo.y+j_new-1+j, lo.z+k_new-1+k), sdzk); - } - } - } - -#elif (AMREX_SPACEDIM == 2) - - for (int k=dkl; k<=depos_order+2-dku; k++) { - Real sdxi = 0.; - for (int i=dil; i<=depos_order+1-diu; i++) { - sdxi += wqx*(sx_old[i] - sx_new[i])*(sz_new[k] + 0.5*(sz_old[k] - sz_new[k])); - amrex::Gpu::Atomic::Add( &Jx_arr(lo.x+i_new-1+i, lo.y+k_new-1+k, 0), sdxi); - } + for (int k=dkl; k<=depos_order+2-dku; k++) { + for (int j=djl; j<=depos_order+2-dju; j++) { + amrex::Real sdxi = 0.; + for (int i=dil; i<=depos_order+1-diu; i++) { + sdxi += wqx*(sx_old[i] - sx_new[i])*((sy_new[j] + 0.5*(sy_old[j] - sy_new[j]))*sz_new[k] + + (0.5*sy_new[j] + 1./3.*(sy_old[j] - sy_new[j]))*(sz_old[k] - sz_new[k])); + amrex::Gpu::Atomic::Add( &Jx_arr(lo.x+i_new-1+i, lo.y+j_new-1+j, lo.z+k_new-1+k), sdxi); } - for (int k=dkl; k<=depos_order+2-dku; k++) { - for (int i=dil; i<=depos_order+2-diu; i++) { - const Real sdyj = wq*uyp[ip]*gaminv*invvol*((sz_new[k] + 0.5*(sz_old[k] - sz_new[k]))*sx_new[i] + - (0.5*sz_new[k] + 1./3.*(sz_old[k] - sz_new[k]))*(sx_old[i] - sx_new[i])); - amrex::Gpu::Atomic::Add( &Jy_arr(lo.x+i_new-1+i, lo.y+k_new-1+k, 0), sdyj); - } + } + } + for (int k=dkl; k<=depos_order+2-dku; k++) { + for (int i=dil; i<=depos_order+2-diu; i++) { + amrex::Real sdyj = 0.; + for (int j=djl; j<=depos_order+1-dju; j++) { + sdyj += wqy*(sy_old[j] - sy_new[j])*((sz_new[k] + 0.5*(sz_old[k] - sz_new[k]))*sx_new[i] + + (0.5*sz_new[k] + 1./3.*(sz_old[k] - sz_new[k]))*(sx_old[i] - sx_new[i])); + amrex::Gpu::Atomic::Add( &Jy_arr(lo.x+i_new-1+i, lo.y+j_new-1+j, lo.z+k_new-1+k), sdyj); } - for (int i=dil; i<=depos_order+2-diu; i++) { - Real sdzk = 0.; - for (int k=dkl; k<=depos_order+1-dku; k++) { - sdzk += wqz*(sz_old[k] - sz_new[k])*(sx_new[i] + 0.5*(sx_old[i] - sx_new[i])); - amrex::Gpu::Atomic::Add( &Jz_arr(lo.x+i_new-1+i, lo.y+k_new-1+k, 0), sdzk); - } + } + } + for (int j=djl; j<=depos_order+2-dju; j++) { + for (int i=dil; i<=depos_order+2-diu; i++) { + amrex::Real sdzk = 0.; + for (int k=dkl; k<=depos_order+1-dku; k++) { + sdzk += wqz*(sz_old[k] - sz_new[k])*((sx_new[i] + 0.5*(sx_old[i] - sx_new[i]))*sy_new[j] + + (0.5*sx_new[i] + 1./3.*(sx_old[i] - sx_new[i]))*(sy_old[j] - sy_new[j])); + amrex::Gpu::Atomic::Add( &Jz_arr(lo.x+i_new-1+i, lo.y+j_new-1+j, lo.z+k_new-1+k), sdzk); } + } + } -#endif - } - ); - +#elif (AMREX_SPACEDIM == 2) + for (int k=dkl; k<=depos_order+2-dku; k++) { + amrex::Real sdxi = 0.; + for (int i=dil; i<=depos_order+1-diu; i++) { + sdxi += wqx*(sx_old[i] - sx_new[i])*(sz_new[k] + 0.5*(sz_old[k] - sz_new[k])); + amrex::Gpu::Atomic::Add( &Jx_arr(lo.x+i_new-1+i, lo.y+k_new-1+k, 0), sdxi); + } + } + for (int k=dkl; k<=depos_order+2-dku; k++) { + for (int i=dil; i<=depos_order+2-diu; i++) { + const amrex::Real sdyj = wq*uyp[ip]*gaminv*invvol*((sz_new[k] + 0.5*(sz_old[k] - sz_new[k]))*sx_new[i] + + (0.5*sz_new[k] + 1./3.*(sz_old[k] - sz_new[k]))*(sx_old[i] - sx_new[i])); + amrex::Gpu::Atomic::Add( &Jy_arr(lo.x+i_new-1+i, lo.y+k_new-1+k, 0), sdyj); + } + } + for (int i=dil; i<=depos_order+2-diu; i++) { + amrex::Real sdzk = 0.; + for (int k=dkl; k<=depos_order+1-dku; k++) { + sdzk += wqz*(sz_old[k] - sz_new[k])*(sx_new[i] + 0.5*(sx_old[i] - sx_new[i])); + amrex::Gpu::Atomic::Add( &Jz_arr(lo.x+i_new-1+i, lo.y+k_new-1+k, 0), sdzk); + } + } +#endif + } + ); } #endif // CURRENTDEPOSITION_H_ diff --git a/Source/Particles/Make.package b/Source/Particles/Make.package index 2038472a1..c43dc7e6c 100644 --- a/Source/Particles/Make.package +++ b/Source/Particles/Make.package @@ -9,6 +9,7 @@ CEXE_headers += MultiParticleContainer.H CEXE_headers += WarpXParticleContainer.H CEXE_headers += RigidInjectedParticleContainer.H CEXE_headers += PhysicalParticleContainer.H +CEXE_headers += ShapeFactors.H include $(WARPX_HOME)/Source/Particles/Pusher/Make.package include $(WARPX_HOME)/Source/Particles/Deposition/Make.package diff --git a/Source/Particles/WarpXParticleContainer.cpp b/Source/Particles/WarpXParticleContainer.cpp index a20f0035e..89f233b2c 100644 --- a/Source/Particles/WarpXParticleContainer.cpp +++ b/Source/Particles/WarpXParticleContainer.cpp @@ -532,17 +532,17 @@ WarpXParticleContainer::DepositCurrent(WarpXParIter& pti, if (WarpX::nox == 1){ doDepositionShapeN<1>(xp, yp, zp, wp.dataPtr(), uxp.dataPtr(), uyp.dataPtr(), uzp.dataPtr(), jx_arr, jy_arr, - jz_arr, offset, np_to_depose, dt, dx, + jz_arr, np_to_depose, dt, dx, xyzmin, lo, stagger_shift, q); } else if (WarpX::nox == 2){ doDepositionShapeN<2>(xp, yp, zp, wp.dataPtr(), uxp.dataPtr(), uyp.dataPtr(), uzp.dataPtr(), jx_arr, jy_arr, - jz_arr, offset, np_to_depose, dt, dx, + jz_arr, np_to_depose, dt, dx, xyzmin, lo, stagger_shift, q); } else if (WarpX::nox == 3){ doDepositionShapeN<3>(xp, yp, zp, wp.dataPtr(), uxp.dataPtr(), uyp.dataPtr(), uzp.dataPtr(), jx_arr, jy_arr, - jz_arr, offset, np_to_depose, dt, dx, + jz_arr, np_to_depose, dt, dx, xyzmin, lo, stagger_shift, q); } } -- cgit v1.2.3 From 720f05cd0dcf4bc6abd775181846c22b070d4dd9 Mon Sep 17 00:00:00 2001 From: Dave Grote Date: Wed, 31 Jul 2019 11:50:41 -0700 Subject: Implemented RZ current density inverse volume scaling in c++ --- Source/Particles/WarpXParticleContainer.cpp | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'Source/Particles/WarpXParticleContainer.cpp') diff --git a/Source/Particles/WarpXParticleContainer.cpp b/Source/Particles/WarpXParticleContainer.cpp index 89f233b2c..d78837cf5 100644 --- a/Source/Particles/WarpXParticleContainer.cpp +++ b/Source/Particles/WarpXParticleContainer.cpp @@ -503,6 +503,7 @@ WarpXParticleContainer::DepositCurrent(WarpXParIter& pti, Real* AMREX_RESTRICT yp = m_yp[thread_num].dataPtr() + offset; // Lower corner of tile box physical domain + // Note that this includes guard cells since it is after tilebox.ngrow const std::array& xyzmin = WarpX::LowerCorner(tilebox, depos_lev);; // xyzmin is built on pti.tilebox(), so it does // not include staggering, so the stagger_shift has to be done by hand. @@ -547,6 +548,12 @@ WarpXParticleContainer::DepositCurrent(WarpXParIter& pti, } } +#ifdef WARPX_DIM_RZ + ApplyInverseVolumeScalingToCurrentDensity (jx_arr, jy_arr, jz_arr, + tbx, tby, tbz, + xyzmin[0], lo.x, dx[0], ngJ); +#endif + #ifndef AMREX_USE_GPU BL_PROFILE_VAR_START(blp_accumulate); // CPU, tiling: atomicAdd local_jx into jx -- cgit v1.2.3 From 2ddcc226e87e2d91b887bce09a1dc07b99685692 Mon Sep 17 00:00:00 2001 From: Dave Grote Date: Wed, 31 Jul 2019 13:05:01 -0700 Subject: Included ApplyInverseVolumeScalingToCurrentDensity --- Source/Particles/Deposition/CurrentDeposition.H | 83 ++++++++++++++++++++++++- Source/Particles/WarpXParticleContainer.cpp | 6 +- 2 files changed, 85 insertions(+), 4 deletions(-) (limited to 'Source/Particles/WarpXParticleContainer.cpp') diff --git a/Source/Particles/Deposition/CurrentDeposition.H b/Source/Particles/Deposition/CurrentDeposition.H index 9a6978dbe..16092d513 100644 --- a/Source/Particles/Deposition/CurrentDeposition.H +++ b/Source/Particles/Deposition/CurrentDeposition.H @@ -40,7 +40,7 @@ void doDepositionShapeN(const amrex::Real * const xp, const amrex::Real dzi = 1.0/dx[2]; const amrex::Real dts2dx = 0.5*dt*dxi; const amrex::Real dts2dz = 0.5*dt*dzi; -#if (defined WARPX_DIM_2D) +#if (AMREX_SPACEDIM == 2) const amrex::Real invvol = dxi*dzi; #elif (defined WARPX_DIM_3D) const amrex::Real dyi = 1.0/dx[1]; @@ -333,4 +333,85 @@ void doEsirkepovDepositionShapeN (const amrex::Real * const xp, ); } +#ifdef WARPX_RZ +/* \brief Apply the inverse volume scaling on the current density + * \param J[rtz]_arr : The current density arrays + * /param tb[rtz] : Tileboxes of the J grids + * \param rmin : Minimum radius of the grid + * \param irmin : Grid cell index at rmin + * \param dr : Radial grid cell size + * \param ngJ : Number of guard cells + */ +void ApplyInverseVolumeScalingToCurrentDensity (amrex::Array4 const& Jr_arr, + amrex::Array4 const& Jt_arr, + amrex::Array4 const& Jz_arr, + const amrex::Box& tbr, + const amrex::Box& tbt, + const amrex::Box& tbz, + const amrex::Real rmin, + const int irmin, + const amrex::Real dr, + const long ngJ) +{ + // Rescale current in r-z mode since the inverse volume factor was not + // included in the current deposition. + amrex::ParallelFor(tbr, + [=] AMREX_GPU_DEVICE (int i, int j, int k) + { + // Wrap the current density deposited in the guard cells around + // to the cells above the axis. + // Note that Jr(i==0) is at 1/2 dr. + if (rmin == 0. && 0 <= i && i < ngJ) { + Jr_arr(i,j,0) -= Jr_arr(-1-i,j,0); + } + // Apply the inverse volume scaling + // Since Jr is not cell centered in r, no need for distinction + // between on axis and off-axis factors + const amrex::Real r = std::abs(rmin + (i - irmin + 0.5)*dr); + Jr_arr(i,j,0) /= (2.*MathConst::pi*r); + }); + amrex::ParallelFor(tbt, + [=] AMREX_GPU_DEVICE (int i, int j, int k) + { + // Wrap the current density deposited in the guard cells around + // to the cells above the axis. + // Jt is located on the boundary + if (rmin == 0. && 0 < i && i <= ngJ) { + Jt_arr(i,j,0) += Jt_arr(-i,j,0); + } + + // Apply the inverse volume scaling + // Jt is forced to zero on axis. + const amrex::Real r = std::abs(rmin + (i - irmin)*dr); + if (r == 0.) { + Jt_arr(i,j,0) = 0.; + } else { + Jt_arr(i,j,0) /= (2.*MathConst::pi*r); + } + }); + amrex::ParallelFor(tbz, + [=] AMREX_GPU_DEVICE (int i, int j, int k) + { + // Wrap the current density deposited in the guard cells around + // to the cells above the axis. + // Jz is located on the boundary + if (rmin == 0. && 0 < i && i <= ngJ) { + Jz_arr(i,j,0) += Jz_arr(-i,j,0); + } + + // Apply the inverse volume scaling + const amrex::Real r = std::abs(rmin + (i - irmin)*dr); + if (r == 0.) { + // Verboncoeur JCP 164, 421-427 (2001) : corrected volume on axis + Jz_arr(i,j,0) /= (MathConst::pi*dr/3.); + // Alternative (when order in r is limited one, which is not implemented) + // Jz_arr(i,j,0) /= (MathConst::pi*dr/4.); + } else { + Jz_arr(i,j,0) /= (2.*MathConst::pi*r); + } + }); +} +#endif + + #endif // CURRENTDEPOSITION_H_ diff --git a/Source/Particles/WarpXParticleContainer.cpp b/Source/Particles/WarpXParticleContainer.cpp index d78837cf5..787c0e397 100644 --- a/Source/Particles/WarpXParticleContainer.cpp +++ b/Source/Particles/WarpXParticleContainer.cpp @@ -549,9 +549,9 @@ WarpXParticleContainer::DepositCurrent(WarpXParIter& pti, } #ifdef WARPX_DIM_RZ - ApplyInverseVolumeScalingToCurrentDensity (jx_arr, jy_arr, jz_arr, - tbx, tby, tbz, - xyzmin[0], lo.x, dx[0], ngJ); + ApplyInverseVolumeScalingToCurrentDensity(jx_arr, jy_arr, jz_arr, + tbx, tby, tbz, + xyzmin[0], lo.x, dx[0], ngJ); #endif #ifndef AMREX_USE_GPU -- cgit v1.2.3 From f5207b1bb5bc9dd9b0f2b7d4170bcf67fc14c8a2 Mon Sep 17 00:00:00 2001 From: Dave Grote Date: Thu, 1 Aug 2019 17:12:29 -0700 Subject: Moved ApplyInverseVolumeScalingToCurrentDensity to WarpX and is called after all particles are deposited. --- Source/Evolve/WarpXEvolveEM.cpp | 7 +++ Source/FieldSolver/WarpXPushFieldsEM.cpp | 96 +++++++++++++++++++++++++++++ Source/Particles/WarpXParticleContainer.cpp | 8 +-- Source/WarpX.H | 7 +++ 4 files changed, 111 insertions(+), 7 deletions(-) (limited to 'Source/Particles/WarpXParticleContainer.cpp') diff --git a/Source/Evolve/WarpXEvolveEM.cpp b/Source/Evolve/WarpXEvolveEM.cpp index 32a4747db..57a0c44c0 100644 --- a/Source/Evolve/WarpXEvolveEM.cpp +++ b/Source/Evolve/WarpXEvolveEM.cpp @@ -481,6 +481,13 @@ WarpX::PushParticlesandDepose (int lev, Real cur_time) Efield_cax[lev][0].get(), Efield_cax[lev][1].get(), Efield_cax[lev][2].get(), Bfield_cax[lev][0].get(), Bfield_cax[lev][1].get(), Bfield_cax[lev][2].get(), cur_time, dt[lev]); +#ifdef WARPX_DIM_RZ + // This is called after all particles have deposited their current. + ApplyInverseVolumeScalingToCurrentDensity(current_fp[lev][0].get(), current_fp[lev][1].get(), current_fp[lev][2].get(), lev); + if (current_buf[lev][0].get()) { + ApplyInverseVolumeScalingToCurrentDensity(current_buf[lev][0].get(), current_buf[lev][1].get(), current_buf[lev][2].get(), lev-1); + } +#endif } void diff --git a/Source/FieldSolver/WarpXPushFieldsEM.cpp b/Source/FieldSolver/WarpXPushFieldsEM.cpp index 4fce4717b..892c08aa2 100644 --- a/Source/FieldSolver/WarpXPushFieldsEM.cpp +++ b/Source/FieldSolver/WarpXPushFieldsEM.cpp @@ -560,3 +560,99 @@ WarpX::EvolveF (int lev, PatchType patch_type, Real a_dt, DtType a_dt_type) } } +#ifdef WARPX_DIM_RZ +// This scales the current by the inverse volume and wraps around the depostion at negative radius. +// It is faster to apply this on the grid than to do it particle by particle. +// It is put here since there isn't another nice place for it. +void +WarpX::ApplyInverseVolumeScalingToCurrentDensity (MultiFab* Jx, MultiFab* Jy, MultiFab* Jz, int lev) +{ + const long ngJ = Jx->nGrow(); + const std::array& dx = WarpX::CellSize(lev); + const Real dr = dx[0]; + + Box tilebox; + + for ( MFIter mfi(*Jx, TilingIfNotGPU()); mfi.isValid(); ++mfi ) + { + + Array4 const& Jr_arr = Jx->array(mfi); + Array4 const& Jt_arr = Jy->array(mfi); + Array4 const& Jz_arr = Jz->array(mfi); + + tilebox = mfi.tilebox(); + Box tbr = convert(tilebox, WarpX::jx_nodal_flag); + Box tbt = convert(tilebox, WarpX::jy_nodal_flag); + Box tbz = convert(tilebox, WarpX::jz_nodal_flag); + + // Lower corner of tile box physical domain + // Note that this is done before the tilebox.grow so that + // these do not include the guard cells. + const std::array& xyzmin = WarpX::LowerCorner(tilebox, lev); + const Dim3 lo = lbound(tilebox); + const Real rmin = xyzmin[0]; + const int irmin = lo.x; + + tilebox.grow(ngJ); + + // Rescale current in r-z mode since the inverse volume factor was not + // included in the current deposition. + amrex::ParallelFor(tbr, + [=] AMREX_GPU_DEVICE (int i, int j, int k) + { + // Wrap the current density deposited in the guard cells around + // to the cells above the axis. + // Note that Jr(i==0) is at 1/2 dr. + if (rmin == 0. && 0 <= i && i < ngJ) { + Jr_arr(i,j,0) -= Jr_arr(-1-i,j,0); + } + // Apply the inverse volume scaling + // Since Jr is not cell centered in r, no need for distinction + // between on axis and off-axis factors + const amrex::Real r = std::abs(rmin + (i - irmin + 0.5)*dr); + Jr_arr(i,j,0) /= (2.*MathConst::pi*r); + }); + amrex::ParallelFor(tbt, + [=] AMREX_GPU_DEVICE (int i, int j, int k) + { + // Wrap the current density deposited in the guard cells around + // to the cells above the axis. + // Jt is located on the boundary + if (rmin == 0. && 0 < i && i <= ngJ) { + Jt_arr(i,j,0) += Jt_arr(-i,j,0); + } + + // Apply the inverse volume scaling + // Jt is forced to zero on axis. + const amrex::Real r = std::abs(rmin + (i - irmin)*dr); + if (r == 0.) { + Jt_arr(i,j,0) = 0.; + } else { + Jt_arr(i,j,0) /= (2.*MathConst::pi*r); + } + }); + amrex::ParallelFor(tbz, + [=] AMREX_GPU_DEVICE (int i, int j, int k) + { + /* if (j == 5) std::cout << "Jz " << i << " " << Jz_arr(i,j,0) << "\n"; */ + // Wrap the current density deposited in the guard cells around + // to the cells above the axis. + // Jz is located on the boundary + if (rmin == 0. && 0 < i && i <= ngJ) { + Jz_arr(i,j,0) += Jz_arr(-i,j,0); + } + + // Apply the inverse volume scaling + const amrex::Real r = std::abs(rmin + (i - irmin)*dr); + if (r == 0.) { + // Verboncoeur JCP 164, 421-427 (2001) : corrected volume on axis + Jz_arr(i,j,0) /= (MathConst::pi*dr/3.); + // Alternative (when order in r is limited one, which is not implemented) + // Jz_arr(i,j,0) /= (MathConst::pi*dr/4.); + } else { + Jz_arr(i,j,0) /= (2.*MathConst::pi*r); + } + }); + } +} +#endif diff --git a/Source/Particles/WarpXParticleContainer.cpp b/Source/Particles/WarpXParticleContainer.cpp index 787c0e397..0e76353a4 100644 --- a/Source/Particles/WarpXParticleContainer.cpp +++ b/Source/Particles/WarpXParticleContainer.cpp @@ -504,7 +504,7 @@ WarpXParticleContainer::DepositCurrent(WarpXParIter& pti, // Lower corner of tile box physical domain // Note that this includes guard cells since it is after tilebox.ngrow - const std::array& xyzmin = WarpX::LowerCorner(tilebox, depos_lev);; + const std::array& xyzmin = WarpX::LowerCorner(tilebox, depos_lev); // xyzmin is built on pti.tilebox(), so it does // not include staggering, so the stagger_shift has to be done by hand. // Alternatively, we could define xyzminx from tbx (and the same for 3 @@ -548,12 +548,6 @@ WarpXParticleContainer::DepositCurrent(WarpXParIter& pti, } } -#ifdef WARPX_DIM_RZ - ApplyInverseVolumeScalingToCurrentDensity(jx_arr, jy_arr, jz_arr, - tbx, tby, tbz, - xyzmin[0], lo.x, dx[0], ngJ); -#endif - #ifndef AMREX_USE_GPU BL_PROFILE_VAR_START(blp_accumulate); // CPU, tiling: atomicAdd local_jx into jx diff --git a/Source/WarpX.H b/Source/WarpX.H index a25eef9e4..dde2278d7 100644 --- a/Source/WarpX.H +++ b/Source/WarpX.H @@ -178,6 +178,13 @@ public: void EvolveE (int lev, PatchType patch_type, amrex::Real dt); void EvolveF (int lev, PatchType patch_type, amrex::Real dt, DtType dt_type); +#ifdef WARPX_DIM_RZ + void ApplyInverseVolumeScalingToCurrentDensity(amrex::MultiFab* Jx, + amrex::MultiFab* Jy, + amrex::MultiFab* Jz, + int lev); +#endif + void DampPML (); void DampPML (int lev); void DampPML (int lev, PatchType patch_type); -- cgit v1.2.3 From 9b813235d4f27e4b95e0448a9e43cb82b35c2c89 Mon Sep 17 00:00:00 2001 From: Dave Grote Date: Thu, 1 Aug 2019 17:15:20 -0700 Subject: Fixed SetPosition for RZ with CUDA --- Source/Particles/WarpXParticleContainer.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'Source/Particles/WarpXParticleContainer.cpp') diff --git a/Source/Particles/WarpXParticleContainer.cpp b/Source/Particles/WarpXParticleContainer.cpp index 0e76353a4..1cbcee75e 100644 --- a/Source/Particles/WarpXParticleContainer.cpp +++ b/Source/Particles/WarpXParticleContainer.cpp @@ -47,7 +47,7 @@ WarpXParIter::SetPosition (const Cuda::ManagedDeviceVector& x, const Cuda: #ifdef WARPX_RZ auto& attribs = GetAttribs(); auto& theta = attribs[PIdx::theta]; - Cuda::DeviceVector r(x.size()); + Cuda::ManagedDeviceVector r(x.size()); for (unsigned int i=0 ; i < x.size() ; i++) { theta[i] = std::atan2(y[i], x[i]); r[i] = std::sqrt(x[i]*x[i] + y[i]*y[i]); -- cgit v1.2.3 From eb6f9caf956935425da5664eba07a86355f32761 Mon Sep 17 00:00:00 2001 From: Dave Grote Date: Fri, 2 Aug 2019 18:36:58 -0700 Subject: Removed obsolete warpx_current_deposition_rz_volume_scaling --- Source/FortranInterface/WarpX_picsar.F90 | 50 ----------------------------- Source/Particles/WarpXParticleContainer.cpp | 8 ----- 2 files changed, 58 deletions(-) (limited to 'Source/Particles/WarpXParticleContainer.cpp') diff --git a/Source/FortranInterface/WarpX_picsar.F90 b/Source/FortranInterface/WarpX_picsar.F90 index 3b2fc97a7..14bd79ad4 100644 --- a/Source/FortranInterface/WarpX_picsar.F90 +++ b/Source/FortranInterface/WarpX_picsar.F90 @@ -10,7 +10,6 @@ #define WRPX_PXR_GETEB_ENERGY_CONSERVING geteb2drz_energy_conserving_generic #define WRPX_PXR_CURRENT_DEPOSITION depose_jrjtjz_generic_rz #define WRPX_PXR_RZ_VOLUME_SCALING_RHO apply_rz_volume_scaling_rho -#define WRPX_PXR_RZ_VOLUME_SCALING_J apply_rz_volume_scaling_j #else @@ -355,53 +354,4 @@ subroutine warpx_charge_deposition(rho,np,xp,yp,zp,w,q,xmin,ymin,zmin,dx,dy,dz,n end subroutine warpx_current_deposition - ! _________________________________________________________________ - !> - !> @brief - !> Applies the inverse volume scaling for RZ current deposition - !> - !> @details - !> The scaling is done for single mode only - ! - !> @param[inout] jx,jy,jz current arrays - !> @param[in] jx_ntot,jy_ntot,jz_ntot vectors with total number of - !> cells (including guard cells) along each axis for each current - !> @param[in] jx_ng,jy_ng,jz_ng vectors with number of guard cells along each - !> axis for each current - !> @param[in] rmin tile grid minimum radius - !> @param[in] dr radial space discretization steps - !> - subroutine warpx_current_deposition_rz_volume_scaling( & - jx,jx_ng,jx_ntot,jy,jy_ng,jy_ntot,jz,jz_ng,jz_ntot, & - rmin,dr) & - bind(C, name="warpx_current_deposition_rz_volume_scaling") - - integer, intent(in) :: jx_ntot(AMREX_SPACEDIM), jy_ntot(AMREX_SPACEDIM), jz_ntot(AMREX_SPACEDIM) - integer(c_long), intent(in) :: jx_ng, jy_ng, jz_ng - real(amrex_real), intent(IN OUT):: jx(*), jy(*), jz(*) - real(amrex_real), intent(IN) :: rmin, dr - -#ifdef WARPX_RZ - integer(c_long) :: type_rz_depose = 1 -#endif - ! Compute the number of valid cells and guard cells - integer(c_long) :: jx_nvalid(AMREX_SPACEDIM), jy_nvalid(AMREX_SPACEDIM), jz_nvalid(AMREX_SPACEDIM), & - jx_nguards(AMREX_SPACEDIM), jy_nguards(AMREX_SPACEDIM), jz_nguards(AMREX_SPACEDIM) - jx_nvalid = jx_ntot - 2*jx_ng - jy_nvalid = jy_ntot - 2*jy_ng - jz_nvalid = jz_ntot - 2*jz_ng - jx_nguards = jx_ng - jy_nguards = jy_ng - jz_nguards = jz_ng - -#ifdef WARPX_RZ - CALL WRPX_PXR_RZ_VOLUME_SCALING_J( & - jx,jx_nguards,jx_nvalid, & - jy,jy_nguards,jy_nvalid, & - jz,jz_nguards,jz_nvalid, & - rmin,dr,type_rz_depose) -#endif - - end subroutine warpx_current_deposition_rz_volume_scaling - end module warpx_to_pxr_module diff --git a/Source/Particles/WarpXParticleContainer.cpp b/Source/Particles/WarpXParticleContainer.cpp index 1cbcee75e..f6c7afed5 100644 --- a/Source/Particles/WarpXParticleContainer.cpp +++ b/Source/Particles/WarpXParticleContainer.cpp @@ -394,14 +394,6 @@ WarpXParticleContainer::DepositCurrentFortran(WarpXParIter& pti, &WarpX::nox,&WarpX::noy,&WarpX::noz, &j_is_nodal, &lvect,&WarpX::current_deposition_algo); -#ifdef WARPX_RZ - // Rescale current in r-z mode - warpx_current_deposition_rz_volume_scaling( - jx_ptr, &ngJ, jxntot.getVect(), - jy_ptr, &ngJ, jyntot.getVect(), - jz_ptr, &ngJ, jzntot.getVect(), - &xyzmin[0], &dx[0]); -#endif BL_PROFILE_VAR_STOP(blp_pxr_cd); #ifndef AMREX_USE_GPU -- cgit v1.2.3 From ce63dce8b7d71c12cafe6309f6afc4c5cfc3d5c5 Mon Sep 17 00:00:00 2001 From: Dave Grote Date: Mon, 5 Aug 2019 09:43:57 -0700 Subject: Replaced WARPX_RZ with WARPX_DIM_RZ --- Source/Diagnostics/ParticleIO.cpp | 2 +- Source/Evolve/WarpXEvolveEM.cpp | 2 +- Source/FortranInterface/WarpX_f.H | 4 ++-- Source/FortranInterface/WarpX_picsar.F90 | 8 ++++---- Source/Make.WarpX | 1 - Source/Particles/PhysicalParticleContainer.cpp | 14 +++++++------- Source/Particles/Pusher/GetAndSetPosition.H | 6 +++--- Source/Particles/Pusher/UpdatePosition.H | 2 +- Source/Particles/WarpXParticleContainer.H | 2 +- Source/Particles/WarpXParticleContainer.cpp | 26 +++++++++++++------------- Source/Utils/WarpXAlgorithmSelection.cpp | 2 +- Source/WarpX.cpp | 16 ++++++++-------- 12 files changed, 42 insertions(+), 43 deletions(-) (limited to 'Source/Particles/WarpXParticleContainer.cpp') diff --git a/Source/Diagnostics/ParticleIO.cpp b/Source/Diagnostics/ParticleIO.cpp index f2a543ed5..f159e5302 100644 --- a/Source/Diagnostics/ParticleIO.cpp +++ b/Source/Diagnostics/ParticleIO.cpp @@ -98,7 +98,7 @@ MultiParticleContainer::WritePlotFile (const std::string& dir) const real_names.push_back("By"); real_names.push_back("Bz"); -#ifdef WARPX_RZ +#ifdef WARPX_DIM_RZ real_names.push_back("theta"); #endif diff --git a/Source/Evolve/WarpXEvolveEM.cpp b/Source/Evolve/WarpXEvolveEM.cpp index 57a0c44c0..e9bf98f81 100644 --- a/Source/Evolve/WarpXEvolveEM.cpp +++ b/Source/Evolve/WarpXEvolveEM.cpp @@ -498,7 +498,7 @@ WarpX::ComputeDt () if (maxwell_fdtd_solver_id == 0) { // CFL time step Yee solver -#ifdef WARPX_RZ +#ifdef WARPX_DIM_RZ // Derived semi-analytically by R. Lehe deltat = cfl * 1./( std::sqrt((1+0.2105)/(dx[0]*dx[0]) + 1./(dx[1]*dx[1])) * PhysConst::c ); #else diff --git a/Source/FortranInterface/WarpX_f.H b/Source/FortranInterface/WarpX_f.H index 689029059..07cfcb42e 100644 --- a/Source/FortranInterface/WarpX_f.H +++ b/Source/FortranInterface/WarpX_f.H @@ -62,7 +62,7 @@ #define WRPX_PUSH_LEAPFROG warpx_push_leapfrog_2d #define WRPX_PUSH_LEAPFROG_POSITIONS warpx_push_leapfrog_positions_2d -#ifdef WARPX_RZ +#ifdef WARPX_DIM_RZ #define WRPX_COMPUTE_DIVE warpx_compute_dive_rz #else #define WRPX_COMPUTE_DIVE warpx_compute_dive_2d @@ -314,7 +314,7 @@ extern "C" const BL_FORT_FAB_ARG_ANYD(ey), const BL_FORT_FAB_ARG_ANYD(ez), const amrex::Real* dx -#ifdef WARPX_RZ +#ifdef WARPX_DIM_RZ ,const amrex::Real* rmin #endif ); diff --git a/Source/FortranInterface/WarpX_picsar.F90 b/Source/FortranInterface/WarpX_picsar.F90 index e65c30e42..0625d40f9 100644 --- a/Source/FortranInterface/WarpX_picsar.F90 +++ b/Source/FortranInterface/WarpX_picsar.F90 @@ -4,7 +4,7 @@ #elif (AMREX_SPACEDIM == 2) -#ifdef WARPX_RZ +#ifdef WARPX_DIM_RZ #define WRPX_PXR_CURRENT_DEPOSITION depose_jrjtjz_generic_rz #define WRPX_PXR_RZ_VOLUME_SCALING_RHO apply_rz_volume_scaling_rho @@ -138,7 +138,7 @@ subroutine warpx_charge_deposition(rho,np,xp,yp,zp,w,q,xmin,ymin,zmin,dx,dy,dz,n ! Dimension 2 #elif (AMREX_SPACEDIM==2) -#ifdef WARPX_RZ +#ifdef WARPX_DIM_RZ logical(pxr_logical) :: l_2drz = .TRUE._c_long #else logical(pxr_logical) :: l_2drz = .FALSE._c_long @@ -175,7 +175,7 @@ subroutine warpx_charge_deposition(rho,np,xp,yp,zp,w,q,xmin,ymin,zmin,dx,dy,dz,n real(amrex_real), intent(IN OUT):: rho(*) real(amrex_real), intent(IN) :: rmin, dr -#ifdef WARPX_RZ +#ifdef WARPX_DIM_RZ integer(c_long) :: type_rz_depose = 1 #endif @@ -184,7 +184,7 @@ subroutine warpx_charge_deposition(rho,np,xp,yp,zp,w,q,xmin,ymin,zmin,dx,dy,dz,n rho_nvalid = rho_ntot - 2*rho_ng rho_nguards = rho_ng -#ifdef WARPX_RZ +#ifdef WARPX_DIM_RZ CALL WRPX_PXR_RZ_VOLUME_SCALING_RHO( & rho,rho_nguards,rho_nvalid, & rmin,dr,type_rz_depose) diff --git a/Source/Make.WarpX b/Source/Make.WarpX index d2551e059..069623b50 100644 --- a/Source/Make.WarpX +++ b/Source/Make.WarpX @@ -141,7 +141,6 @@ endif ifeq ($(USE_RZ),TRUE) USERSuffix := $(USERSuffix).RZ - DEFINES += -DWARPX_RZ endif ifeq ($(DO_ELECTROSTATIC),TRUE) diff --git a/Source/Particles/PhysicalParticleContainer.cpp b/Source/Particles/PhysicalParticleContainer.cpp index 08f8a77b4..7d90857b4 100644 --- a/Source/Particles/PhysicalParticleContainer.cpp +++ b/Source/Particles/PhysicalParticleContainer.cpp @@ -148,7 +148,7 @@ PhysicalParticleContainer::AddGaussianBeam(Real x_m, Real y_m, Real z_m, npart /= 4; } for (long i = 0; i < npart; ++i) { -#if ( AMREX_SPACEDIM == 3 | WARPX_RZ) +#if ( AMREX_SPACEDIM == 3 | WARPX_DIM_RZ) Real weight = q_tot/npart/charge; Real x = distx(mt); Real y = disty(mt); @@ -269,7 +269,7 @@ PhysicalParticleContainer::AddPlasma (int lev, RealBox part_realbox) if (!part_realbox.ok()) part_realbox = geom.ProbDomain(); int num_ppc = plasma_injector->num_particles_per_cell; -#ifdef WARPX_RZ +#ifdef WARPX_DIM_RZ Real rmax = std::min(plasma_injector->xmax, part_realbox.hi(0)); #endif @@ -323,7 +323,7 @@ PhysicalParticleContainer::AddPlasma (int lev, RealBox part_realbox) Real density_min = plasma_injector->density_min; Real density_max = plasma_injector->density_max; -#ifdef WARPX_RZ +#ifdef WARPX_DIM_RZ bool radially_weighted = plasma_injector->radially_weighted; #endif @@ -496,11 +496,11 @@ PhysicalParticleContainer::AddPlasma (int lev, RealBox part_realbox) #endif // Save the x and y values to use in the insideBounds checks. - // This is needed with WARPX_RZ since x and y are modified. + // This is needed with WARPX_DIM_RZ since x and y are modified. Real xb = x; Real yb = y; -#ifdef WARPX_RZ +#ifdef WARPX_DIM_RZ // Replace the x and y, choosing the angle randomly. // These x and y are used to get the momentum and density Real theta = 2.*MathConst::pi*amrex::Random(); @@ -574,7 +574,7 @@ PhysicalParticleContainer::AddPlasma (int lev, RealBox part_realbox) // Real weight = dens * scale_fac / (AMREX_D_TERM(fac, *fac, *fac)); Real weight = dens * scale_fac; -#ifdef WARPX_RZ +#ifdef WARPX_DIM_RZ if (radially_weighted) { weight *= 2.*MathConst::pi*xb; } else { @@ -603,7 +603,7 @@ PhysicalParticleContainer::AddPlasma (int lev, RealBox part_realbox) p.pos(1) = y; p.pos(2) = z; #elif (AMREX_SPACEDIM == 2) -#ifdef WARPX_RZ +#ifdef WARPX_DIM_RZ pa[PIdx::theta][ip] = theta; #endif p.pos(0) = xb; diff --git a/Source/Particles/Pusher/GetAndSetPosition.H b/Source/Particles/Pusher/GetAndSetPosition.H index 42c61343e..3c74baeb2 100644 --- a/Source/Particles/Pusher/GetAndSetPosition.H +++ b/Source/Particles/Pusher/GetAndSetPosition.H @@ -5,7 +5,7 @@ #include #include -#ifndef WARPX_RZ +#ifndef WARPX_DIM_RZ /* \brief Extract the particle's coordinates from the ParticleType struct `p`, * and stores them in the variables `x`, `y`, `z`. */ @@ -42,7 +42,7 @@ void SetPosition( #endif } -# else // if WARPX_RZ is True +# elif defined WARPX_DIM_RZ /* \brief Extract the particle's coordinates from `theta` and the attributes * of the ParticleType struct `p` (which contains the radius), @@ -71,6 +71,6 @@ void SetCylindricalPositionFromCartesian( p.pos(1) = z; } -#endif // WARPX_RZ +#endif // WARPX_DIM_RZ #endif // WARPX_PARTICLES_PUSHER_GETANDSETPOSITION_H_ diff --git a/Source/Particles/Pusher/UpdatePosition.H b/Source/Particles/Pusher/UpdatePosition.H index 0a4f579f4..a9df63a30 100644 --- a/Source/Particles/Pusher/UpdatePosition.H +++ b/Source/Particles/Pusher/UpdatePosition.H @@ -20,7 +20,7 @@ void UpdatePosition( const amrex::Real inv_gamma = 1./std::sqrt(1. + (ux*ux + uy*uy + uz*uz)*inv_c2); // Update positions over one time step x += ux * inv_gamma * dt; -#if (AMREX_SPACEDIM == 3) || (defined WARPX_RZ) // RZ pushes particles in 3D +#if (AMREX_SPACEDIM == 3) || (defined WARPX_DIM_RZ) // RZ pushes particles in 3D y += uy * inv_gamma * dt; #endif z += uz * inv_gamma * dt; diff --git a/Source/Particles/WarpXParticleContainer.H b/Source/Particles/WarpXParticleContainer.H index 7cf53260a..a609b4cb3 100644 --- a/Source/Particles/WarpXParticleContainer.H +++ b/Source/Particles/WarpXParticleContainer.H @@ -13,7 +13,7 @@ struct PIdx enum { // Particle Attributes stored in amrex::ParticleContainer's struct of array w = 0, // weight ux, uy, uz, Ex, Ey, Ez, Bx, By, Bz, -#ifdef WARPX_RZ +#ifdef WARPX_DIM_RZ theta, // RZ needs all three position components #endif nattribs diff --git a/Source/Particles/WarpXParticleContainer.cpp b/Source/Particles/WarpXParticleContainer.cpp index f6c7afed5..c5c6afc19 100644 --- a/Source/Particles/WarpXParticleContainer.cpp +++ b/Source/Particles/WarpXParticleContainer.cpp @@ -27,7 +27,7 @@ void WarpXParIter::GetPosition (Cuda::ManagedDeviceVector& x, Cuda::ManagedDeviceVector& y, Cuda::ManagedDeviceVector& z) const { amrex::ParIter<0,0,PIdx::nattribs>::GetPosition(x, z); -#ifdef WARPX_RZ +#ifdef WARPX_DIM_RZ const auto& attribs = GetAttribs(); const auto& theta = attribs[PIdx::theta]; y.resize(x.size()); @@ -44,7 +44,7 @@ WarpXParIter::GetPosition (Cuda::ManagedDeviceVector& x, Cuda::ManagedDevi void WarpXParIter::SetPosition (const Cuda::ManagedDeviceVector& x, const Cuda::ManagedDeviceVector& y, const Cuda::ManagedDeviceVector& z) { -#ifdef WARPX_RZ +#ifdef WARPX_DIM_RZ auto& attribs = GetAttribs(); auto& theta = attribs[PIdx::theta]; Cuda::ManagedDeviceVector r(x.size()); @@ -80,7 +80,7 @@ WarpXParticleContainer::WarpXParticleContainer (AmrCore* amr_core, int ispecies) particle_comps["Bx"] = PIdx::Bx; particle_comps["By"] = PIdx::By; particle_comps["Bz"] = PIdx::Bz; -#ifdef WARPX_RZ +#ifdef WARPX_DIM_RZ particle_comps["theta"] = PIdx::theta; #endif @@ -163,7 +163,7 @@ WarpXParticleContainer::AddOneParticle (ParticleTileType& particle_tile, p.pos(1) = y; p.pos(2) = z; #elif (AMREX_SPACEDIM == 2) -#ifdef WARPX_RZ +#ifdef WARPX_DIM_RZ attribs[PIdx::theta] = std::atan2(y, x); x = std::sqrt(x*x + y*y); #endif @@ -209,7 +209,7 @@ WarpXParticleContainer::AddNParticles (int lev, std::size_t np = iend-ibegin; -#ifdef WARPX_RZ +#ifdef WARPX_DIM_RZ Vector theta(np); #endif @@ -228,7 +228,7 @@ WarpXParticleContainer::AddNParticles (int lev, p.pos(1) = y[i]; p.pos(2) = z[i]; #elif (AMREX_SPACEDIM == 2) -#ifdef WARPX_RZ +#ifdef WARPX_DIM_RZ theta[i-ibegin] = std::atan2(y[i], x[i]); p.pos(0) = std::sqrt(x[i]*x[i] + y[i]*y[i]); #else @@ -265,7 +265,7 @@ WarpXParticleContainer::AddNParticles (int lev, for (int comp = PIdx::uz+1; comp < PIdx::nattribs; ++comp) { -#ifdef WARPX_RZ +#ifdef WARPX_DIM_RZ if (comp == PIdx::theta) { particle_tile.push_back_real(comp, theta.front(), theta.back()); } @@ -610,7 +610,7 @@ WarpXParticleContainer::DepositCharge ( WarpXParIter& pti, RealVector& wp, &ngRho, &ngRho, &ngRho, &WarpX::nox,&WarpX::noy,&WarpX::noz, &lvect, &WarpX::charge_deposition_algo); -#ifdef WARPX_RZ +#ifdef WARPX_DIM_RZ warpx_charge_deposition_rz_volume_scaling( data_ptr, &ngRho, rholen.getVect(), &xyzmin[0], &dx[0]); @@ -670,7 +670,7 @@ WarpXParticleContainer::DepositCharge ( WarpXParIter& pti, RealVector& wp, &ngRho, &ngRho, &ngRho, &WarpX::nox,&WarpX::noy,&WarpX::noz, &lvect, &WarpX::charge_deposition_algo); -#ifdef WARPX_RZ +#ifdef WARPX_DIM_RZ warpx_charge_deposition_rz_volume_scaling( data_ptr, &ngRho, rholen.getVect(), &cxyzmin_tile[0], &cdx[0]); @@ -830,7 +830,7 @@ WarpXParticleContainer::GetChargeDensity (int lev, bool local) &dx[0], &dx[1], &dx[2], &nx, &ny, &nz, &nxg, &nyg, &nzg, &WarpX::nox,&WarpX::noy,&WarpX::noz, &lvect, &WarpX::charge_deposition_algo); -#ifdef WARPX_RZ +#ifdef WARPX_DIM_RZ long ngRho = WarpX::nox; warpx_charge_deposition_rz_volume_scaling( data_ptr, &ngRho, rholen.getVect(), @@ -1015,7 +1015,7 @@ WarpXParticleContainer::PushX (int lev, Real dt) Real* AMREX_RESTRICT ux = attribs[PIdx::ux].dataPtr(); Real* AMREX_RESTRICT uy = attribs[PIdx::uy].dataPtr(); Real* AMREX_RESTRICT uz = attribs[PIdx::uz].dataPtr(); -#ifdef WARPX_RZ +#ifdef WARPX_DIM_RZ Real* AMREX_RESTRICT theta = attribs[PIdx::theta].dataPtr(); #endif // Loop over the particles and update their position @@ -1023,12 +1023,12 @@ WarpXParticleContainer::PushX (int lev, Real dt) [=] AMREX_GPU_DEVICE (long i) { ParticleType& p = pstructs[i]; // Particle object that gets updated Real x, y, z; // Temporary variables -#ifndef WARPX_RZ +#ifndef WARPX_DIM_RZ GetPosition( x, y, z, p ); // Initialize x, y, z UpdatePosition( x, y, z, ux[i], uy[i], uz[i], dt); SetPosition( p, x, y, z ); // Update the object p #else - // For WARPX_RZ, the particles are still pushed in 3D Cartesian + // For WARPX_DIM_RZ, the particles are still pushed in 3D Cartesian GetCartesianPositionFromCylindrical( x, y, z, p, theta[i] ); UpdatePosition( x, y, z, ux[i], uy[i], uz[i], dt); SetCylindricalPositionFromCartesian( p, theta[i], x, y, z ); diff --git a/Source/Utils/WarpXAlgorithmSelection.cpp b/Source/Utils/WarpXAlgorithmSelection.cpp index 2c8038ccd..3aa4eb7b7 100644 --- a/Source/Utils/WarpXAlgorithmSelection.cpp +++ b/Source/Utils/WarpXAlgorithmSelection.cpp @@ -8,7 +8,7 @@ const std::map maxwell_solver_algo_to_int = { {"yee", MaxwellSolverAlgo::Yee }, -#ifndef WARPX_RZ // Not available in RZ +#ifndef WARPX_DIM_RZ // Not available in RZ {"ckc", MaxwellSolverAlgo::CKC }, #endif {"default", MaxwellSolverAlgo::Yee } diff --git a/Source/WarpX.cpp b/Source/WarpX.cpp index 18252cb64..d45dd3a71 100644 --- a/Source/WarpX.cpp +++ b/Source/WarpX.cpp @@ -993,7 +993,7 @@ WarpX::ComputeDivB (MultiFab& divB, int dcomp, { Real dxinv = 1./dx[0], dyinv = 1./dx[1], dzinv = 1./dx[2]; -#ifdef WARPX_RZ +#ifdef WARPX_DIM_RZ const Real rmin = GetInstance().Geom(0).ProbLo(0); #endif @@ -1012,7 +1012,7 @@ WarpX::ComputeDivB (MultiFab& divB, int dcomp, [=] AMREX_GPU_DEVICE(int i, int j, int k) noexcept { warpx_computedivb(i, j, k, dcomp, divBfab, Bxfab, Byfab, Bzfab, dxinv, dyinv, dzinv -#ifdef WARPX_RZ +#ifdef WARPX_DIM_RZ ,rmin #endif ); @@ -1027,7 +1027,7 @@ WarpX::ComputeDivB (MultiFab& divB, int dcomp, { Real dxinv = 1./dx[0], dyinv = 1./dx[1], dzinv = 1./dx[2]; -#ifdef WARPX_RZ +#ifdef WARPX_DIM_RZ const Real rmin = GetInstance().Geom(0).ProbLo(0); #endif @@ -1046,7 +1046,7 @@ WarpX::ComputeDivB (MultiFab& divB, int dcomp, [=] AMREX_GPU_DEVICE(int i, int j, int k) noexcept { warpx_computedivb(i, j, k, dcomp, divBfab, Bxfab, Byfab, Bzfab, dxinv, dyinv, dzinv -#ifdef WARPX_RZ +#ifdef WARPX_DIM_RZ ,rmin #endif ); @@ -1061,7 +1061,7 @@ WarpX::ComputeDivE (MultiFab& divE, int dcomp, { Real dxinv = 1./dx[0], dyinv = 1./dx[1], dzinv = 1./dx[2]; -#ifdef WARPX_RZ +#ifdef WARPX_DIM_RZ const Real rmin = GetInstance().Geom(0).ProbLo(0); #endif @@ -1080,7 +1080,7 @@ WarpX::ComputeDivE (MultiFab& divE, int dcomp, [=] AMREX_GPU_DEVICE(int i, int j, int k) noexcept { warpx_computedive(i, j, k, dcomp, divEfab, Exfab, Eyfab, Ezfab, dxinv, dyinv, dzinv -#ifdef WARPX_RZ +#ifdef WARPX_DIM_RZ ,rmin #endif ); @@ -1095,7 +1095,7 @@ WarpX::ComputeDivE (MultiFab& divE, int dcomp, { Real dxinv = 1./dx[0], dyinv = 1./dx[1], dzinv = 1./dx[2]; -#ifdef WARPX_RZ +#ifdef WARPX_DIM_RZ const Real rmin = GetInstance().Geom(0).ProbLo(0); #endif @@ -1114,7 +1114,7 @@ WarpX::ComputeDivE (MultiFab& divE, int dcomp, [=] AMREX_GPU_DEVICE(int i, int j, int k) noexcept { warpx_computedive(i, j, k, dcomp, divEfab, Exfab, Eyfab, Ezfab, dxinv, dyinv, dzinv -#ifdef WARPX_RZ +#ifdef WARPX_DIM_RZ ,rmin #endif ); -- cgit v1.2.3 From 7b72953b793af38045e2508302594c7fe83a42e9 Mon Sep 17 00:00:00 2001 From: MaxThevenet Date: Thu, 8 Aug 2019 16:51:22 -0700 Subject: fix current deposition in buffers --- Source/Particles/WarpXParticleContainer.cpp | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) (limited to 'Source/Particles/WarpXParticleContainer.cpp') diff --git a/Source/Particles/WarpXParticleContainer.cpp b/Source/Particles/WarpXParticleContainer.cpp index c5c6afc19..8e4adc528 100644 --- a/Source/Particles/WarpXParticleContainer.cpp +++ b/Source/Particles/WarpXParticleContainer.cpp @@ -506,35 +506,35 @@ WarpXParticleContainer::DepositCurrent(WarpXParIter& pti, if (WarpX::current_deposition_algo == CurrentDepositionAlgo::Esirkepov) { if (WarpX::nox == 1){ - doEsirkepovDepositionShapeN<1>(xp, yp, zp, wp.dataPtr(), uxp.dataPtr(), - uyp.dataPtr(), uzp.dataPtr(), jx_arr, jy_arr, + doEsirkepovDepositionShapeN<1>(xp, yp, zp, wp.dataPtr() + offset, uxp.dataPtr() + offset, + uyp.dataPtr() + offset, uzp.dataPtr() + offset, jx_arr, jy_arr, jz_arr, np_to_depose, dt, dx, xyzmin, lo, q); } else if (WarpX::nox == 2){ - doEsirkepovDepositionShapeN<2>(xp, yp, zp, wp.dataPtr(), uxp.dataPtr(), - uyp.dataPtr(), uzp.dataPtr(), jx_arr, jy_arr, + doEsirkepovDepositionShapeN<2>(xp, yp, zp, wp.dataPtr() + offset, uxp.dataPtr() + offset, + uyp.dataPtr() + offset, uzp.dataPtr() + offset, jx_arr, jy_arr, jz_arr, np_to_depose, dt, dx, xyzmin, lo, q); } else if (WarpX::nox == 3){ - doEsirkepovDepositionShapeN<3>(xp, yp, zp, wp.dataPtr(), uxp.dataPtr(), - uyp.dataPtr(), uzp.dataPtr(), jx_arr, jy_arr, + doEsirkepovDepositionShapeN<3>(xp, yp, zp, wp.dataPtr() + offset, uxp.dataPtr() + offset, + uyp.dataPtr() + offset, uzp.dataPtr() + offset, jx_arr, jy_arr, jz_arr, np_to_depose, dt, dx, xyzmin, lo, q); } } else { if (WarpX::nox == 1){ - doDepositionShapeN<1>(xp, yp, zp, wp.dataPtr(), uxp.dataPtr(), - uyp.dataPtr(), uzp.dataPtr(), jx_arr, jy_arr, + doDepositionShapeN<1>(xp, yp, zp, wp.dataPtr() + offset, uxp.dataPtr() + offset, + uyp.dataPtr() + offset, uzp.dataPtr() + offset, jx_arr, jy_arr, jz_arr, np_to_depose, dt, dx, xyzmin, lo, stagger_shift, q); } else if (WarpX::nox == 2){ - doDepositionShapeN<2>(xp, yp, zp, wp.dataPtr(), uxp.dataPtr(), - uyp.dataPtr(), uzp.dataPtr(), jx_arr, jy_arr, + doDepositionShapeN<2>(xp, yp, zp, wp.dataPtr() + offset, uxp.dataPtr() + offset, + uyp.dataPtr() + offset, uzp.dataPtr() + offset, jx_arr, jy_arr, jz_arr, np_to_depose, dt, dx, xyzmin, lo, stagger_shift, q); } else if (WarpX::nox == 3){ - doDepositionShapeN<3>(xp, yp, zp, wp.dataPtr(), uxp.dataPtr(), - uyp.dataPtr(), uzp.dataPtr(), jx_arr, jy_arr, + doDepositionShapeN<3>(xp, yp, zp, wp.dataPtr() + offset, uxp.dataPtr() + offset, + uyp.dataPtr() + offset, uzp.dataPtr() + offset, jx_arr, jy_arr, jz_arr, np_to_depose, dt, dx, xyzmin, lo, stagger_shift, q); } -- cgit v1.2.3