-rw-r--r--  Docs/source/building/cori.rst                                                 |   3
-rw-r--r--  Docs/source/building/summit.rst                                               |  28
-rw-r--r--  Docs/source/running_cpp/parallelization.rst                                   |  24
-rw-r--r--  Docs/source/running_cpp/platforms.rst                                         |  69
-rw-r--r--  Docs/source/running_cpp/running_cpp.rst                                       |   1
-rwxr-xr-x  Examples/Tests/gpu_test/script_profiling.sh (renamed from Examples/Tests/gpu_test/script.sh) | 0
-rw-r--r--  Examples/batchScripts/batch_cori.sh                                           |  33
-rw-r--r--  Examples/batchScripts/batch_summit.sh                                         |  16
-rw-r--r--  Source/Diagnostics/ParticleIO.cpp                                             |  12
-rw-r--r--  Source/Evolve/WarpXEvolveEM.cpp                                               |  16
-rw-r--r--  Source/Particles/MultiParticleContainer.cpp                                   |  20
-rw-r--r--  Source/Particles/PhysicalParticleContainer.cpp                                | 110
-rw-r--r--  Source/Particles/RigidInjectedParticleContainer.cpp                           |  43
-rw-r--r--  Source/Particles/WarpXParticleContainer.H                                     |  18
-rw-r--r--  Source/Particles/WarpXParticleContainer.cpp                                   |  23
-rw-r--r--  Tools/compute_domain.py                                                       | 114
16 files changed, 330 insertions, 200 deletions
diff --git a/Docs/source/building/cori.rst b/Docs/source/building/cori.rst
index 2330e2153..d89f3f17b 100644
--- a/Docs/source/building/cori.rst
+++ b/Docs/source/building/cori.rst
@@ -51,6 +51,9 @@ In order to compile for the **Knight's Landing (KNL) architecture**:
 
     module swap PrgEnv-intel PrgEnv-gnu
     make -j 16 COMP=gnu
 
+See :doc:`../running_cpp/platforms` for more information on how to run
+WarpX on Cori.
+
 GPU Build
 ---------
diff --git a/Docs/source/building/summit.rst b/Docs/source/building/summit.rst
index 88588eb72..424cb68f5 100644
--- a/Docs/source/building/summit.rst
+++ b/Docs/source/building/summit.rst
@@ -23,29 +23,5 @@ Then, ``cd`` into the directory ``WarpX`` and use the following set of commands
     module load cuda
     make -j 4 USE_GPU=TRUE COMP=pgi
 
-In order to submit a simulation, create a file `submission_script` with
-the following text (replace bracketed variables):
-
-::
-
-    #!/bin/bash
-    #BSUB -J <jobName>
-    #BSUB -W <requestedTime>
-    #BSUB -nnodes <numberOfNodes>
-    #BSUB -P <accountNumber>
-
-    module load pgi
-    module load cuda
-
-    omp=1
-    export OMP_NUM_THREADS=${omp}
-    num_nodes=$(( $(printf '%s\n' ${LSB_HOSTS} | sort -u | wc -l) - 1 ))
-
-    jsrun -n ${num_nodes} -a 6 -g 6 -c 6 --bind=packed:${omp} --smpiargs="-gpu" <warpxExecutable> <inputScript>
-
-
-Then run
-
-::
-
-    bsub submission_script
+See :doc:`../running_cpp/platforms` for more information on how to run
+WarpX on Summit.
diff --git a/Docs/source/running_cpp/parallelization.rst b/Docs/source/running_cpp/parallelization.rst
index 440c17235..a8c89f340 100644
--- a/Docs/source/running_cpp/parallelization.rst
+++ b/Docs/source/running_cpp/parallelization.rst
@@ -61,22 +61,8 @@ and MPI decomposition and computer architecture used for the run:
 
 * Amount of high-bandwidth memory.
 
-Below is a list of experience-based parameters
-that were observed to give good performance on given supercomputers.
-
-Rule of thumb for 3D runs on NERSC Cori KNL
--------------------------------------------
-
-For a 3D simulation with a few (1-4) particles per cell using FDTD Maxwell
-solver on Cori KNL for a well load-balanced problem (in our case laser
-wakefield acceleration simulation in a boosted frame in the quasi-linear
-regime), the following set of parameters provided good performance:
-
-* ``amr.max_grid_size=64`` and ``amr.blocking_factor=64`` so that the size of
-  each grid is fixed to ``64**3`` (we are not using load-balancing here).
-
-* **8 MPI ranks per KNL node**, with ``OMP_NUM_THREADS=8`` (that is 64 threads
-  per KNL node, i.e. 1 thread per physical core, and 4 cores left to the
-  system).
-
-* **2 grids per MPI**, *i.e.*, 16 grids per KNL node.
+Because these parameters put additional constraints on the domain size for a
+simulation, it can be cumbersome to calculate the number of cells and the
+physical size of the computational domain for a given resolution. This
+:download:`Python script<../../../Tools/compute_domain.py>` does it
+automatically.
diff --git a/Docs/source/running_cpp/platforms.rst b/Docs/source/running_cpp/platforms.rst
new file mode 100644
index 000000000..fc4e2b1fb
--- /dev/null
+++ b/Docs/source/running_cpp/platforms.rst
@@ -0,0 +1,69 @@
+Running on specific platforms
+=============================
+
+Running on Cori KNL at NERSC
+----------------------------
+
+The batch script below can be used to run a WarpX simulation on 2 KNL nodes on
+the supercomputer Cori at NERSC. Replace descriptions between chevrons ``<>``
+by relevant values, for instance ``<job name>`` could be ``laserWakefield``.
+
+.. literalinclude:: ../../../Examples/batchScripts/batch_cori.sh
+   :language: bash
+
+To run a simulation, copy the lines above to a file ``batch_cori.sh`` and
+run
+::
+
+    sbatch batch_cori.sh
+
+to submit the job.
+
+For a 3D simulation with a few (1-4) particles per cell using FDTD Maxwell
+solver on Cori KNL for a well load-balanced problem (in our case laser
+wakefield acceleration simulation in a boosted frame in the quasi-linear
+regime), the following set of parameters provided good performance:
+
+* ``amr.max_grid_size=64`` and ``amr.blocking_factor=64`` so that the size of
+  each grid is fixed to ``64**3`` (we are not using load-balancing here).
+
+* **8 MPI ranks per KNL node**, with ``OMP_NUM_THREADS=8`` (that is 64 threads
+  per KNL node, i.e. 1 thread per physical core, and 4 cores left to the
+  system).
+
+* **2 grids per MPI**, *i.e.*, 16 grids per KNL node.
+
+Running on Summit at OLCF
+-------------------------
+
+The batch script below can be used to run a WarpX simulation on 2 nodes on
+the supercomputer Summit at OLCF. Replace descriptions between chevrons ``<>``
+by relevant values, for instance ``<input file>`` could be
+``plasma_mirror_inputs``. Note that the only option so far is to run with one
+MPI rank per GPU.
+
+.. literalinclude:: ../../../Examples/batchScripts/batch_summit.sh
+   :language: bash
+
+To run a simulation, copy the lines above to a file ``batch_summit.sh`` and
+run
+::
+
+    bsub batch_summit.sh
+
+to submit the job.
+
+For a 3D simulation with a few (1-4) particles per cell using FDTD Maxwell
+solver on Summit for a well load-balanced problem (in our case laser
+wakefield acceleration simulation in a boosted frame in the quasi-linear
+regime), the following set of parameters provided good performance:
+
+* ``amr.max_grid_size=256`` and ``amr.blocking_factor=128``.
+
+* **One MPI rank per GPU** (e.g., 6 MPI ranks for the 6 GPUs on each Summit
+  node)
+
+* **Two ``128x128x128`` grids per GPU**, or **one ``128x128x256`` grid per GPU**.
+
+A batch script with more options regarding profiling on Summit can be found at
+:download:`Summit batch script<../../../Examples/Tests/gpu_test/script_profiling.sh>`
\ No newline at end of file
diff --git a/Docs/source/running_cpp/running_cpp.rst b/Docs/source/running_cpp/running_cpp.rst
index 7d82e55f1..31cecb12f 100644
--- a/Docs/source/running_cpp/running_cpp.rst
+++ b/Docs/source/running_cpp/running_cpp.rst
@@ -9,3 +9,4 @@ Running WarpX as an executable
    parameters
    profiling
    parallelization
+   platforms
\ No newline at end of file
diff --git a/Examples/Tests/gpu_test/script.sh b/Examples/Tests/gpu_test/script_profiling.sh
index cd6b0eadd..cd6b0eadd 100755
--- a/Examples/Tests/gpu_test/script.sh
+++ b/Examples/Tests/gpu_test/script_profiling.sh
diff --git a/Examples/batchScripts/batch_cori.sh b/Examples/batchScripts/batch_cori.sh
new file mode 100644
index 000000000..e6cd5e1ef
--- /dev/null
+++ b/Examples/batchScripts/batch_cori.sh
@@ -0,0 +1,33 @@
+#!/bin/bash -l
+
+#SBATCH -N 2
+#SBATCH -t 01:00:00
+#SBATCH -q regular
+#SBATCH -C knl
+#SBATCH -S 4
+#SBATCH -J <job name>
+#SBATCH -A <allocation ID>
+#SBATCH -e error.txt
+#SBATCH -o output.txt
+
+export OMP_PLACES=threads
+export OMP_PROC_BIND=spread
+
+# KNLs have 4 hyperthreads max
+export CORI_MAX_HYPERTHREAD_LEVEL=4
+# We use 64 cores out of the 68 available on Cori KNL,
+# and leave 4 to the system (see "#SBATCH -S 4" above).
+export CORI_NCORES_PER_NODE=64
+
+# Typically use 8 MPI ranks per node without hyperthreading,
+# i.e., OMP_NUM_THREADS=8
+export WARPX_NMPI_PER_NODE=8
+export WARPX_HYPERTHREAD_LEVEL=1
+
+# Compute OMP_NUM_THREADS and the thread count (-c option)
+export CORI_NHYPERTHREADS_MAX=$(( ${CORI_MAX_HYPERTHREAD_LEVEL} * ${CORI_NCORES_PER_NODE} ))
+export WARPX_NTHREADS_PER_NODE=$(( ${WARPX_HYPERTHREAD_LEVEL} * ${CORI_NCORES_PER_NODE} ))
+export OMP_NUM_THREADS=$(( ${WARPX_NTHREADS_PER_NODE} / ${WARPX_NMPI_PER_NODE} ))
+export WARPX_THREAD_COUNT=$(( ${CORI_NHYPERTHREADS_MAX} / ${WARPX_NMPI_PER_NODE} ))
+
+srun --cpu_bind=cores -n $(( ${SLURM_JOB_NUM_NODES} * ${WARPX_NMPI_PER_NODE} )) -c ${WARPX_THREAD_COUNT} <path/to/executable> <input file>
diff --git a/Examples/batchScripts/batch_summit.sh b/Examples/batchScripts/batch_summit.sh
new file mode 100644
index 000000000..002660b91
--- /dev/null
+++ b/Examples/batchScripts/batch_summit.sh
@@ -0,0 +1,16 @@
+#!/bin/bash
+#BSUB -P <allocation ID>
+#BSUB -W 00:10
+#BSUB -nnodes 2
+#BSUB -J WarpX
+#BSUB -o WarpXo.%J
+#BSUB -e WarpXe.%J
+
+module load pgi
+module load cuda
+
+omp=1
+export OMP_NUM_THREADS=${omp}
+
+num_nodes=$(( $(printf '%s\n' ${LSB_HOSTS} | sort -u | wc -l) - 1 ))
+jsrun -n ${num_nodes} -a 6 -g 6 -c 6 --bind=packed:${omp} <path/to/executable> <input file> > output.txt
diff --git a/Source/Diagnostics/ParticleIO.cpp b/Source/Diagnostics/ParticleIO.cpp
index 743df4ce6..8bfa45a59 100644
--- a/Source/Diagnostics/ParticleIO.cpp
+++ b/Source/Diagnostics/ParticleIO.cpp
@@ -103,17 +103,6 @@ MultiParticleContainer::WritePlotFile (const std::string& dir) const
         real_names.push_back("theta");
 #endif
 
-    if (WarpX::do_boosted_frame_diagnostic && pc->DoBoostedFrameDiags())
-    {
-        real_names.push_back("xold");
-        real_names.push_back("yold");
-        real_names.push_back("zold");
-
-        real_names.push_back("uxold");
-        real_names.push_back("uyold");
-        real_names.push_back("uzold");
-    }
-
     if(pc->do_field_ionization){
         int_names.push_back("ionization_level");
         // int_flags specifies, for each integer attribs, whether it is
@@ -122,6 +111,7 @@ MultiParticleContainer::WritePlotFile (const std::string& dir) const
         // when ionization is on.
         int_flags.resize(1, 1);
     }
+
     // Convert momentum to SI
     pc->ConvertUnits(ConvertDirection::WarpX_to_SI);
     // real_names contains a list of all particle attributes.
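The thread arithmetic in ``batch_cori.sh`` above can be checked by hand. Below is a minimal sketch in plain Python (not part of the patch; the variable names mirror the script's exports and the numbers are its defaults)::

    # Reproduce the batch_cori.sh thread arithmetic (illustrative only).
    CORI_MAX_HYPERTHREAD_LEVEL = 4   # KNL cores expose 4 hyperthreads
    CORI_NCORES_PER_NODE = 64        # 64 of 68 cores used; 4 left to the system
    WARPX_NMPI_PER_NODE = 8          # MPI ranks per node
    WARPX_HYPERTHREAD_LEVEL = 1      # 1 = no hyperthreading

    nhyperthreads_max = CORI_MAX_HYPERTHREAD_LEVEL * CORI_NCORES_PER_NODE  # 256
    nthreads_per_node = WARPX_HYPERTHREAD_LEVEL * CORI_NCORES_PER_NODE     # 64
    omp_num_threads   = nthreads_per_node // WARPX_NMPI_PER_NODE           # 8
    thread_count      = nhyperthreads_max // WARPX_NMPI_PER_NODE           # 32 (the srun -c option)

    print(omp_num_threads, thread_count)  # -> 8 32

With ``#SBATCH -N 2``, the ``srun`` line therefore launches 16 MPI ranks, each bound to 32 hardware-thread slots (8 physical cores times 4 hyperthreads) while running 8 OpenMP threads.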
diff --git a/Source/Evolve/WarpXEvolveEM.cpp b/Source/Evolve/WarpXEvolveEM.cpp
index 5313fb418..a831b7dab 100644
--- a/Source/Evolve/WarpXEvolveEM.cpp
+++ b/Source/Evolve/WarpXEvolveEM.cpp
@@ -140,6 +140,14 @@ WarpX::EvolveEM (int numsteps)
         bool do_insitu = ((step+1) >= insitu_start) && (insitu_int > 0) &&
             ((step+1) % insitu_int == 0);
 
+        if (do_boosted_frame_diagnostic) {
+            std::unique_ptr<MultiFab> cell_centered_data = nullptr;
+            if (WarpX::do_boosted_frame_fields) {
+                cell_centered_data = GetCellCenteredData();
+            }
+            myBFD->writeLabFrameData(cell_centered_data.get(), *mypc, geom[0], cur_time, dt[0]);
+        }
+
         bool move_j = is_synchronized || to_make_plot || do_insitu;
         // If is_synchronized we need to shift j too so that next step we can evolve E by dt/2.
         // We might need to move j because we are going to make a plotfile.
@@ -173,14 +181,6 @@ WarpX::EvolveEM (int numsteps)
             t_new[i] = cur_time;
         }
 
-        if (do_boosted_frame_diagnostic) {
-            std::unique_ptr<MultiFab> cell_centered_data = nullptr;
-            if (WarpX::do_boosted_frame_fields) {
-                cell_centered_data = GetCellCenteredData();
-            }
-            myBFD->writeLabFrameData(cell_centered_data.get(), *mypc, geom[0], cur_time, dt[0]);
-        }
-
         // slice gen //
         if (to_make_plot || do_insitu || to_make_slice_plot)
         {
diff --git a/Source/Particles/MultiParticleContainer.cpp b/Source/Particles/MultiParticleContainer.cpp
index 84108b5dc..7803bdae1 100644
--- a/Source/Particles/MultiParticleContainer.cpp
+++ b/Source/Particles/MultiParticleContainer.cpp
@@ -42,26 +42,6 @@ MultiParticleContainer::MultiParticleContainer (AmrCore* amr_core)
             nspecies_boosted_frame_diags += 1;
         }
     }
-
-    if (WarpX::do_boosted_frame_diagnostic && do_boosted_frame_diags)
-    {
-        for (int i = 0; i < nspecies_boosted_frame_diags; ++i)
-        {
-            int is = map_species_boosted_frame_diags[i];
-            allcontainers[is]->AddRealComp("xold");
-            allcontainers[is]->AddRealComp("yold");
-            allcontainers[is]->AddRealComp("zold");
-            allcontainers[is]->AddRealComp("uxold");
-            allcontainers[is]->AddRealComp("uyold");
-            allcontainers[is]->AddRealComp("uzold");
-        }
-        pc_tmp->AddRealComp("xold");
-        pc_tmp->AddRealComp("yold");
-        pc_tmp->AddRealComp("zold");
-        pc_tmp->AddRealComp("uxold");
-        pc_tmp->AddRealComp("uyold");
-        pc_tmp->AddRealComp("uzold");
-    }
 }
 
 void
diff --git a/Source/Particles/PhysicalParticleContainer.cpp b/Source/Particles/PhysicalParticleContainer.cpp
index 9a3b6a565..1513297a1 100644
--- a/Source/Particles/PhysicalParticleContainer.cpp
+++ b/Source/Particles/PhysicalParticleContainer.cpp
@@ -212,17 +212,8 @@ PhysicalParticleContainer::CheckAndAddParticle(Real x, Real y, Real z,
     {
         // need to create old values
        auto& particle_tile = DefineAndReturnParticleTile(0, 0, 0);
-        if (WarpX::do_boosted_frame_diagnostic && do_boosted_frame_diags)
-        {
-            particle_tile.push_back_real(particle_comps["xold"], x);
-            particle_tile.push_back_real(particle_comps["yold"], y);
-            particle_tile.push_back_real(particle_comps["zold"], z);
-
-            particle_tile.push_back_real(particle_comps["uxold"], u[0]);
-            particle_tile.push_back_real(particle_comps["uyold"], u[1]);
-            particle_tile.push_back_real(particle_comps["uzold"], u[2]);
-        }
     }
+    // add particle
     AddOneParticle(0, 0, 0, x, y, z, attribs);
 }
@@ -301,11 +292,13 @@ PhysicalParticleContainer::AddPlasma (int lev, RealBox part_realbox)
 #ifdef _OPENMP
     // First touch all tiles in the map in serial
     for (MFIter mfi = MakeMFIter(lev); mfi.isValid(); ++mfi) {
-        const int grid_id = mfi.index();
-        const int tile_id = mfi.LocalTileIndex();
-        GetParticles(lev)[std::make_pair(grid_id, tile_id)];
+        auto index = std::make_pair(mfi.index(), mfi.LocalTileIndex());
+        GetParticles(lev)[index];
+        tmp_particle_data.resize(finestLevel()+1);
+        // Create map entry if not there
+        tmp_particle_data[lev][index];
         if ( (NumRuntimeRealComps()>0) || (NumRuntimeIntComps()>0) ) {
-            DefineAndReturnParticleTile(lev, grid_id, tile_id);
+            DefineAndReturnParticleTile(lev, mfi.index(), mfi.LocalTileIndex());
         }
     }
 #endif
@@ -406,14 +399,16 @@ PhysicalParticleContainer::AddPlasma (int lev, RealBox part_realbox)
             // then how many new particles will be injected is not that simple
             // We have to shift fine_injection_box because overlap_box has been shifted.
             Box fine_overlap_box = overlap_box & amrex::shift(fine_injection_box,shifted);
-            max_new_particles += fine_overlap_box.numPts() * num_ppc
-                * (AMREX_D_TERM(rrfac,*rrfac,*rrfac)-1);
-            for (int icell = 0, ncells = overlap_box.numPts(); icell < ncells; ++icell) {
-                IntVect iv = overlap_box.atOffset(icell);
-                int r = (fine_overlap_box.contains(iv)) ? AMREX_D_TERM(rrfac,*rrfac,*rrfac) : 1;
-                for (int ipart = 0; ipart < r; ++ipart) {
-                    cellid_v.push_back(icell);
-                    cellid_v.push_back(ipart);
+            if (fine_overlap_box.ok()) {
+                max_new_particles += fine_overlap_box.numPts() * num_ppc
+                    * (AMREX_D_TERM(rrfac,*rrfac,*rrfac)-1);
+                for (int icell = 0, ncells = overlap_box.numPts(); icell < ncells; ++icell) {
+                    IntVect iv = overlap_box.atOffset(icell);
+                    int r = (fine_overlap_box.contains(iv)) ? AMREX_D_TERM(rrfac,*rrfac,*rrfac) : 1;
+                    for (int ipart = 0; ipart < r; ++ipart) {
+                        cellid_v.push_back(icell);
+                        cellid_v.push_back(ipart);
+                    }
                 }
             }
         }
@@ -431,12 +426,10 @@ PhysicalParticleContainer::AddPlasma (int lev, RealBox part_realbox)
         const int cpuid = ParallelDescriptor::MyProc();
 
         auto& particle_tile = GetParticles(lev)[std::make_pair(grid_id,tile_id)];
-        bool do_boosted = false;
+
         if ( (NumRuntimeRealComps()>0) || (NumRuntimeIntComps()>0) ) {
             DefineAndReturnParticleTile(lev, grid_id, tile_id);
         }
-        do_boosted = WarpX::do_boosted_frame_diagnostic
-            && do_boosted_frame_diags;
 
         auto old_size = particle_tile.GetArrayOfStructs().size();
         auto new_size = old_size + max_new_particles;
@@ -448,15 +441,7 @@ PhysicalParticleContainer::AddPlasma (int lev, RealBox part_realbox)
         for (int ia = 0; ia < PIdx::nattribs; ++ia) {
             pa[ia] = soa.GetRealData(ia).data() + old_size;
         }
-        GpuArray<Real*,6> pb;
-        if (do_boosted) {
-            pb[0] = soa.GetRealData(particle_comps[ "xold"]).data() + old_size;
-            pb[1] = soa.GetRealData(particle_comps[ "yold"]).data() + old_size;
-            pb[2] = soa.GetRealData(particle_comps[ "zold"]).data() + old_size;
-            pb[3] = soa.GetRealData(particle_comps["uxold"]).data() + old_size;
-            pb[4] = soa.GetRealData(particle_comps["uyold"]).data() + old_size;
-            pb[5] = soa.GetRealData(particle_comps["uzold"]).data() + old_size;
-        }
+
         int* pi;
         if (do_field_ionization) {
             pi = soa.GetIntData(particle_icomps["ionization_level"]).data() + old_size;
@@ -630,15 +615,6 @@ PhysicalParticleContainer::AddPlasma (int lev, RealBox part_realbox)
             pa[PIdx::uy][ip] = u.y;
             pa[PIdx::uz][ip] = u.z;
 
-            if (do_boosted) {
-                pb[0][ip] = x;
-                pb[1][ip] = y;
-                pb[2][ip] = z;
-                pb[3][ip] = u.x;
-                pb[4][ip] = u.y;
-                pb[5][ip] = u.z;
-            }
-
 #if (AMREX_SPACEDIM == 3)
             p.pos(0) = x;
             p.pos(1) = y;
@@ -990,6 +966,19 @@ PhysicalParticleContainer::Evolve (int lev,
 
     bool has_buffer = cEx || cjx;
 
+    if (WarpX::do_boosted_frame_diagnostic && do_boosted_frame_diags)
+    {
+        for (WarpXParIter pti(*this, lev); pti.isValid(); ++pti)
+        {
+            const auto np = pti.numParticles();
+            const auto lev = pti.GetLevel();
+            const auto index = pti.GetPairIndex();
+            tmp_particle_data.resize(finestLevel()+1);
+            for (int i = 0; i < TmpIdx::nattribs; ++i)
+                tmp_particle_data[lev][index][i].resize(np);
+        }
+    }
+
 #ifdef _OPENMP
 #pragma omp parallel
 #endif
@@ -1715,22 +1704,21 @@ PhysicalParticleContainer::PushP (int lev, Real dt,
 void
 PhysicalParticleContainer::copy_attribs(WarpXParIter& pti,const Real* xp,
                                         const Real* yp, const Real* zp)
 {
     auto& attribs = pti.GetAttribs();
-
     Real* AMREX_RESTRICT uxp = attribs[PIdx::ux].dataPtr();
     Real* AMREX_RESTRICT uyp = attribs[PIdx::uy].dataPtr();
     Real* AMREX_RESTRICT uzp = attribs[PIdx::uz].dataPtr();
 
-    Real* AMREX_RESTRICT xpold = pti.GetAttribs(particle_comps["xold"]).dataPtr();
-    Real* AMREX_RESTRICT ypold = pti.GetAttribs(particle_comps["yold"]).dataPtr();
-    Real* AMREX_RESTRICT zpold = pti.GetAttribs(particle_comps["zold"]).dataPtr();
-    Real* AMREX_RESTRICT uxpold = pti.GetAttribs(particle_comps["uxold"]).dataPtr();
-    Real* AMREX_RESTRICT uypold = pti.GetAttribs(particle_comps["uyold"]).dataPtr();
-    Real* AMREX_RESTRICT uzpold = pti.GetAttribs(particle_comps["uzold"]).dataPtr();
-
-    const long np = pti.numParticles();
-
+    const auto np = pti.numParticles();
+    const auto lev = pti.GetLevel();
+    const auto index = pti.GetPairIndex();
+    Real* AMREX_RESTRICT xpold = tmp_particle_data[lev][index][TmpIdx::xold ].dataPtr();
+    Real* AMREX_RESTRICT ypold = tmp_particle_data[lev][index][TmpIdx::yold ].dataPtr();
+    Real* AMREX_RESTRICT zpold = tmp_particle_data[lev][index][TmpIdx::zold ].dataPtr();
+    Real* AMREX_RESTRICT uxpold = tmp_particle_data[lev][index][TmpIdx::uxold].dataPtr();
+    Real* AMREX_RESTRICT uypold = tmp_particle_data[lev][index][TmpIdx::uyold].dataPtr();
+    Real* AMREX_RESTRICT uzpold = tmp_particle_data[lev][index][TmpIdx::uzold].dataPtr();
+
     ParallelFor( np,
                  [=] AMREX_GPU_DEVICE (long i) {
                      xpold[i]=xp[i];
@@ -1815,15 +1803,15 @@ void PhysicalParticleContainer::GetParticleSlice(const int direction, const Real
             auto& uyp_new = attribs[PIdx::uy ];
             auto& uzp_new = attribs[PIdx::uz ];
 
-            auto& xp_old = pti.GetAttribs(particle_comps["xold"]);
-            auto& yp_old = pti.GetAttribs(particle_comps["yold"]);
-            auto& zp_old = pti.GetAttribs(particle_comps["zold"]);
-            auto& uxp_old = pti.GetAttribs(particle_comps["uxold"]);
-            auto& uyp_old = pti.GetAttribs(particle_comps["uyold"]);
-            auto& uzp_old = pti.GetAttribs(particle_comps["uzold"]);
+            auto& xp_old = tmp_particle_data[lev][index][TmpIdx::xold];
+            auto& yp_old = tmp_particle_data[lev][index][TmpIdx::yold];
+            auto& zp_old = tmp_particle_data[lev][index][TmpIdx::zold];
+            auto& uxp_old = tmp_particle_data[lev][index][TmpIdx::uxold];
+            auto& uyp_old = tmp_particle_data[lev][index][TmpIdx::uyold];
+            auto& uzp_old = tmp_particle_data[lev][index][TmpIdx::uzold];
 
             const long np = pti.numParticles();
-
+
             Real uzfrm = -WarpX::gamma_boost*WarpX::beta_boost*PhysConst::c;
             Real inv_c2 = 1.0/PhysConst::c/PhysConst::c;
diff --git a/Source/Particles/RigidInjectedParticleContainer.cpp b/Source/Particles/RigidInjectedParticleContainer.cpp
index a87168a75..4893b3294 100644
--- a/Source/Particles/RigidInjectedParticleContainer.cpp
+++ b/Source/Particles/RigidInjectedParticleContainer.cpp
@@ -239,15 +239,13 @@ RigidInjectedParticleContainer::PushPX(WarpXParIter& pti,
     Real* const AMREX_RESTRICT Bzp = attribs[PIdx::Bz].dataPtr();
 
     if (!done_injecting_lev) {
-        if (!(WarpX::do_boosted_frame_diagnostic && do_boosted_frame_diags)) {
-            // If the old values are not already saved, create copies here.
-            xp_save = xp;
-            yp_save = yp;
-            zp_save = zp;
-            uxp_save = uxp;
-            uyp_save = uyp;
-            uzp_save = uzp;
-        }
+        // If the old values are not already saved, create copies here.
+        xp_save = xp;
+        yp_save = yp;
+        zp_save = zp;
+        uxp_save = uxp;
+        uyp_save = uyp;
+        uzp_save = uzp;
 
         // Scale the fields of particles about to cross the injection plane.
         // This only approximates what should be happening. The particles
@@ -275,27 +273,12 @@ RigidInjectedParticleContainer::PushPX(WarpXParIter& pti,
 
     if (!done_injecting_lev) {
-        Real* AMREX_RESTRICT x_save;
-        Real* AMREX_RESTRICT y_save;
-        Real* AMREX_RESTRICT z_save;
-        Real* AMREX_RESTRICT ux_save;
-        Real* AMREX_RESTRICT uy_save;
-        Real* AMREX_RESTRICT uz_save;
-        if (!(WarpX::do_boosted_frame_diagnostic && do_boosted_frame_diags)) {
-            x_save = xp_save.dataPtr();
-            y_save = yp_save.dataPtr();
-            z_save = zp_save.dataPtr();
-            ux_save = uxp_save.dataPtr();
-            uy_save = uyp_save.dataPtr();
-            uz_save = uzp_save.dataPtr();
-        } else {
-            x_save = pti.GetAttribs(particle_comps["xold"]).dataPtr();
-            y_save = pti.GetAttribs(particle_comps["yold"]).dataPtr();
-            z_save = pti.GetAttribs(particle_comps["zold"]).dataPtr();
-            ux_save = pti.GetAttribs(particle_comps["uxold"]).dataPtr();
-            uy_save = pti.GetAttribs(particle_comps["uyold"]).dataPtr();
-            uz_save = pti.GetAttribs(particle_comps["uzold"]).dataPtr();
-        }
+        Real* AMREX_RESTRICT x_save = xp_save.dataPtr();
+        Real* AMREX_RESTRICT y_save = yp_save.dataPtr();
+        Real* AMREX_RESTRICT z_save = zp_save.dataPtr();
+        Real* AMREX_RESTRICT ux_save = uxp_save.dataPtr();
+        Real* AMREX_RESTRICT uy_save = uyp_save.dataPtr();
+        Real* AMREX_RESTRICT uz_save = uzp_save.dataPtr();
 
         // Undo the push for particles not injected yet.
         // The zp are advanced a fixed amount.
diff --git a/Source/Particles/WarpXParticleContainer.H b/Source/Particles/WarpXParticleContainer.H
index 1f144f628..bc46a116b 100644
--- a/Source/Particles/WarpXParticleContainer.H
+++ b/Source/Particles/WarpXParticleContainer.H
@@ -29,6 +29,15 @@ struct DiagIdx
     };
 };
 
+struct TmpIdx
+{
+    enum {
+        xold = 0,
+        yold, zold, uxold, uyold, uzold,
+        nattribs
+    };
+};
+
 namespace ParticleStringNames
 {
     const std::map<std::string, int> to_index = {
@@ -326,7 +335,10 @@ protected:
     amrex::Vector<amrex::FArrayBox> local_jy;
     amrex::Vector<amrex::FArrayBox> local_jz;
 
-    amrex::Vector<amrex::Cuda::ManagedDeviceVector<amrex::Real> > m_xp, m_yp, m_zp, m_giv;
+    using DataContainer = amrex::Gpu::ManagedDeviceVector<amrex::Real>;
+    using PairIndex = std::pair<int, int>;
+
+    amrex::Vector<DataContainer> m_xp, m_yp, m_zp, m_giv;
 
     // Whether to dump particle quantities.
     // If true, particle position is always dumped.
@@ -336,7 +348,9 @@ protected:
     amrex::Vector<int> plot_flags;
     // list of names of attributes to dump.
     amrex::Vector<std::string> plot_vars;
-
+
+    amrex::Vector<std::map<PairIndex, std::array<DataContainer, TmpIdx::nattribs> > > tmp_particle_data;
+
 private:
     virtual void particlePostLocate(ParticleType& p, const amrex::ParticleLocData& pld,
                                     const int lev) override;
diff --git a/Source/Particles/WarpXParticleContainer.cpp b/Source/Particles/WarpXParticleContainer.cpp
index 2cb18f8b5..8d2499a35 100644
--- a/Source/Particles/WarpXParticleContainer.cpp
+++ b/Source/Particles/WarpXParticleContainer.cpp
@@ -85,17 +85,6 @@ WarpXParticleContainer::WarpXParticleContainer (AmrCore* amr_core, int ispecies)
     particle_comps["theta"] = PIdx::theta;
 #endif
 
-    if (WarpX::do_boosted_frame_diagnostic && do_boosted_frame_diags)
-    {
-        particle_comps["xold"] = PIdx::nattribs;
-        particle_comps["yold"] = PIdx::nattribs+1;
-        particle_comps["zold"] = PIdx::nattribs+2;
-        particle_comps["uxold"] = PIdx::nattribs+3;
-        particle_comps["uyold"] = PIdx::nattribs+4;
-        particle_comps["uzold"] = PIdx::nattribs+5;
-
-    }
-
     // Initialize temporary local arrays for charge/current deposition
     int num_threads = 1;
 #ifdef _OPENMP
@@ -240,12 +229,6 @@ WarpXParticleContainer::AddNParticles (int lev,
 
         if ( (NumRuntimeRealComps()>0) || (NumRuntimeIntComps()>0) ){
             auto& ptile = DefineAndReturnParticleTile(0, 0, 0);
-            if (WarpX::do_boosted_frame_diagnostic && do_boosted_frame_diags)
-            {
-                ptile.push_back_real(particle_comps["xold"], x[i]);
-                ptile.push_back_real(particle_comps["yold"], y[i]);
-                ptile.push_back_real(particle_comps["zold"], z[i]);
-            }
         }
 
         particle_tile.push_back(p);
@@ -260,12 +243,6 @@ WarpXParticleContainer::AddNParticles (int lev,
 
         if ( (NumRuntimeRealComps()>0) || (NumRuntimeIntComps()>0) ){
             auto& ptile = DefineAndReturnParticleTile(0, 0, 0);
-            if (WarpX::do_boosted_frame_diagnostic && do_boosted_frame_diags)
-            {
-                ptile.push_back_real(particle_comps["uxold"], vx + ibegin, vx + iend);
-                ptile.push_back_real(particle_comps["uyold"], vy + ibegin, vy + iend);
-                ptile.push_back_real(particle_comps["uzold"], vz + ibegin, vz + iend);
-            }
         }
 
         for (int comp = PIdx::uz+1; comp < PIdx::nattribs; ++comp)
diff --git a/Tools/compute_domain.py b/Tools/compute_domain.py
new file mode 100644
index 000000000..822d776e8
--- /dev/null
+++ b/Tools/compute_domain.py
@@ -0,0 +1,114 @@
+import os, shutil, re
+import numpy as np
+import scipy.constants as scc
+import time, copy
+
+'''
+This Python script helps a user to parallelize a WarpX simulation.
+
+The user specifies the minimal size of the physical domain and the resolution
+in each dimension, and the script computes:
+- the number of cells and physical domain to satisfy the user-specified domain
+  size and resolution AND make sure that the number of cells along each
+  direction is a multiple of max_grid_size.
+- a starting point on how to parallelize on Cori KNL (number of nodes, etc.).
+
+When running in a boosted frame, the script also has the option to
+automatically compute the number of cells in z to satisfy dx>dz in the boosted
+frame.
+
+Note that the script has no notion of blocking_factor. It is assumed that
+blocking_factor = max_grid_size, and that all boxes have the same size.
+'''
+
+# Update the lines below for your simulation
+# ------------------------------------------
+# 2 elements for 2D, 3 elements for 3D
+# Lower corner of the box
+box_lo0 = np.array([-25.e-6, -25.e-6, -15.e-6])
+# Upper corner of the box
+box_hi0 = np.array([ 25.e-6,  25.e-6,  60.e-6])
+# Cell size
+dx = 1.e-6
+dz = dx
+cell_size = np.array([dx, dx, dz])
+# Use this for simulations in a boosted frame if you
+# want to enforce dz < dx / dx_over_dz_boosted_frame
+compute_dz_boosted_frame = True
+gamma_boost = 30.
+dx_over_dz_boosted_frame = 1.1 # >1. is usually more stable
+# ------------------------------------------
+
+# similar to numpy.ceil, except the output data type is int
+def intceil(num):
+    return np.ceil(num).astype(int)
+
+# Enlarge simulation boundaries to satisfy three conditions:
+# - The resolution must be exactly the one provided by the user
+# - The physical domain must cover the domain specified by box_lo0, box_hi0
+# - The number of cells must be a multiple of mgs (max_grid_size).
+def adjust_bounds(box_lo0, box_hi0, box_ncell0, mgs):
+    cell_size = (box_hi0-box_lo0) / box_ncell0
+    box_ncell = intceil(box_ncell0/mgs)*mgs
+    box_lo = box_ncell * cell_size * box_lo0 / (box_hi0 - box_lo0)
+    box_hi = box_ncell * cell_size * box_hi0 / (box_hi0 - box_lo0)
+    return box_lo, box_hi, box_ncell
+
+# Calculate parallelization for the simulation, given numerical parameters
+# (number of cells, max_grid_size, number of threads per node etc.)
+def nb_nodes_mpi(box_ncell,mgs,threadspernode,ompnumthreads,ngridpernode, ndim):
+    nmpipernode = threadspernode/ompnumthreads
+    ngridpermpi = ngridpernode/nmpipernode
+    box_ngrids = box_ncell/mgs
+    if ndim == 2:
+        ngrids = box_ngrids[0] * box_ngrids[1]
+    elif ndim == 3:
+        ngrids = np.prod(box_ngrids)
+    n_mpi = intceil( ngrids/ngridpermpi )
+    n_node = intceil( n_mpi/nmpipernode )
+    return n_node, n_mpi
+
+# Get number of dimensions (2 or 3)
+ndim = box_lo0.size
+if compute_dz_boosted_frame:
+    # Adjust dz so that dx/dz = dx_over_dz_boosted_frame in simulation frame
+    cell_size[-1] = cell_size[0] / dx_over_dz_boosted_frame / 2. / gamma_boost
+# Given the resolution, compute number of cells a priori
+box_ncell0 = ( box_hi0 - box_lo0 ) / cell_size
+
+if ndim == 2:
+    # Set of parameters suitable for a 2D simulation on Cori KNL
+    ngridpernode = 16.
+    ompnumthreads = 8.
+    mgs = 1024.
+    threadspernode = 64. # HyperThreading level = 1: no hyperthreading
+    distance_between_threads = int(68*4/threadspernode)
+    c_option = int( ompnumthreads*distance_between_threads )
+elif ndim == 3:
+    # Set of parameters suitable for a 3D simulation on Cori KNL
+    ngridpernode = 8.
+    ompnumthreads = 8.
+    mgs = 64.
+    threadspernode = 64. # HyperThreading level = 1: no hyperthreading
+    distance_between_threads = int(68*4/threadspernode)
+    c_option = int( ompnumthreads*distance_between_threads )
+
+# Adjust simulation bounds
+box_lo, box_hi, box_ncell = adjust_bounds(box_lo0, box_hi0, box_ncell0, mgs)
+
+# Calculate parallelization
+n_node,n_mpi = nb_nodes_mpi(box_ncell, mgs, threadspernode, ompnumthreads, ngridpernode, ndim)
+
+# Print results
+string_output = ' ### Parameters used ### \n'
+string_output += 'ngridpernode = ' + str(ngridpernode) + '\n'
+string_output += 'ompnumthreads = ' + str(ompnumthreads) + '\n'
+string_output += 'mgs (max_grid_size) = ' + str(mgs) + '\n'
+string_output += 'threadspernode ( = # MPI ranks per node * OMP_NUM_THREADS) = ' + str(threadspernode) + '\n'
+string_output += 'ndim = ' + str(ndim) + '\n\n'
+string_output += 'box_lo = ' + str(box_lo) + '\n'
+string_output += 'box_hi = ' + str(box_hi) + '\n'
+string_output += 'box_ncell = ' + str(box_ncell) + '\n'
+string_output += 'n_node = ' + str(n_node) + '\n'
+string_output += 'n_mpi = ' + str(n_mpi) + '\n'
+print(string_output)
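To use the tool, edit the parameter block at the top of ``Tools/compute_domain.py`` and run ``python compute_domain.py``; it prints the adjusted bounds, the cell counts, and a suggested number of nodes and MPI ranks. The rounding rule in ``adjust_bounds`` can be illustrated on a single axis (a minimal sketch with made-up numbers, not part of the patch)::

    import numpy as np

    # One axis: the requested resolution needs ~85 cells, but with
    # max_grid_size = 64 the count is rounded up to the next multiple (128)
    # and the bounds are enlarged proportionally, keeping the cell size fixed.
    lo0, hi0, ncell0, mgs = -25.e-6, 25.e-6, 85., 64.
    cell_size = (hi0 - lo0) / ncell0
    ncell = int(np.ceil(ncell0 / mgs) * mgs)         # 128
    lo = ncell * cell_size * lo0 / (hi0 - lo0)       # ~ -3.76e-5
    hi = ncell * cell_size * hi0 / (hi0 - lo0)       # ~ +3.76e-5
    assert np.isclose((hi - lo) / ncell, cell_size)  # resolution unchanged
    print(ncell, lo, hi)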