diff options
Diffstat (limited to 'Source/Particles/PhysicalParticleContainer.cpp')
-rw-r--r-- | Source/Particles/PhysicalParticleContainer.cpp | 73 |
1 files changed, 57 insertions, 16 deletions
diff --git a/Source/Particles/PhysicalParticleContainer.cpp b/Source/Particles/PhysicalParticleContainer.cpp index f37ea5ea3..62d4df594 100644 --- a/Source/Particles/PhysicalParticleContainer.cpp +++ b/Source/Particles/PhysicalParticleContainer.cpp @@ -2624,7 +2624,24 @@ PhysicalParticleContainer::PushPX (WarpXParIter& pti, const auto t_do_not_gather = do_not_gather; - amrex::ParallelFor( np_to_push, [=] AMREX_GPU_DEVICE (long ip) + enum exteb_flags : int { no_exteb, has_exteb }; + enum qed_flags : int { no_qed, has_qed }; + + int exteb_runtime_flag = getExternalEB.isNoOp() ? no_exteb : has_exteb; +#ifdef WARPX_QED + int qed_runtime_flag = (local_has_quantum_sync || do_sync) ? has_qed : no_qed; +#else + int qed_runtime_flag = no_qed; +#endif + + // Using this version of ParallelFor with compile time options + // improves performance when qed or external EB are not used by reducing + // register pressure. + amrex::ParallelFor(TypeList<CompileTimeOptions<no_exteb,has_exteb>, + CompileTimeOptions<no_qed ,has_qed>>{}, + {exteb_runtime_flag, qed_runtime_flag}, + np_to_push, [=] AMREX_GPU_DEVICE (long ip, auto exteb_control, + [[maybe_unused]] auto qed_control) { amrex::ParticleReal xp, yp, zp; getPosition(ip, xp, yp, zp); @@ -2650,30 +2667,54 @@ PhysicalParticleContainer::PushPX (WarpXParIter& pti, dx_arr, xyzmin_arr, lo, n_rz_azimuthal_modes, nox, galerkin_interpolation); } - // Externally applied E and B-field in Cartesian co-ordinates - getExternalEB(ip, Exp, Eyp, Ezp, Bxp, Byp, Bzp); + + auto const& externeb_fn = getExternalEB; // Have to do this for nvcc + if constexpr (exteb_control == has_exteb) { + externeb_fn(ip, Exp, Eyp, Ezp, Bxp, Byp, Bzp); + } scaleFields(xp, yp, zp, Exp, Eyp, Ezp, Bxp, Byp, Bzp); - doParticlePush(getPosition, setPosition, copyAttribs, ip, - ux[ip], uy[ip], uz[ip], - Exp, Eyp, Ezp, Bxp, Byp, Bzp, - ion_lev ? ion_lev[ip] : 0, - m, q, pusher_algo, do_crr, do_copy, #ifdef WARPX_QED - do_sync, - t_chi_max, + if (!do_sync) #endif - dt); - + { + doParticlePush<0>(getPosition, setPosition, copyAttribs, ip, + ux[ip], uy[ip], uz[ip], + Exp, Eyp, Ezp, Bxp, Byp, Bzp, + ion_lev ? ion_lev[ip] : 0, + m, q, pusher_algo, do_crr, do_copy, #ifdef WARPX_QED - if (local_has_quantum_sync) { - evolve_opt(ux[ip], uy[ip], uz[ip], - Exp, Eyp, Ezp,Bxp, Byp, Bzp, - dt, p_optical_depth_QSR[ip]); + t_chi_max, +#endif + dt); + } +#ifdef WARPX_QED + else { + if constexpr (qed_control == has_qed) { + doParticlePush<1>(getPosition, setPosition, copyAttribs, ip, + ux[ip], uy[ip], uz[ip], + Exp, Eyp, Ezp, Bxp, Byp, Bzp, + ion_lev ? ion_lev[ip] : 0, + m, q, pusher_algo, do_crr, do_copy, + t_chi_max, + dt); + } } #endif +#ifdef WARPX_QED + auto foo_local_has_quantum_sync = local_has_quantum_sync; + auto foo_podq = p_optical_depth_QSR; + auto& evolve_opt_fn = evolve_opt; // have to do all these for nvcc + if constexpr (qed_control == has_qed) { + if (foo_local_has_quantum_sync) { + evolve_opt_fn(ux[ip], uy[ip], uz[ip], + Exp, Eyp, Ezp,Bxp, Byp, Bzp, + dt, foo_podq[ip]); + } + } +#endif }); } |