diff options
Diffstat (limited to 'Source')
23 files changed, 1621 insertions, 1213 deletions
diff --git a/Source/Initialization/CustomDensityProb.H b/Source/Initialization/CustomDensityProb.H new file mode 100644 index 000000000..b00830e6c --- /dev/null +++ b/Source/Initialization/CustomDensityProb.H @@ -0,0 +1,49 @@ +#ifndef CUSTOM_DENSITY_PROB_H_ +#define CUSTOM_DENSITY_PROB_H_ + +#include <AMReX_ParmParse.H> +#include <AMReX_Arena.H> +#include <AMReX_Gpu.H> +#include <AMReX_Dim3.H> + +// An example of Custom Density Profile + +// struct whose getDensity returns density at a given position computed from +// a custom function, with runtime input parameters. +struct InjectorDensityCustom +{ + InjectorDensityCustom (std::string const& species_name) + : p(nullptr) + { + // Read parameters for custom density profile from file, and + // store them in managed memory. + amrex::ParmParse pp(species_name); + std::vector<amrex::Real> v; + pp.getarr("custom_profile_params", v); + p = static_cast<amrex::Real*> + (amrex::The_Managed_Arena()->alloc(sizeof(amrex::Real)*v.size())); + for (int i = 0; i < static_cast<int>(v.size()); ++i) { + p[i] = v[i]; + } + } + + // Return density at given position, using user-defined parameters + // stored in p. + AMREX_GPU_HOST_DEVICE + amrex::Real + getDensity (amrex::Real, amrex::Real, amrex::Real) const noexcept + { + return p[0]; + } + + // Note that we are not allowed to have non-trivial destructor. + // So we rely on clear() to free memory. + void clear () { + amrex::The_Managed_Arena()->free(p); + } + +private: + amrex::Real* p; +}; + +#endif diff --git a/Source/Initialization/CustomDensityProb.cpp b/Source/Initialization/CustomDensityProb.cpp deleted file mode 100644 index 3efcb13c5..000000000 --- a/Source/Initialization/CustomDensityProb.cpp +++ /dev/null @@ -1,12 +0,0 @@ -#include <PlasmaInjector.H> - -#include <iostream> - -using namespace amrex; - -/// -/// This "custom" density profile just does constant -/// -Real CustomDensityProfile::getDensity(Real x, Real y, Real z) const { - return params[0]; -} diff --git a/Source/Initialization/CustomMomentumProb.H b/Source/Initialization/CustomMomentumProb.H new file mode 100644 index 000000000..f8bc29a05 --- /dev/null +++ b/Source/Initialization/CustomMomentumProb.H @@ -0,0 +1,30 @@ +#ifndef CUSTOM_MOMENTUM_PROB_H +#define CUSTOM_MOMENTUM_PROB_H + +#include <AMReX_ParmParse.H> +#include <AMReX_Gpu.H> +#include <AMReX_Arena.H> +#include <AMReX_Dim3.H> + +// An example of Custom Momentum Profile + +// struct whose getDensity returns momentum at a given position computed from +// a custom function. +struct InjectorMomentumCustom +{ + InjectorMomentumCustom (std::string const& /*a_species_name*/) {} + + // Return momentum at given position (illustration: momentum=0). + AMREX_GPU_HOST_DEVICE + amrex::XDim3 + getMomentum (amrex::Real, amrex::Real, amrex::Real) const noexcept + { + return {0., 0., 0.}; + } + + // Note that we are not allowed to have non-trivial destructor. + // So we rely on clear() to free memory if needed. + void clear () { } +}; + +#endif diff --git a/Source/Initialization/CustomMomentumProb.cpp b/Source/Initialization/CustomMomentumProb.cpp deleted file mode 100644 index fa21252d0..000000000 --- a/Source/Initialization/CustomMomentumProb.cpp +++ /dev/null @@ -1,14 +0,0 @@ -#include <PlasmaInjector.H> - -#include <iostream> - -using namespace amrex; - -/// -/// This "custom" momentum distribution just does 0 momentum -/// -void CustomMomentumDistribution::getMomentum(vec3& u, Real x, Real y, Real z) { - u[0] = 0; - u[1] = 0; - u[2] = 0; -} diff --git a/Source/Initialization/InjectorDensity.H b/Source/Initialization/InjectorDensity.H new file mode 100644 index 000000000..b7f5c26eb --- /dev/null +++ b/Source/Initialization/InjectorDensity.H @@ -0,0 +1,202 @@ +#ifndef INJECTOR_DENSITY_H_ +#define INJECTOR_DENSITY_H_ + +#include <AMReX_Gpu.H> +#include <AMReX_Dim3.H> +#include <GpuParser.H> +#include <CustomDensityProb.H> +#include <WarpXConst.H> + +// struct whose getDensity returns constant density. +struct InjectorDensityConstant +{ + InjectorDensityConstant (amrex::Real a_rho) noexcept : m_rho(a_rho) {} + + AMREX_GPU_HOST_DEVICE + amrex::Real + getDensity (amrex::Real, amrex::Real, amrex::Real) const noexcept + { + return m_rho; + } + +private: + amrex::Real m_rho; +}; + +// struct whose getDensity returns local density computed from parser. +struct InjectorDensityParser +{ + InjectorDensityParser (WarpXParser const& a_parser) noexcept + : m_parser(a_parser) {} + + AMREX_GPU_HOST_DEVICE + amrex::Real + getDensity (amrex::Real x, amrex::Real y, amrex::Real z) const noexcept + { + return m_parser(x,y,z); + } + + // InjectorDensityParser constructs this GpuParser from WarpXParser. + GpuParser m_parser; +}; + +// struct whose getDensity returns local density computed from predefined profile. +struct InjectorDensityPredefined +{ + InjectorDensityPredefined (std::string const& a_species_name) noexcept; + + void clear (); + + AMREX_GPU_HOST_DEVICE + amrex::Real + getDensity (amrex::Real x, amrex::Real y, amrex::Real z) const noexcept + { + // Choices for profile are: + // - parabolic_channel + switch (profile) + { + case Profile::parabolic_channel: + { + amrex::Real z_start = p[0]; + amrex::Real ramp_up = p[1]; + amrex::Real plateau = p[2]; + amrex::Real ramp_down = p[3]; + amrex::Real rc = p[4]; + amrex::Real n0 = p[5]; + amrex::Real n; + amrex::Real kp = PhysConst::q_e/PhysConst::c + *std::sqrt( n0/(PhysConst::m_e*PhysConst::ep0) ); + + if ((z-z_start)>=0 and + (z-z_start)<ramp_up ) { + n = (z-z_start)/ramp_up; + } else if ((z-z_start)>=ramp_up and + (z-z_start)< ramp_up+plateau ) { + n = 1.; + } else if ((z-z_start)>=ramp_up+plateau and + (z-z_start)< ramp_up+plateau+ramp_down) { + n = 1.-((z-z_start)-ramp_up-plateau)/ramp_down; + } else { + n = 0.; + } + n *= n0*(1.+4.*(x*x+y*y)/(kp*kp*rc*rc*rc*rc)); + return n; + } + default: + amrex::Abort("InjectorDensityPredefined: how did we get here?"); + return 0.0; + } + } + +private: + enum struct Profile { null, parabolic_channel }; + Profile profile; + amrex::Real* p; +}; + +// Base struct for density injector. +// InjectorDensity contains a union (called Object) that holds any one +// instance of: +// - InjectorDensityConstant : to generate constant density; +// - InjectorDensityParser : to generate density from parser; +// - InjectorDensityCustom : to generate density from custom profile; +// - InjectorDensityPredefined: to generate density from predefined profile; +// The choice is made at runtime, depending in the constructor called. +// This mimics virtual functions, except the struct is stored in managed memory +// and member functions are made __host__ __device__ to run on CPU and GPU. +// This struct inherits from amrex::Gpu::Managed to provide new and delete +// operators in managed memory when running on GPU. Nothing special on CPU. +struct InjectorDensity + : public amrex::Gpu::Managed +{ + // This constructor stores a InjectorDensityConstant in union object. + InjectorDensity (InjectorDensityConstant* t, amrex::Real a_rho) + : type(Type::constant), + object(t,a_rho) + { } + + // This constructor stores a InjectorDensityParser in union object. + InjectorDensity (InjectorDensityParser* t, WarpXParser const& a_parser) + : type(Type::parser), + object(t,a_parser) + { } + + // This constructor stores a InjectorDensityCustom in union object. + InjectorDensity (InjectorDensityCustom* t, std::string const& a_species_name) + : type(Type::custom), + object(t,a_species_name) + { } + + // This constructor stores a InjectorDensityPredefined in union object. + InjectorDensity (InjectorDensityPredefined* t, std::string const& a_species_name) + : type(Type::predefined), + object(t,a_species_name) + { } + + // Explicitly prevent the compiler from generating copy constructors + // and copy assignment operators. + InjectorDensity (InjectorDensity const&) = delete; + InjectorDensity (InjectorDensity&&) = delete; + void operator= (InjectorDensity const&) = delete; + void operator= (InjectorDensity &&) = delete; + + ~InjectorDensity (); + + std::size_t sharedMemoryNeeded () const noexcept; + + // call getDensity from the object stored in the union + // (the union is called Object, and the instance is called object). + AMREX_GPU_HOST_DEVICE + amrex::Real + getDensity (amrex::Real x, amrex::Real y, amrex::Real z) const noexcept + { + switch (type) + { + case Type::parser: + { + return object.parser.getDensity(x,y,z); + } + case Type::constant: + { + return object.constant.getDensity(x,y,z); + } + case Type::custom: + { + return object.custom.getDensity(x,y,z); + } + case Type::predefined: + { + return object.predefined.getDensity(x,y,z); + } + default: + { + amrex::Abort("InjectorDensity: unknown type"); + return 0.0; + } + } + } + +private: + enum struct Type { constant, custom, predefined, parser }; + Type type; + + // An instance of union Object constructs and stores any one of + // the objects declared (constant or parser or custom or predefined). + union Object { + Object (InjectorDensityConstant*, amrex::Real a_rho) noexcept + : constant(a_rho) {} + Object (InjectorDensityParser*, WarpXParser const& a_parser) noexcept + : parser(a_parser) {} + Object (InjectorDensityCustom*, std::string const& a_species_name) noexcept + : custom(a_species_name) {} + Object (InjectorDensityPredefined*, std::string const& a_species_name) noexcept + : predefined(a_species_name) {} + InjectorDensityConstant constant; + InjectorDensityParser parser; + InjectorDensityCustom custom; + InjectorDensityPredefined predefined; + }; + Object object; +}; + +#endif diff --git a/Source/Initialization/InjectorDensity.cpp b/Source/Initialization/InjectorDensity.cpp new file mode 100644 index 000000000..7fed85b75 --- /dev/null +++ b/Source/Initialization/InjectorDensity.cpp @@ -0,0 +1,77 @@ +#include <PlasmaInjector.H> + +using namespace amrex; + +InjectorDensity::~InjectorDensity () +{ + switch (type) + { + case Type::parser: + { + object.parser.m_parser.clear(); + break; + } + case Type::custom: + { + object.custom.clear(); + break; + } + case Type::predefined: + { + object.predefined.clear(); + break; + } + } +} + +// Compute the amount of memory needed in GPU Shared Memory. +std::size_t +InjectorDensity::sharedMemoryNeeded () const noexcept +{ + switch (type) + { + case Type::parser: + { + // For parser injector, the 3D position of each particle + // is stored in shared memory. + return amrex::Gpu::numThreadsPerBlockParallelFor() * sizeof(double) * 3; + } + default: + return 0; + } +} + +InjectorDensityPredefined::InjectorDensityPredefined ( + std::string const& a_species_name) noexcept + : profile(Profile::null) +{ + ParmParse pp(a_species_name); + + std::vector<amrex::Real> v; + // Read parameters for the predefined plasma profile, + // and store them in managed memory + pp.getarr("predefined_profile_params", v); + p = static_cast<amrex::Real*> + (amrex::The_Managed_Arena()->alloc(sizeof(amrex::Real)*v.size())); + for (int i = 0; i < static_cast<int>(v.size()); ++i) { + p[i] = v[i]; + } + + // Parse predefined profile name, and update member variable profile. + std::string which_profile_s; + pp.query("predefined_profile_name", which_profile_s); + std::transform(which_profile_s.begin(), which_profile_s.end(), + which_profile_s.begin(), ::tolower); + if (which_profile_s == "parabolic_channel"){ + profile = Profile::parabolic_channel; + AMREX_ALWAYS_ASSERT_WITH_MESSAGE(v.size() > 6, + "InjectorDensityPredefined::parabolic_channel: not enough parameters"); + } +} + +// Note that we are not allowed to have non-trivial destructor. +// So we rely on clear() to free memory. +void InjectorDensityPredefined::clear () +{ + amrex::The_Managed_Arena()->free(p); +} diff --git a/Source/Initialization/InjectorMomentum.H b/Source/Initialization/InjectorMomentum.H new file mode 100644 index 000000000..399ee7759 --- /dev/null +++ b/Source/Initialization/InjectorMomentum.H @@ -0,0 +1,223 @@ +#ifndef INJECTOR_MOMENTUM_H_ +#define INJECTOR_MOMENTUM_H_ + +#include <AMReX_Gpu.H> +#include <AMReX_Dim3.H> +#include <GpuParser.H> +#include <CustomMomentumProb.H> + +// struct whose getMomentum returns constant momentum. +struct InjectorMomentumConstant +{ + InjectorMomentumConstant (amrex::Real a_ux, amrex::Real a_uy, amrex::Real a_uz) noexcept + : m_ux(a_ux), m_uy(a_uy), m_uz(a_uz) {} + + AMREX_GPU_HOST_DEVICE + amrex::XDim3 + getMomentum (amrex::Real, amrex::Real, amrex::Real) const noexcept + { + return amrex::XDim3{m_ux,m_uy,m_uz}; + } +private: + amrex::Real m_ux, m_uy, m_uz; +}; + +// struct whose getMomentum returns momentum for 1 particle, from random +// gaussian distribution. +struct InjectorMomentumGaussian +{ + InjectorMomentumGaussian (amrex::Real a_ux_m, amrex::Real a_uy_m, + amrex::Real a_uz_m, amrex::Real a_ux_th, + amrex::Real a_uy_th, amrex::Real a_uz_th) noexcept + : m_ux_m(a_ux_m), m_uy_m(a_uy_m), m_uz_m(a_uz_m), + m_ux_th(a_ux_th), m_uy_th(a_uy_th), m_uz_th(a_uz_th) + {} + + AMREX_GPU_HOST_DEVICE + amrex::XDim3 + getMomentum (amrex::Real x, amrex::Real y, amrex::Real z) const noexcept + { + return amrex::XDim3{amrex::RandomNormal(m_ux_m, m_ux_th), + amrex::RandomNormal(m_uy_m, m_uy_th), + amrex::RandomNormal(m_uz_m, m_uz_th)}; + } +private: + amrex::Real m_ux_m, m_uy_m, m_uz_m; + amrex::Real m_ux_th, m_uy_th, m_uz_th; +}; + +// struct whose getMomentum returns momentum for 1 particle, for +// radial expansion +struct InjectorMomentumRadialExpansion +{ + InjectorMomentumRadialExpansion (amrex::Real a_u_over_r) noexcept + : u_over_r(a_u_over_r) + {} + + AMREX_GPU_HOST_DEVICE + amrex::XDim3 + getMomentum (amrex::Real x, amrex::Real y, amrex::Real z) const noexcept + { + return {x*u_over_r, y*u_over_r, z*u_over_r}; + } + +private: + amrex::Real u_over_r; +}; + +// struct whose getMomentumm returns local momentum computed from parser. +struct InjectorMomentumParser +{ + InjectorMomentumParser (WarpXParser const& a_ux_parser, + WarpXParser const& a_uy_parser, + WarpXParser const& a_uz_parser) noexcept + : m_ux_parser(a_ux_parser), m_uy_parser(a_uy_parser), + m_uz_parser(a_uz_parser) {} + + AMREX_GPU_HOST_DEVICE + amrex::XDim3 + getMomentum (amrex::Real x, amrex::Real y, amrex::Real z) const noexcept + { + return amrex::XDim3{m_ux_parser(x,y,z),m_uy_parser(x,y,z),m_uz_parser(x,y,z)}; + } + + GpuParser m_ux_parser, m_uy_parser, m_uz_parser; +}; + +// Base struct for momentum injector. +// InjectorMomentum contains a union (called Object) that holds any one +// instance of: +// - InjectorMomentumConstant : to generate constant density; +// - InjectorMomentumGaussian : to generate gaussian distribution; +// - InjectorMomentumRadialExpansion: to generate radial expansion; +// - InjectorMomentumParser : to generate momentum from parser; +// The choice is made at runtime, depending in the constructor called. +// This mimics virtual functions, except the struct is stored in managed memory +// and member functions are made __host__ __device__ to run on CPU and GPU. +// This struct inherits from amrex::Gpu::Managed to provide new and delete +// operators in managed memory when running on GPU. Nothing special on CPU. +struct InjectorMomentum + : public amrex::Gpu::Managed +{ + // This constructor stores a InjectorMomentumConstant in union object. + InjectorMomentum (InjectorMomentumConstant* t, + amrex::Real a_ux, amrex::Real a_uy, amrex::Real a_uz) + : type(Type::constant), + object(t, a_ux, a_uy, a_uz) + { } + + // This constructor stores a InjectorMomentumParser in union object. + InjectorMomentum (InjectorMomentumParser* t, + WarpXParser const& a_ux_parser, + WarpXParser const& a_uy_parser, + WarpXParser const& a_uz_parser) + : type(Type::parser), + object(t, a_ux_parser, a_uy_parser, a_uz_parser) + { } + + // This constructor stores a InjectorMomentumGaussian in union object. + InjectorMomentum (InjectorMomentumGaussian* t, + amrex::Real a_ux_m, amrex::Real a_uy_m, amrex::Real a_uz_m, + amrex::Real a_ux_th, amrex::Real a_uy_th, amrex::Real a_uz_th) + : type(Type::gaussian), + object(t,a_ux_m,a_uy_m,a_uz_m,a_ux_th,a_uy_th,a_uz_th) + { } + + // This constructor stores a InjectorMomentumCustom in union object. + InjectorMomentum (InjectorMomentumCustom* t, + std::string const& a_species_name) + : type(Type::custom), + object(t, a_species_name) + { } + + // This constructor stores a InjectorMomentumRadialExpansion in union object. + InjectorMomentum (InjectorMomentumRadialExpansion* t, + amrex::Real u_over_r) + : type(Type::radial_expansion), + object(t, u_over_r) + { } + + // Explicitly prevent the compiler from generating copy constructors + // and copy assignment operators. + InjectorMomentum (InjectorMomentum const&) = delete; + InjectorMomentum (InjectorMomentum&&) = delete; + void operator= (InjectorMomentum const&) = delete; + void operator= (InjectorMomentum &&) = delete; + + ~InjectorMomentum (); + + std::size_t sharedMemoryNeeded () const noexcept; + + // call getMomentum from the object stored in the union + // (the union is called Object, and the instance is called object). + AMREX_GPU_HOST_DEVICE + amrex::XDim3 + getMomentum (amrex::Real x, amrex::Real y, amrex::Real z) const noexcept + { + switch (type) + { + case Type::parser: + { + return object.parser.getMomentum(x,y,z); + } + case Type::gaussian: + { + return object.gaussian.getMomentum(x,y,z); + } + case Type::constant: + { + return object.constant.getMomentum(x,y,z); + } + case Type::radial_expansion: + { + return object.radial_expansion.getMomentum(x,y,z); + } + case Type::custom: + { + return object.custom.getMomentum(x,y,z); + } + default: + { + amrex::Abort("InjectorMomentum: unknown type"); + return {0.0,0.0,0.0}; + } + } + } + +private: + enum struct Type { constant, custom, gaussian, radial_expansion, parser }; + Type type; + + // An instance of union Object constructs and stores any one of + // the objects declared (constant or custom or gaussian or + // radial_expansion or parser). + union Object { + Object (InjectorMomentumConstant*, + amrex::Real a_ux, amrex::Real a_uy, amrex::Real a_uz) noexcept + : constant(a_ux,a_uy,a_uz) {} + Object (InjectorMomentumCustom*, + std::string const& a_species_name) noexcept + : custom(a_species_name) {} + Object (InjectorMomentumGaussian*, + amrex::Real a_ux_m, amrex::Real a_uy_m, + amrex::Real a_uz_m, amrex::Real a_ux_th, + amrex::Real a_uy_th, amrex::Real a_uz_th) noexcept + : gaussian(a_ux_m,a_uy_m,a_uz_m,a_ux_th,a_uy_th,a_uz_th) {} + Object (InjectorMomentumRadialExpansion*, + amrex::Real u_over_r) noexcept + : radial_expansion(u_over_r) {} + Object (InjectorMomentumParser*, + WarpXParser const& a_ux_parser, + WarpXParser const& a_uy_parser, + WarpXParser const& a_uz_parser) noexcept + : parser(a_ux_parser, a_uy_parser, a_uz_parser) {} + InjectorMomentumConstant constant; + InjectorMomentumCustom custom; + InjectorMomentumGaussian gaussian; + InjectorMomentumRadialExpansion radial_expansion; + InjectorMomentumParser parser; + }; + Object object; +}; + +#endif diff --git a/Source/Initialization/InjectorMomentum.cpp b/Source/Initialization/InjectorMomentum.cpp new file mode 100644 index 000000000..a197b5bef --- /dev/null +++ b/Source/Initialization/InjectorMomentum.cpp @@ -0,0 +1,40 @@ +#include <PlasmaInjector.H> + +using namespace amrex; + +InjectorMomentum::~InjectorMomentum () +{ + switch (type) + { + case Type::parser: + { + object.parser.m_ux_parser.clear(); + object.parser.m_uy_parser.clear(); + object.parser.m_uz_parser.clear(); + break; + } + case Type::custom: + { + object.custom.clear(); + break; + } + } +} + +// Compute the amount of memory needed in GPU Shared Memory. +std::size_t +InjectorMomentum::sharedMemoryNeeded () const noexcept +{ + switch (type) + { + case Type::parser: + { + // For parser injector, the 3D position of each particle + // is stored in shared memory. + return amrex::Gpu::numThreadsPerBlockParallelFor() * sizeof(double) * 3; + } + default: + return 0; + } +} + diff --git a/Source/Initialization/InjectorPosition.H b/Source/Initialization/InjectorPosition.H new file mode 100644 index 000000000..19bb092dd --- /dev/null +++ b/Source/Initialization/InjectorPosition.H @@ -0,0 +1,146 @@ +#ifndef INJECTOR_POSITION_H_ +#define INJECTOR_POSITION_H_ + +#include <AMReX_Gpu.H> +#include <AMReX_Dim3.H> +#include <AMReX_Utility.H> + +// struct whose getPositionUnitBox returns x, y and z for a particle with +// random distribution inside a unit cell. +struct InjectorPositionRandom +{ + AMREX_GPU_HOST_DEVICE + amrex::XDim3 + getPositionUnitBox (int i_part, int ref_fac=1) const noexcept + { + return amrex::XDim3{amrex::Random(), amrex::Random(), amrex::Random()}; + } +}; + +// struct whose getPositionUnitBox returns x, y and z for a particle with +// regular distribution inside a unit cell. +struct InjectorPositionRegular +{ + InjectorPositionRegular (amrex::Dim3 const& a_ppc) noexcept : ppc(a_ppc) {} + + // i_part: particle number within the cell, required to evenly space + // particles within the cell. + // ref_fac: the number of particles evenly-spaced within a cell + // is a_ppc*(ref_fac**AMREX_SPACEDIM). + AMREX_GPU_HOST_DEVICE + amrex::XDim3 + getPositionUnitBox (int i_part, int ref_fac=1) const noexcept + { + int nx = ref_fac*ppc.x; + int ny = ref_fac*ppc.y; +#if (AMREX_SPACEDIM == 3) + int nz = ref_fac*ppc.z; +#else + int nz = 1; +#endif + int ix_part = i_part/(ny*nz); // written this way backward compatibility + int iz_part = (i_part-ix_part*(ny*nz)) / ny; + int iy_part = (i_part-ix_part*(ny*nz)) - ny*iz_part; + return amrex::XDim3{(0.5+ix_part)/nx, (0.5+iy_part)/ny, (0.5+iz_part) / nz}; + } +private: + amrex::Dim3 ppc; +}; + +// Base struct for position injector. +// InjectorPosition contains a union (called Object) that holds any one +// instance of: +// - InjectorPositionRandom : to generate random distribution; +// - InjectorPositionRegular: to generate regular distribution. +// The choice is made at runtime, depending in the constructor called. +// This mimics virtual functions, except the struct is stored in managed memory +// and member functions are made __host__ __device__ to run on CPU and GPU. +// This struct inherits from amrex::Gpu::Managed to provide new and delete +// operators in managed memory when running on GPU. Nothing special on CPU. +struct InjectorPosition + : public amrex::Gpu::Managed +{ + // This constructor stores a InjectorPositionRandom in union object. + InjectorPosition (InjectorPositionRandom* t, + amrex::Real a_xmin, amrex::Real a_xmax, + amrex::Real a_ymin, amrex::Real a_ymax, + amrex::Real a_zmin, amrex::Real a_zmax) + : type(Type::random), + object(t), + xmin(a_xmin), xmax(a_xmax), + ymin(a_ymin), ymax(a_ymax), + zmin(a_zmin), zmax(a_zmax) + { } + + // This constructor stores a InjectorPositionRegular in union object. + InjectorPosition (InjectorPositionRegular* t, + amrex::Real a_xmin, amrex::Real a_xmax, + amrex::Real a_ymin, amrex::Real a_ymax, + amrex::Real a_zmin, amrex::Real a_zmax, + amrex::Dim3 const& a_ppc) + : type(Type::regular), + object(t, a_ppc), + xmin(a_xmin), xmax(a_xmax), + ymin(a_ymin), ymax(a_ymax), + zmin(a_zmin), zmax(a_zmax) + { } + + // Explicitly prevent the compiler from generating copy constructors + // and copy assignment operators. + InjectorPosition (InjectorPosition const&) = delete; + InjectorPosition (InjectorPosition&&) = delete; + void operator= (InjectorPosition const&) = delete; + void operator= (InjectorPosition &&) = delete; + + std::size_t sharedMemoryNeeded () const noexcept { return 0; } + + // call getPositionUnitBox from the object stored in the union + // (the union is called Object, and the instance is called object). + AMREX_GPU_HOST_DEVICE + amrex::XDim3 + getPositionUnitBox (int i_part, int ref_fac=1) const noexcept + { + switch (type) + { + case Type::regular: + { + return object.regular.getPositionUnitBox(i_part, ref_fac); + } + default: + { + return object.random.getPositionUnitBox(i_part, ref_fac); + } + }; + } + + // bool: whether position specified is within bounds. + AMREX_GPU_HOST_DEVICE + bool + insideBounds (amrex::Real x, amrex::Real y, amrex::Real z) const noexcept + { + return (x < xmax and x >= xmin and + y < ymax and y >= ymin and + z < zmax and z >= zmin); + } + +private: + enum struct Type { random, regular }; + Type type; + + // An instance of union Object constructs and stores any one of + // the objects declared (random or regular). + union Object { + Object (InjectorPositionRandom*) noexcept : random() {} + Object (InjectorPositionRegular*, amrex::Dim3 const& a_ppc) noexcept + : regular(a_ppc) {} + InjectorPositionRandom random; + InjectorPositionRegular regular; + }; + Object object; + + amrex::Real xmin, xmax; + amrex::Real ymin, ymax; + amrex::Real zmin, zmax; +}; + +#endif diff --git a/Source/Initialization/Make.package b/Source/Initialization/Make.package index edcf402c9..2c6458b6d 100644 --- a/Source/Initialization/Make.package +++ b/Source/Initialization/Make.package @@ -1,9 +1,18 @@ -CEXE_sources += CustomDensityProb.cpp -CEXE_sources += PlasmaProfiles.cpp CEXE_sources += WarpXInitData.cpp -CEXE_sources += CustomMomentumProb.cpp + CEXE_sources += PlasmaInjector.cpp CEXE_headers += PlasmaInjector.H +CEXE_headers += InjectorPosition.H + +CEXE_headers += InjectorDensity.H +CEXE_sources += InjectorDensity.cpp + +CEXE_headers += InjectorMomentum.H +CEXE_sources += InjectorMomentum.cpp + +CEXE_headers += CustomDensityProb.H +CEXE_headers += CustomMomentumProb.H + INCLUDE_LOCATIONS += $(WARPX_HOME)/Source/Initialization VPATH_LOCATIONS += $(WARPX_HOME)/Source/Initialization diff --git a/Source/Initialization/PlasmaInjector.H b/Source/Initialization/PlasmaInjector.H index f998e217e..f7e86bff5 100644 --- a/Source/Initialization/PlasmaInjector.H +++ b/Source/Initialization/PlasmaInjector.H @@ -1,250 +1,16 @@ #ifndef PLASMA_INJECTOR_H_ #define PLASMA_INJECTOR_H_ -#include <array> +#include <InjectorPosition.H> +#include <InjectorDensity.H> +#include <InjectorMomentum.H> -#include "AMReX_REAL.H" +#include <array> #include <AMReX_Vector.H> #include <WarpXConst.H> #include <WarpXParser.H> -#include "AMReX_ParmParse.H" -#include "AMReX_Utility.H" - -enum class predefined_profile_flag { Null, parabolic_channel }; - -/// -/// PlasmaDensityProfile describes how the charge density -/// is set in particle initialization. Subclasses must define a -/// getDensity function that describes the charge density as a -/// function of x, y, and z. -/// -class PlasmaDensityProfile -{ -public: - virtual ~PlasmaDensityProfile() {}; - virtual amrex::Real getDensity(amrex::Real x, - amrex::Real y, - amrex::Real z) const = 0; -protected: - std::string _species_name; -}; - -/// -/// This describes a constant density distribution. -/// -class ConstantDensityProfile : public PlasmaDensityProfile -{ -public: - ConstantDensityProfile(amrex::Real _density); - virtual amrex::Real getDensity(amrex::Real x, - amrex::Real y, - amrex::Real z) const override; - -private: - amrex::Real _density; -}; - -/// -/// This describes a custom density distribution. Users can supply -/// in their problem directory. -/// -/// -class CustomDensityProfile : public PlasmaDensityProfile -{ -public: - CustomDensityProfile(const std::string& species_name); - virtual amrex::Real getDensity(amrex::Real x, - amrex::Real y, - amrex::Real z) const override; -private: - amrex::Vector<amrex::Real> params; -}; - -/// -/// This describes predefined density distributions. -/// -class PredefinedDensityProfile : public PlasmaDensityProfile -{ -public: - PredefinedDensityProfile(const std::string& species_name); - virtual amrex::Real getDensity(amrex::Real x, - amrex::Real y, - amrex::Real z) const override; - amrex::Real ParabolicChannel(amrex::Real x, - amrex::Real y, - amrex::Real z) const; -private: - predefined_profile_flag which_profile = predefined_profile_flag::Null; - amrex::Vector<amrex::Real> params; -}; - -/// -/// This describes a density function parsed in the input file. -/// -class ParseDensityProfile : public PlasmaDensityProfile -{ -public: - ParseDensityProfile(const std::string _parse_density_function); - virtual amrex::Real getDensity(amrex::Real x, - amrex::Real y, - amrex::Real z) const override; -private: - std::string _parse_density_function; - WarpXParser parser_density; -}; - -/// -/// PlasmaMomentumDistribution describes how the particle momenta -/// are set. Subclasses must define a getMomentum method that fills -/// a u with the 3 components of the particle momentum -/// -class PlasmaMomentumDistribution -{ -public: - using vec3 = std::array<amrex::Real, 3>; - virtual ~PlasmaMomentumDistribution() {}; - virtual void getMomentum(vec3& u, amrex::Real x, amrex::Real y, amrex::Real z) = 0; -}; - -/// -/// This is a constant momentum distribution - all particles will -/// have the same ux, uy, and uz -/// -class ConstantMomentumDistribution : public PlasmaMomentumDistribution -{ -public: - ConstantMomentumDistribution(amrex::Real ux, - amrex::Real uy, - amrex::Real uz); - virtual void getMomentum(vec3& u, amrex::Real x, amrex::Real y, amrex::Real z) override; - -private: - amrex::Real _ux; - amrex::Real _uy; - amrex::Real _uz; -}; - -/// -/// This describes a custom momentum distribution. Users can supply -/// in their problem directory. -/// -/// -class CustomMomentumDistribution : public PlasmaMomentumDistribution -{ -public: - CustomMomentumDistribution(const std::string& species_name); - virtual void getMomentum(vec3& u, amrex::Real x, amrex::Real y, amrex::Real z) override; - -private: - amrex::Vector<amrex::Real> params; -}; - - -/// -/// This is a Gaussian Random momentum distribution. -/// Particles will get random momenta, drawn from a normal. -/// ux_m, ux_y, and ux_z describe the mean components in the x, y, and z -/// directions, while u_th is the standard deviation of the random -/// component. -/// -class GaussianRandomMomentumDistribution : public PlasmaMomentumDistribution -{ -public: - GaussianRandomMomentumDistribution(amrex::Real ux_m, - amrex::Real uy_m, - amrex::Real uz_m, - amrex::Real ux_th, - amrex::Real uy_th, - amrex::Real uz_th); - virtual void getMomentum(vec3& u, amrex::Real x, amrex::Real y, amrex::Real z) override; -private: - amrex::Real _ux_m; - amrex::Real _uy_m; - amrex::Real _uz_m; - amrex::Real _ux_th; - amrex::Real _uy_th; - amrex::Real _uz_th; -}; - -/// -/// This is a radially expanding momentum distribution -/// Particles will have a radial momentum proportional to their -/// radius, with proportionality constant u_over_r -class RadialExpansionMomentumDistribution : public PlasmaMomentumDistribution -{ -public: - RadialExpansionMomentumDistribution( amrex::Real u_over_r ); - virtual void getMomentum(vec3& u, amrex::Real x, amrex::Real y, amrex::Real z) override; -private: - amrex::Real _u_over_r; -}; - -/// -/// This describes a momentum distribution function parsed in the input file. -/// -class ParseMomentumFunction : public PlasmaMomentumDistribution -{ -public: - ParseMomentumFunction(const std::string _parse_momentum_function_ux, - const std::string _parse_momentum_function_uy, - const std::string _parse_momentum_function_uz); - virtual void getMomentum(vec3& u, - amrex::Real x, - amrex::Real y, - amrex::Real z) override; -private: - std::string _parse_momentum_function_ux; - std::string _parse_momentum_function_uy; - std::string _parse_momentum_function_uz; - WarpXParser parser_ux; - WarpXParser parser_uy; - WarpXParser parser_uz; -}; - - -/// -/// PlasmaParticlePosition describes how particles are initialized -/// into each cell box. Subclasses must define a -/// getPositionUnitBox function that returns the position of -/// particle number i_part in a unitary box. -/// -class PlasmaParticlePosition{ -public: - using vec3 = std::array<amrex::Real, 3>; - virtual ~PlasmaParticlePosition() {}; - virtual void getPositionUnitBox(vec3& r, int i_part, int ref_fac=1) = 0; -}; - -/// -/// Particles are initialized with a random uniform -/// distribution inside each cell -/// -class RandomPosition : public PlasmaParticlePosition{ -public: - RandomPosition(int num_particles_per_cell); - virtual void getPositionUnitBox(vec3& r, int i_part, int ref_fac=1) override; -private: - amrex::Real _x; - amrex::Real _y; - amrex::Real _z; - int _num_particles_per_cell; -}; - -/// -/// Particles are regularly distributed inside each cell. The user provides -/// a 3d (resp. 2d) vector num_particles_per_cell_each_dim that contains -/// the number of particles per cell along each dimension. -/// -class RegularPosition : public PlasmaParticlePosition{ -public: - RegularPosition(const amrex::Vector<int>& num_particles_per_cell_each_dim); - virtual void getPositionUnitBox(vec3& r, int i_part, int ref_fac=1) override; -private: - amrex::Real _x; - amrex::Real _y; - amrex::Real _z; - amrex::Vector<int> _num_particles_per_cell_each_dim; -}; +#include <AMReX_ParmParse.H> +#include <AMReX_Utility.H> /// /// The PlasmaInjector class parses and stores information about the plasma @@ -256,28 +22,23 @@ class PlasmaInjector public: - using vec3 = std::array<amrex::Real, 3>; - - PlasmaInjector(); - - PlasmaInjector(int ispecies, const std::string& name); + PlasmaInjector (); - amrex::Real getDensity(amrex::Real x, amrex::Real y, amrex::Real z); + PlasmaInjector (int ispecies, const std::string& name); - bool insideBounds(amrex::Real x, amrex::Real y, amrex::Real z); + bool insideBounds (amrex::Real x, amrex::Real y, amrex::Real z) const noexcept; int num_particles_per_cell; amrex::Vector<int> num_particles_per_cell_each_dim; - void getMomentum(vec3& u, amrex::Real x, amrex::Real y, amrex::Real z); + // gamma * beta + amrex::XDim3 getMomentum (amrex::Real x, amrex::Real y, amrex::Real z) const noexcept; - void getPositionUnitBox(vec3& r, int i_part, int ref_fac=1); + amrex::Real getCharge () {return charge;} + amrex::Real getMass () {return mass;} - amrex::Real getCharge() {return charge;} - amrex::Real getMass() {return mass;} - - bool doInjection() { return part_pos != NULL;} + bool doInjection () const noexcept { return inj_pos != NULL;} bool add_single_particle = false; amrex::Vector<amrex::Real> single_particle_pos; @@ -305,6 +66,21 @@ public: amrex::Real xmin, xmax; amrex::Real ymin, ymax; amrex::Real zmin, zmax; + amrex::Real density_min = 0; + amrex::Real density_max = std::numeric_limits<amrex::Real>::max(); + + InjectorPosition* getInjectorPosition (); + InjectorDensity* getInjectorDensity (); + InjectorMomentum* getInjectorMomentum (); + + // When running on GPU, injector for position, momentum and density store + // particle 3D positions in shared memory IF using the parser. + std::size_t + sharedMemoryNeeded () const noexcept { + return amrex::max(inj_pos->sharedMemoryNeeded(), + inj_rho->sharedMemoryNeeded(), + inj_mom->sharedMemoryNeeded()); + } protected: @@ -315,13 +91,12 @@ protected: int species_id; std::string species_name; - std::unique_ptr<PlasmaDensityProfile> rho_prof; - std::unique_ptr<PlasmaMomentumDistribution> mom_dist; - std::unique_ptr<PlasmaParticlePosition> part_pos; - - void parseDensity(amrex::ParmParse pp); - void parseMomentum(amrex::ParmParse pp); + std::unique_ptr<InjectorPosition> inj_pos; + std::unique_ptr<InjectorDensity > inj_rho; + std::unique_ptr<InjectorMomentum> inj_mom; + void parseDensity (amrex::ParmParse& pp); + void parseMomentum (amrex::ParmParse& pp); }; #endif diff --git a/Source/Initialization/PlasmaInjector.cpp b/Source/Initialization/PlasmaInjector.cpp index f9642d1b6..280f9e58a 100644 --- a/Source/Initialization/PlasmaInjector.cpp +++ b/Source/Initialization/PlasmaInjector.cpp @@ -55,192 +55,34 @@ namespace { } } -ConstantDensityProfile::ConstantDensityProfile(Real density) - : _density(density) -{} +PlasmaInjector::PlasmaInjector () {} -Real ConstantDensityProfile::getDensity(Real x, Real y, Real z) const -{ - return _density; -} - -CustomDensityProfile::CustomDensityProfile(const std::string& species_name) +PlasmaInjector::PlasmaInjector (int ispecies, const std::string& name) + : species_id(ispecies), species_name(name) { ParmParse pp(species_name); - pp.getarr("custom_profile_params", params); -} -PredefinedDensityProfile::PredefinedDensityProfile(const std::string& species_name) -{ - ParmParse pp(species_name); - std::string which_profile_s; - pp.getarr("predefined_profile_params", params); - pp.query("predefined_profile_name", which_profile_s); - if (which_profile_s == "parabolic_channel"){ - which_profile = predefined_profile_flag::parabolic_channel; - } -} - -ParseDensityProfile::ParseDensityProfile(std::string parse_density_function) - : _parse_density_function(parse_density_function) -{ - parser_density.define(parse_density_function); - parser_density.registerVariables({"x","y","z"}); - - ParmParse pp("my_constants"); - std::set<std::string> symbols = parser_density.symbols(); - symbols.erase("x"); - symbols.erase("y"); - symbols.erase("z"); // after removing variables, we are left with constants - for (auto it = symbols.begin(); it != symbols.end(); ) { - Real v; - if (pp.query(it->c_str(), v)) { - parser_density.setConstant(*it, v); - it = symbols.erase(it); - } else { - ++it; - } - } - for (auto const& s : symbols) { // make sure there no unknown symbols - amrex::Abort("ParseDensityProfile: Unknown symbol "+s); - } -} - -Real ParseDensityProfile::getDensity(Real x, Real y, Real z) const -{ - return parser_density.eval(x,y,z); -} - -ConstantMomentumDistribution::ConstantMomentumDistribution(Real ux, - Real uy, - Real uz) - : _ux(ux), _uy(uy), _uz(uz) -{} - -void ConstantMomentumDistribution::getMomentum(vec3& u, Real x, Real y, Real z) { - u[0] = _ux; - u[1] = _uy; - u[2] = _uz; -} - -CustomMomentumDistribution::CustomMomentumDistribution(const std::string& species_name) -{ - ParmParse pp(species_name); - pp.getarr("custom_momentum_params", params); -} - -GaussianRandomMomentumDistribution::GaussianRandomMomentumDistribution(Real ux_m, - Real uy_m, - Real uz_m, - Real ux_th, - Real uy_th, - Real uz_th) - : _ux_m(ux_m), _uy_m(uy_m), _uz_m(uz_m), _ux_th(ux_th), _uy_th(uy_th), _uz_th(uz_th) -{ -} - -void GaussianRandomMomentumDistribution::getMomentum(vec3& u, Real x, Real y, Real z) { - Real ux_th = amrex::RandomNormal(0.0, _ux_th); - Real uy_th = amrex::RandomNormal(0.0, _uy_th); - Real uz_th = amrex::RandomNormal(0.0, _uz_th); - - u[0] = _ux_m + ux_th; - u[1] = _uy_m + uy_th; - u[2] = _uz_m + uz_th; -} -RadialExpansionMomentumDistribution::RadialExpansionMomentumDistribution(Real u_over_r) : _u_over_r( u_over_r ) -{ -} - -void RadialExpansionMomentumDistribution::getMomentum(vec3& u, Real x, Real y, Real z) { - u[0] = _u_over_r * x; - u[1] = _u_over_r * y; - u[2] = _u_over_r * z; -} - -ParseMomentumFunction::ParseMomentumFunction(std::string parse_momentum_function_ux, - std::string parse_momentum_function_uy, - std::string parse_momentum_function_uz) - : _parse_momentum_function_ux(parse_momentum_function_ux), - _parse_momentum_function_uy(parse_momentum_function_uy), - _parse_momentum_function_uz(parse_momentum_function_uz) -{ - parser_ux.define(parse_momentum_function_ux); - parser_uy.define(parse_momentum_function_uy); - parser_uz.define(parse_momentum_function_uz); - - amrex::Array<std::reference_wrapper<WarpXParser>,3> parsers{parser_ux, parser_uy, parser_uz}; - ParmParse pp("my_constants"); - for (auto& p : parsers) { - auto& parser = p.get(); - parser.registerVariables({"x","y","z"}); - std::set<std::string> symbols = parser.symbols(); - symbols.erase("x"); - symbols.erase("y"); - symbols.erase("z"); // after removing variables, we are left with constants - for (auto it = symbols.begin(); it != symbols.end(); ) { - Real v; - if (pp.query(it->c_str(), v)) { - parser.setConstant(*it, v); - it = symbols.erase(it); - } else { - ++it; - } - } - for (auto const& s : symbols) { // make sure there no unknown symbols - amrex::Abort("ParseMomentumFunction: Unknown symbol "+s); - } - } -} - -void ParseMomentumFunction::getMomentum(vec3& u, Real x, Real y, Real z) -{ - u[0] = parser_ux.eval(x,y,z); - u[1] = parser_uy.eval(x,y,z); - u[2] = parser_uz.eval(x,y,z); -} - -RandomPosition::RandomPosition(int num_particles_per_cell): - _num_particles_per_cell(num_particles_per_cell) -{} - -void RandomPosition::getPositionUnitBox(vec3& r, int i_part, int ref_fac){ - r[0] = amrex::Random(); - r[1] = amrex::Random(); - r[2] = amrex::Random(); -} - -RegularPosition::RegularPosition(const amrex::Vector<int>& num_particles_per_cell_each_dim) - : _num_particles_per_cell_each_dim(num_particles_per_cell_each_dim) -{} + pp.query("radially_weighted", radially_weighted); + AMREX_ALWAYS_ASSERT_WITH_MESSAGE(radially_weighted, "ERROR: Only radially_weighted=true is supported"); -void RegularPosition::getPositionUnitBox(vec3& r, int i_part, int ref_fac) -{ - int nx = ref_fac*_num_particles_per_cell_each_dim[0]; - int ny = ref_fac*_num_particles_per_cell_each_dim[1]; -#if AMREX_SPACEDIM == 3 - int nz = ref_fac*_num_particles_per_cell_each_dim[2]; -#else - int nz = 1; -#endif - - int ix_part = i_part/(ny * nz); - int iy_part = (i_part % (ny * nz)) % ny; - int iz_part = (i_part % (ny * nz)) / ny; + // parse plasma boundaries + xmin = std::numeric_limits<amrex::Real>::lowest(); + ymin = std::numeric_limits<amrex::Real>::lowest(); + zmin = std::numeric_limits<amrex::Real>::lowest(); - r[0] = (0.5+ix_part)/nx; - r[1] = (0.5+iy_part)/ny; - r[2] = (0.5+iz_part)/nz; -} + xmax = std::numeric_limits<amrex::Real>::max(); + ymax = std::numeric_limits<amrex::Real>::max(); + zmax = std::numeric_limits<amrex::Real>::max(); -PlasmaInjector::PlasmaInjector(){ - part_pos = NULL; -} + pp.query("xmin", xmin); + pp.query("ymin", ymin); + pp.query("zmin", zmin); + pp.query("xmax", xmax); + pp.query("ymax", ymax); + pp.query("zmax", zmax); -PlasmaInjector::PlasmaInjector(int ispecies, const std::string& name) - : species_id(ispecies), species_name(name) -{ - ParmParse pp(species_name); + pp.query("density_min", density_min); + pp.query("density_max", density_max); // parse charge and mass std::string charge_s; @@ -290,9 +132,14 @@ PlasmaInjector::PlasmaInjector(int ispecies, const std::string& name) gaussian_beam = true; parseMomentum(pp); } + // Depending on injection type at runtime, initialize inj_pos + // so that inj_pos->getPositionUnitBox calls + // InjectorPosition[Random or Regular].getPositionUnitBox. else if (part_pos_s == "nrandompercell") { pp.query("num_particles_per_cell", num_particles_per_cell); - part_pos.reset(new RandomPosition(num_particles_per_cell)); + // Construct InjectorPosition with InjectorPositionRandom. + inj_pos.reset(new InjectorPosition((InjectorPositionRandom*)nullptr, + xmin, xmax, ymin, ymax, zmin, zmax)); parseDensity(pp); parseMomentum(pp); } else if (part_pos_s == "nuniformpercell") { @@ -301,7 +148,12 @@ PlasmaInjector::PlasmaInjector(int ispecies, const std::string& name) #if ( AMREX_SPACEDIM == 2 ) num_particles_per_cell_each_dim[2] = 1; #endif - part_pos.reset(new RegularPosition(num_particles_per_cell_each_dim)); + // Construct InjectorPosition from InjectorPositionRegular. + inj_pos.reset(new InjectorPosition((InjectorPositionRegular*)nullptr, + xmin, xmax, ymin, ymax, zmin, zmax, + Dim3{num_particles_per_cell_each_dim[0], + num_particles_per_cell_each_dim[1], + num_particles_per_cell_each_dim[2]})); num_particles_per_cell = num_particles_per_cell_each_dim[0] * num_particles_per_cell_each_dim[1] * num_particles_per_cell_each_dim[2]; @@ -310,52 +162,71 @@ PlasmaInjector::PlasmaInjector(int ispecies, const std::string& name) } else { StringParseAbortMessage("Injection style", part_pos_s); } +} - pp.query("radially_weighted", radially_weighted); - AMREX_ALWAYS_ASSERT_WITH_MESSAGE(radially_weighted, "ERROR: Only radially_weighted=true is supported"); - - // parse plasma boundaries - xmin = std::numeric_limits<amrex::Real>::lowest(); - ymin = std::numeric_limits<amrex::Real>::lowest(); - zmin = std::numeric_limits<amrex::Real>::lowest(); - - xmax = std::numeric_limits<amrex::Real>::max(); - ymax = std::numeric_limits<amrex::Real>::max(); - zmax = std::numeric_limits<amrex::Real>::max(); +namespace { +WarpXParser makeParser (std::string const& parse_function) +{ + WarpXParser parser(parse_function); + parser.registerVariables({"x","y","z"}); - pp.query("xmin", xmin); - pp.query("ymin", ymin); - pp.query("zmin", zmin); - pp.query("xmax", xmax); - pp.query("ymax", ymax); - pp.query("zmax", zmax); + ParmParse pp("my_constants"); + std::set<std::string> symbols = parser.symbols(); + symbols.erase("x"); + symbols.erase("y"); + symbols.erase("z"); // after removing variables, we are left with constants + for (auto it = symbols.begin(); it != symbols.end(); ) { + Real v; + if (pp.query(it->c_str(), v)) { + parser.setConstant(*it, v); + it = symbols.erase(it); + } else { + ++it; + } + } + for (auto const& s : symbols) { // make sure there no unknown symbols + amrex::Abort("PlasmaInjector::makeParser: Unknown symbol "+s); + } + return parser; +} } -void PlasmaInjector::parseDensity(ParmParse pp){ +// Depending on injection type at runtime, initialize inj_rho +// so that inj_rho->getDensity calls +// InjectorPosition[Constant or Custom or etc.].getDensity. +void PlasmaInjector::parseDensity (ParmParse& pp) +{ // parse density information std::string rho_prof_s; pp.get("profile", rho_prof_s); - std::transform(rho_prof_s.begin(), - rho_prof_s.end(), - rho_prof_s.begin(), - ::tolower); + std::transform(rho_prof_s.begin(), rho_prof_s.end(), + rho_prof_s.begin(), ::tolower); if (rho_prof_s == "constant") { pp.get("density", density); - rho_prof.reset(new ConstantDensityProfile(density)); + // Construct InjectorDensity with InjectorDensityConstant. + inj_rho.reset(new InjectorDensity((InjectorDensityConstant*)nullptr, density)); } else if (rho_prof_s == "custom") { - rho_prof.reset(new CustomDensityProfile(species_name)); + // Construct InjectorDensity with InjectorDensityCustom. + inj_rho.reset(new InjectorDensity((InjectorDensityCustom*)nullptr, species_name)); } else if (rho_prof_s == "predefined") { - rho_prof.reset(new PredefinedDensityProfile(species_name)); + // Construct InjectorDensity with InjectorDensityPredefined. + inj_rho.reset(new InjectorDensity((InjectorDensityPredefined*)nullptr,species_name)); } else if (rho_prof_s == "parse_density_function") { pp.get("density_function(x,y,z)", str_density_function); - rho_prof.reset(new ParseDensityProfile(str_density_function)); + // Construct InjectorDensity with InjectorDensityParser. + inj_rho.reset(new InjectorDensity((InjectorDensityParser*)nullptr, + makeParser(str_density_function))); } else { StringParseAbortMessage("Density profile type", rho_prof_s); } } -void PlasmaInjector::parseMomentum(ParmParse pp){ +// Depending on injection type at runtime, initialize inj_mom +// so that inj_mom->getMomentum calls +// InjectorMomentum[Constant or Custom or etc.].getMomentum. +void PlasmaInjector::parseMomentum (ParmParse& pp) +{ // parse momentum information std::string mom_dist_s; pp.get("momentum_distribution_type", mom_dist_s); @@ -370,9 +241,11 @@ void PlasmaInjector::parseMomentum(ParmParse pp){ pp.query("ux", ux); pp.query("uy", uy); pp.query("uz", uz); - mom_dist.reset(new ConstantMomentumDistribution(ux, uy, uz)); + // Construct InjectorMomentum with InjectorMomentumConstant. + inj_mom.reset(new InjectorMomentum((InjectorMomentumConstant*)nullptr, ux,uy, uz)); } else if (mom_dist_s == "custom") { - mom_dist.reset(new CustomMomentumDistribution(species_name)); + // Construct InjectorMomentum with InjectorMomentumCustom. + inj_mom.reset(new InjectorMomentum((InjectorMomentumCustom*)nullptr, species_name)); } else if (mom_dist_s == "gaussian") { Real ux_m = 0.; Real uy_m = 0.; @@ -386,42 +259,56 @@ void PlasmaInjector::parseMomentum(ParmParse pp){ pp.query("ux_th", ux_th); pp.query("uy_th", uy_th); pp.query("uz_th", uz_th); - mom_dist.reset(new GaussianRandomMomentumDistribution(ux_m, uy_m, uz_m, - ux_th, uy_th, uz_th)); + // Construct InjectorMomentum with InjectorMomentumGaussian. + inj_mom.reset(new InjectorMomentum((InjectorMomentumGaussian*)nullptr, + ux_m, uy_m, uz_m, ux_th, uy_th, uz_th)); } else if (mom_dist_s == "radial_expansion") { Real u_over_r = 0.; pp.query("u_over_r", u_over_r); - mom_dist.reset(new RadialExpansionMomentumDistribution(u_over_r)); + // Construct InjectorMomentum with InjectorMomentumRadialExpansion. + inj_mom.reset(new InjectorMomentum + ((InjectorMomentumRadialExpansion*)nullptr, u_over_r)); } else if (mom_dist_s == "parse_momentum_function") { pp.get("momentum_function_ux(x,y,z)", str_momentum_function_ux); pp.get("momentum_function_uy(x,y,z)", str_momentum_function_uy); pp.get("momentum_function_uz(x,y,z)", str_momentum_function_uz); - mom_dist.reset(new ParseMomentumFunction(str_momentum_function_ux, - str_momentum_function_uy, - str_momentum_function_uz)); + // Construct InjectorMomentum with InjectorMomentumParser. + inj_mom.reset(new InjectorMomentum((InjectorMomentumParser*)nullptr, + makeParser(str_momentum_function_ux), + makeParser(str_momentum_function_uy), + makeParser(str_momentum_function_uz))); } else { StringParseAbortMessage("Momentum distribution type", mom_dist_s); } } -void PlasmaInjector::getPositionUnitBox(vec3& r, int i_part, int ref_fac) { - return part_pos->getPositionUnitBox(r, i_part, ref_fac); +XDim3 PlasmaInjector::getMomentum (Real x, Real y, Real z) const noexcept +{ + return inj_mom->getMomentum(x, y, z); // gamma*beta +} + +bool PlasmaInjector::insideBounds (Real x, Real y, Real z) const noexcept +{ + return (x < xmax and x >= xmin and + y < ymax and y >= ymin and + z < zmax and z >= zmin); } -void PlasmaInjector::getMomentum(vec3& u, Real x, Real y, Real z) { - mom_dist->getMomentum(u, x, y, z); - u[0] *= PhysConst::c; - u[1] *= PhysConst::c; - u[2] *= PhysConst::c; +InjectorPosition* +PlasmaInjector::getInjectorPosition () +{ + return inj_pos.get(); } -bool PlasmaInjector::insideBounds(Real x, Real y, Real z) { - if (x >= xmax || x < xmin || - y >= ymax || y < ymin || - z >= zmax || z < zmin ) return false; - return true; +InjectorDensity* +PlasmaInjector::getInjectorDensity () +{ + return inj_rho.get(); } -Real PlasmaInjector::getDensity(Real x, Real y, Real z) { - return rho_prof->getDensity(x, y, z); +InjectorMomentum* +PlasmaInjector::getInjectorMomentum () +{ + return inj_mom.get(); } + diff --git a/Source/Initialization/PlasmaProfiles.cpp b/Source/Initialization/PlasmaProfiles.cpp deleted file mode 100644 index d9d207f7e..000000000 --- a/Source/Initialization/PlasmaProfiles.cpp +++ /dev/null @@ -1,41 +0,0 @@ -#include <PlasmaInjector.H> -#include <cmath> -#include <iostream> -#include <WarpXConst.H> - -using namespace amrex; - -Real PredefinedDensityProfile::getDensity(Real x, Real y, Real z) const { - Real n; - if ( which_profile == predefined_profile_flag::parabolic_channel ) { - n = ParabolicChannel(x,y,z); - } - return n; -} - -/// -/// plateau between linear upramp and downramp, and parab transverse profile -/// -Real PredefinedDensityProfile::ParabolicChannel(Real x, Real y, Real z) const { - // params = [z_start ramp_up plateau ramp_down rc n0] - Real z_start = params[0]; - Real ramp_up = params[1]; - Real plateau = params[2]; - Real ramp_down = params[3]; - Real rc = params[4]; - Real n0 = params[5]; - Real n; - Real kp = PhysConst::q_e/PhysConst::c*sqrt( n0/(PhysConst::m_e*PhysConst::ep0) ); - - if ((z-z_start)>=0 and (z-z_start)<ramp_up ) { - n = (z-z_start)/ramp_up; - } else if ((z-z_start)>=ramp_up and (z-z_start)<ramp_up+plateau ) { - n = 1; - } else if ((z-z_start)>=ramp_up+plateau and (z-z_start)<ramp_up+plateau+ramp_down) { - n = 1-((z-z_start)-ramp_up-plateau)/ramp_down; - } else { - n = 0; - } - n *= n0*(1+4*(x*x+y*y)/(kp*kp*std::pow(rc,4))); - return n; -} diff --git a/Source/Parser/GpuParser.H b/Source/Parser/GpuParser.H new file mode 100644 index 000000000..1533ee6b9 --- /dev/null +++ b/Source/Parser/GpuParser.H @@ -0,0 +1,72 @@ +#ifndef WARPX_GPU_PARSER_H_ +#define WARPX_GPU_PARSER_H_ + +#include <WarpXParser.H> +#include <AMReX_Gpu.H> + +// When compiled for CPU, wrap WarpXParser and enable threading. +// When compiled for GPU, store one copy of the parser in +// CUDA managed memory for __device__ code, and one copy of the parser +// in CUDA managed memory for __host__ code. This way, the parser can be +// efficiently called from both host and device. +class GpuParser +{ +public: + GpuParser (WarpXParser const& wp); + void clear (); + + AMREX_GPU_HOST_DEVICE + double + operator() (double x, double y, double z) const noexcept + { +#ifdef AMREX_USE_GPU + +#ifdef AMREX_DEVICE_COMPILE +// WarpX compiled for GPU, function compiled for __device__ + // the 3D position of each particle is stored in shared memory. + amrex::Gpu::SharedMemory<double> gsm; + double* p = gsm.dataPtr(); + int tid = threadIdx.x + threadIdx.y*blockDim.x + threadIdx.z*(blockDim.x*blockDim.y); + p[tid*3] = x; + p[tid*3+1] = y; + p[tid*3+2] = z; + return wp_ast_eval(m_gpu_parser.ast); +#else +// WarpX compiled for GPU, function compiled for __host__ + m_var.x = x; + m_var.y = y; + m_var.z = z; + return wp_ast_eval(m_cpu_parser.ast); +#endif + +#else +// WarpX compiled for CPU +#ifdef _OPENMP + int tid = omp_get_thread_num(); +#else + int tid = 0; +#endif + m_var[tid].x = x; + m_var[tid].y = y; + m_var[tid].z = z; + return wp_ast_eval(m_parser[tid]->ast); +#endif + } + +private: + +#ifdef AMREX_USE_GPU + // Copy of the parser running on __device__ + struct wp_parser m_gpu_parser; + // Copy of the parser running on __host__ + struct wp_parser m_cpu_parser; + mutable amrex::XDim3 m_var; +#else + // Only one parser + struct wp_parser** m_parser; + mutable amrex::XDim3* m_var; + int nthreads; +#endif +}; + +#endif diff --git a/Source/Parser/GpuParser.cpp b/Source/Parser/GpuParser.cpp new file mode 100644 index 000000000..97b96d465 --- /dev/null +++ b/Source/Parser/GpuParser.cpp @@ -0,0 +1,73 @@ +#include <GpuParser.H> + +GpuParser::GpuParser (WarpXParser const& wp) +{ +#ifdef AMREX_USE_GPU + + struct wp_parser* a_wp = wp.m_parser; + // Initialize GPU parser: allocate memory in CUDA managed memory, + // copy all data needed on GPU to m_gpu_parser + m_gpu_parser.sz_mempool = wp_ast_size((struct wp_node*)a_wp); + m_gpu_parser.p_root = (struct wp_node*) + amrex::The_Managed_Arena()->alloc(m_gpu_parser.sz_mempool); + m_gpu_parser.p_free = m_gpu_parser.p_root; + // 0: don't free the source + m_gpu_parser.ast = wp_parser_ast_dup(&m_gpu_parser, a_wp->ast, 0); + wp_parser_regvar_gpu(&m_gpu_parser, "x", 0); + wp_parser_regvar_gpu(&m_gpu_parser, "y", 1); + wp_parser_regvar_gpu(&m_gpu_parser, "z", 2); + + // Initialize CPU parser: allocate memory in CUDA managed memory, + // copy all data needed on CPU to m_cpu_parser + m_cpu_parser.sz_mempool = wp_ast_size((struct wp_node*)a_wp); + m_cpu_parser.p_root = (struct wp_node*) + amrex::The_Managed_Arena()->alloc(m_cpu_parser.sz_mempool); + m_cpu_parser.p_free = m_cpu_parser.p_root; + // 0: don't free the source + m_cpu_parser.ast = wp_parser_ast_dup(&m_cpu_parser, a_wp->ast, 0); + wp_parser_regvar(&m_cpu_parser, "x", &(m_var.x)); + wp_parser_regvar(&m_cpu_parser, "y", &(m_var.y)); + wp_parser_regvar(&m_cpu_parser, "z", &(m_var.z)); + +#else // not defined AMREX_USE_GPU + +#ifdef _OPENMP + nthreads = omp_get_max_threads(); +#else // _OPENMP + nthreads = 1; +#endif // _OPENMP + + m_parser = ::new struct wp_parser*[nthreads]; + m_var = ::new amrex::XDim3[nthreads]; + + for (int tid = 0; tid < nthreads; ++tid) + { +#ifdef _OPENMP + m_parser[tid] = wp_parser_dup(wp.m_parser[tid]); +#else // _OPENMP + m_parser[tid] = wp_parser_dup(wp.m_parser); +#endif // _OPENMP + wp_parser_regvar(m_parser[tid], "x", &(m_var[tid].x)); + wp_parser_regvar(m_parser[tid], "y", &(m_var[tid].y)); + wp_parser_regvar(m_parser[tid], "z", &(m_var[tid].z)); + } + +#endif // AMREX_USE_GPU +} + +void +GpuParser::clear () +{ +#ifdef AMREX_USE_GPU + amrex::The_Managed_Arena()->free(m_gpu_parser.ast); + amrex::The_Managed_Arena()->free(m_cpu_parser.ast); +#else + for (int tid = 0; tid < nthreads; ++tid) + { + wp_parser_delete(m_parser[tid]); + } + ::delete[] m_parser; + ::delete[] m_var; +#endif +} + diff --git a/Source/Parser/Make.package b/Source/Parser/Make.package index 26ef4fb43..5ce02cbda 100644 --- a/Source/Parser/Make.package +++ b/Source/Parser/Make.package @@ -3,6 +3,8 @@ cEXE_sources += wp_parser_y.c wp_parser.tab.c wp_parser.lex.c wp_parser_c.c cEXE_headers += wp_parser_y.h wp_parser.tab.h wp_parser.lex.h wp_parser_c.h CEXE_sources += WarpXParser.cpp CEXE_headers += WarpXParser.H +CEXE_headers += GpuParser.H +CEXE_sources += GpuParser.cpp INCLUDE_LOCATIONS += $(WARPX_HOME)/Source/Parser VPATH_LOCATIONS += $(WARPX_HOME)/Source/Parser diff --git a/Source/Parser/WarpXParser.H b/Source/Parser/WarpXParser.H index 046491e29..ffa61e457 100644 --- a/Source/Parser/WarpXParser.H +++ b/Source/Parser/WarpXParser.H @@ -13,6 +13,8 @@ #include <omp.h> #endif +class GpuParser; + class WarpXParser { public: @@ -46,6 +48,8 @@ public: std::set<std::string> symbols () const; + friend class GpuParser; + private: void clear (); diff --git a/Source/Parser/wp_parser_c.h b/Source/Parser/wp_parser_c.h index d810bd685..3aafdec65 100644 --- a/Source/Parser/wp_parser_c.h +++ b/Source/Parser/wp_parser_c.h @@ -2,6 +2,8 @@ #define WP_PARSER_C_H_ #include "wp_parser_y.h" +#include <AMReX_GpuQualifiers.H> +#include <AMReX_Extension.H> #ifdef __cplusplus extern "C" { @@ -18,71 +20,167 @@ extern "C" { #include <set> #include <string> -inline -double +AMREX_GPU_HOST_DEVICE +inline double wp_ast_eval (struct wp_node* node) { double result; +#ifdef AMREX_DEVICE_COMPILE + extern __shared__ double extern_xyz[]; + int tid = threadIdx.x + threadIdx.y*blockDim.x + threadIdx.z*(blockDim.x*blockDim.y); + double* x = extern_xyz + tid*3; +#endif + switch (node->type) { case WP_NUMBER: + { result = ((struct wp_number*)node)->value; break; + } case WP_SYMBOL: - result = *(((struct wp_symbol*)node)->pointer); + { +#ifdef AMREX_DEVICE_COMPILE + int i =((struct wp_symbol*)node)->ip.i; + result = x[i]; +#else + result = *(((struct wp_symbol*)node)->ip.p); +#endif break; + } case WP_ADD: + { result = wp_ast_eval(node->l) + wp_ast_eval(node->r); break; + } case WP_SUB: + { result = wp_ast_eval(node->l) - wp_ast_eval(node->r); break; + } case WP_MUL: + { result = wp_ast_eval(node->l) * wp_ast_eval(node->r); break; + } case WP_DIV: + { result = wp_ast_eval(node->l) / wp_ast_eval(node->r); break; + } case WP_NEG: + { result = -wp_ast_eval(node->l); break; + } case WP_F1: + { result = wp_call_f1(((struct wp_f1*)node)->ftype, wp_ast_eval(((struct wp_f1*)node)->l)); break; + } case WP_F2: + { result = wp_call_f2(((struct wp_f2*)node)->ftype, wp_ast_eval(((struct wp_f2*)node)->l), wp_ast_eval(((struct wp_f2*)node)->r)); break; + } case WP_ADD_VP: - result = node->lvp.v + *(node->rp); + { +#ifdef AMREX_DEVICE_COMPILE + int i = node->rip.i; + result = node->lvp.v + x[i]; +#else + result = node->lvp.v + *(node->rip.p); +#endif break; + } case WP_ADD_PP: - result = *(node->lvp.p) + *(node->rp); + { +#ifdef AMREX_DEVICE_COMPILE + int i = node->lvp.ip.i; + int j = node->rip.i; + result = x[i] + x[j]; +#else + result = *(node->lvp.ip.p) + *(node->rip.p); +#endif break; + } case WP_SUB_VP: - result = node->lvp.v - *(node->rp); + { +#ifdef AMREX_DEVICE_COMPILE + int i = node->rip.i; + result = node->lvp.v - x[i]; +#else + result = node->lvp.v - *(node->rip.p); +#endif break; + } case WP_SUB_PP: - result = *(node->lvp.p) - *(node->rp); + { +#ifdef AMREX_DEVICE_COMPILE + int i = node->lvp.ip.i; + int j = node->rip.i; + result = x[i] - x[j]; +#else + result = *(node->lvp.ip.p) - *(node->rip.p); +#endif break; + } case WP_MUL_VP: - result = node->lvp.v * *(node->rp); + { +#ifdef AMREX_DEVICE_COMPILE + int i = node->rip.i; + result = node->lvp.v * x[i]; +#else + result = node->lvp.v * *(node->rip.p); +#endif break; + } case WP_MUL_PP: - result = *(node->lvp.p) * *(node->rp); + { +#ifdef AMREX_DEVICE_COMPILE + int i = node->lvp.ip.i; + int j = node->rip.i; + result = x[i] * x[j]; +#else + result = *(node->lvp.ip.p) * *(node->rip.p); +#endif break; + } case WP_DIV_VP: - result = node->lvp.v / *(node->rp); + { +#ifdef AMREX_DEVICE_COMPILE + int i = node->rip.i; + result = node->lvp.v / x[i]; +#else + result = node->lvp.v / *(node->rip.p); +#endif break; + } case WP_DIV_PP: - result = *(node->lvp.p) / *(node->rp); + { +#ifdef AMREX_DEVICE_COMPILE + int i = node->lvp.ip.i; + int j = node->rip.i; + result = x[i] / x[j]; +#else + result = *(node->lvp.ip.p) / *(node->rip.p); +#endif break; + } case WP_NEG_P: - result = -*(node->lvp.p); + { +#ifdef AMREX_DEVICE_COMPILE + int i = node->rip.i; + result = -x[i]; +#else + result = -*(node->lvp.ip.p); +#endif break; + } default: yyerror("wp_ast_eval: unknown node type %d\n", node->type); } diff --git a/Source/Parser/wp_parser_y.c b/Source/Parser/wp_parser_y.c index 46cb199db..259f9368b 100644 --- a/Source/Parser/wp_parser_y.c +++ b/Source/Parser/wp_parser_y.c @@ -6,6 +6,8 @@ #include "wp_parser_y.h" #include "wp_parser.tab.h" +#include <AMReX_GpuQualifiers.H> + static struct wp_node* wp_root = NULL; /* This is called by a bison rule to store the original AST in a @@ -33,7 +35,7 @@ wp_makesymbol (char* name) struct wp_symbol* symbol = (struct wp_symbol*) malloc(sizeof(struct wp_symbol)); symbol->type = WP_SYMBOL; symbol->name = strdup(name); - symbol->pointer = NULL; + symbol->ip.p = NULL; return symbol; } @@ -74,13 +76,19 @@ wp_newf2 (enum wp_f2_t ftype, struct wp_node* l, struct wp_node* r) return (struct wp_node*) tmp; } +AMREX_GPU_HOST_DEVICE void yyerror (char const *s, ...) { va_list vl; va_start(vl, s); +#ifdef AMREX_DEVICE_COMPILE + printf(s,"\n"); + assert(0); +#else vfprintf(stderr, s, vl); fprintf(stderr, "\n"); +#endif va_end(vl); } @@ -97,7 +105,7 @@ wp_parser_new (void) my_parser->ast = wp_parser_ast_dup(my_parser, wp_root,1); /* 1: free the source wp_root */ - if (my_parser->p_root + my_parser->sz_mempool != my_parser->p_free) { + if ((char*)my_parser->p_root + my_parser->sz_mempool != (char*)my_parser->p_free) { yyerror("wp_parser_new: error in memory size"); exit(1); } @@ -145,6 +153,7 @@ wp_parser_dup (struct wp_parser* source) return dest; } +AMREX_GPU_HOST_DEVICE double wp_call_f1 (enum wp_f1_t type, double a) { @@ -175,6 +184,7 @@ wp_call_f1 (enum wp_f1_t type, double a) } } +AMREX_GPU_HOST_DEVICE double wp_call_f2 (enum wp_f2_t type, double a, double b) { @@ -346,23 +356,23 @@ wp_parser_ast_dup (struct wp_parser* my_parser, struct wp_node* node, int move) #define WP_MOVEUP_R(node, v) \ struct wp_node* n = node->r->r; \ - double* p = node->r->rp; \ + double* p = node->r->rip.p; \ node->r = n; \ node->lvp.v = v; \ - node->rp = p; + node->rip.p = p; #define WP_MOVEUP_L(node, v) \ struct wp_node* n = node->l->r; \ - double* p = node->l->rp; \ + double* p = node->l->rip.p; \ node->r = n; \ node->lvp.v = v; \ - node->rp = p; + node->rip.p = p; #define WP_EVAL_R(node) node->r->lvp.v #define WP_EVAL_L(node) node->l->lvp.v #define WP_NEG_MOVEUP(node) \ node->r = node->l->r; \ node->lvp.v = -node->l->lvp.v; \ - node->rp = node->l->rp; + node->rip.p = node->l->rip.p; void wp_ast_optimize (struct wp_node* node) @@ -391,22 +401,22 @@ wp_ast_optimize (struct wp_node* node) node->r->type == WP_SYMBOL) { node->lvp.v = ((struct wp_number*)(node->l))->value; - node->rp = ((struct wp_symbol*)(node->r))->pointer; + node->rip.p = ((struct wp_symbol*)(node->r))->ip.p; node->type = WP_ADD_VP; } else if (node->l->type == WP_SYMBOL && node->r->type == WP_NUMBER) { node->lvp.v = ((struct wp_number*)(node->r))->value; - node->rp = ((struct wp_symbol*)(node->l))->pointer; + node->rip.p = ((struct wp_symbol*)(node->l))->ip.p; node->r = node->l; node->type = WP_ADD_VP; } else if (node->l->type == WP_SYMBOL && node->r->type == WP_SYMBOL) { - node->lvp.p = ((struct wp_symbol*)(node->l))->pointer; - node->rp = ((struct wp_symbol*)(node->r))->pointer; + node->lvp.ip.p = ((struct wp_symbol*)(node->l))->ip.p; + node->rip.p = ((struct wp_symbol*)(node->r))->ip.p; node->type = WP_ADD_PP; } else if (node->l->type == WP_NUMBER && @@ -454,22 +464,22 @@ wp_ast_optimize (struct wp_node* node) node->r->type == WP_SYMBOL) { node->lvp.v = ((struct wp_number*)(node->l))->value; - node->rp = ((struct wp_symbol*)(node->r))->pointer; + node->rip.p = ((struct wp_symbol*)(node->r))->ip.p; node->type = WP_SUB_VP; } else if (node->l->type == WP_SYMBOL && node->r->type == WP_NUMBER) { node->lvp.v = -((struct wp_number*)(node->r))->value; - node->rp = ((struct wp_symbol*)(node->l))->pointer; + node->rip.p = ((struct wp_symbol*)(node->l))->ip.p; node->r = node->l; node->type = WP_ADD_VP; } else if (node->l->type == WP_SYMBOL && node->r->type == WP_SYMBOL) { - node->lvp.p = ((struct wp_symbol*)(node->l))->pointer; - node->rp = ((struct wp_symbol*)(node->r))->pointer; + node->lvp.ip.p = ((struct wp_symbol*)(node->l))->ip.p; + node->rip.p = ((struct wp_symbol*)(node->r))->ip.p; node->type = WP_SUB_PP; } else if (node->l->type == WP_NUMBER && @@ -517,22 +527,22 @@ wp_ast_optimize (struct wp_node* node) node->r->type == WP_SYMBOL) { node->lvp.v = ((struct wp_number*)(node->l))->value; - node->rp = ((struct wp_symbol*)(node->r))->pointer; + node->rip.p = ((struct wp_symbol*)(node->r))->ip.p; node->type = WP_MUL_VP; } else if (node->l->type == WP_SYMBOL && node->r->type == WP_NUMBER) { node->lvp.v = ((struct wp_number*)(node->r))->value; - node->rp = ((struct wp_symbol*)(node->l))->pointer; + node->rip.p = ((struct wp_symbol*)(node->l))->ip.p; node->r = node->l; node->type = WP_MUL_VP; } else if (node->l->type == WP_SYMBOL && node->r->type == WP_SYMBOL) { - node->lvp.p = ((struct wp_symbol*)(node->l))->pointer; - node->rp = ((struct wp_symbol*)(node->r))->pointer; + node->lvp.ip.p = ((struct wp_symbol*)(node->l))->ip.p; + node->rip.p = ((struct wp_symbol*)(node->r))->ip.p; node->type = WP_MUL_PP; } else if (node->l->type == WP_NUMBER && @@ -580,22 +590,22 @@ wp_ast_optimize (struct wp_node* node) node->r->type == WP_SYMBOL) { node->lvp.v = ((struct wp_number*)(node->l))->value; - node->rp = ((struct wp_symbol*)(node->r))->pointer; + node->rip.p = ((struct wp_symbol*)(node->r))->ip.p; node->type = WP_DIV_VP; } else if (node->l->type == WP_SYMBOL && node->r->type == WP_NUMBER) { node->lvp.v = 1./((struct wp_number*)(node->r))->value; - node->rp = ((struct wp_symbol*)(node->l))->pointer; + node->rip.p = ((struct wp_symbol*)(node->l))->ip.p; node->r = node->l; node->type = WP_MUL_VP; } else if (node->l->type == WP_SYMBOL && node->r->type == WP_SYMBOL) { - node->lvp.p = ((struct wp_symbol*)(node->l))->pointer; - node->rp = ((struct wp_symbol*)(node->r))->pointer; + node->lvp.ip.p = ((struct wp_symbol*)(node->l))->ip.p; + node->rip.p = ((struct wp_symbol*)(node->r))->ip.p; node->type = WP_DIV_PP; } else if (node->l->type == WP_NUMBER && @@ -637,7 +647,7 @@ wp_ast_optimize (struct wp_node* node) } else if (node->l->type == WP_SYMBOL) { - node->lvp.p = ((struct wp_symbol*)(node->l))->pointer; + node->lvp.ip.p = ((struct wp_symbol*)(node->l))->ip.p; node->type = WP_NEG_P; } else if (node->l->type == WP_ADD_VP) @@ -936,7 +946,7 @@ wp_ast_regvar (struct wp_node* node, char const* name, double* p) break; case WP_SYMBOL: if (strcmp(name, ((struct wp_symbol*)node)->name) == 0) { - ((struct wp_symbol*)node)->pointer = p; + ((struct wp_symbol*)node)->ip.p = p; } break; case WP_ADD: @@ -961,11 +971,11 @@ wp_ast_regvar (struct wp_node* node, char const* name, double* p) case WP_MUL_VP: case WP_DIV_VP: wp_ast_regvar(node->r, name, p); - node->rp = ((struct wp_symbol*)(node->r))->pointer; + node->rip.p = ((struct wp_symbol*)(node->r))->ip.p; break; case WP_NEG_P: wp_ast_regvar(node->l, name, p); - node->lvp.p = ((struct wp_symbol*)(node->l))->pointer; + node->lvp.ip.p = ((struct wp_symbol*)(node->l))->ip.p; break; case WP_ADD_PP: case WP_SUB_PP: @@ -973,8 +983,8 @@ wp_ast_regvar (struct wp_node* node, char const* name, double* p) case WP_DIV_PP: wp_ast_regvar(node->l, name, p); wp_ast_regvar(node->r, name, p); - node->lvp.p = ((struct wp_symbol*)(node->l))->pointer; - node->rp = ((struct wp_symbol*)(node->r))->pointer; + node->lvp.ip.p = ((struct wp_symbol*)(node->l))->ip.p; + node->rip.p = ((struct wp_symbol*)(node->r))->ip.p; break; default: yyerror("wp_ast_regvar: unknown node type %d\n", node->type); @@ -982,6 +992,61 @@ wp_ast_regvar (struct wp_node* node, char const* name, double* p) } } +void +wp_ast_regvar_gpu (struct wp_node* node, char const* name, int i) +{ + switch (node->type) + { + case WP_NUMBER: + break; + case WP_SYMBOL: + if (strcmp(name, ((struct wp_symbol*)node)->name) == 0) { + ((struct wp_symbol*)node)->ip.i = i; + } + break; + case WP_ADD: + case WP_SUB: + case WP_MUL: + case WP_DIV: + wp_ast_regvar_gpu(node->l, name, i); + wp_ast_regvar_gpu(node->r, name, i); + break; + case WP_NEG: + wp_ast_regvar_gpu(node->l, name, i); + break; + case WP_F1: + wp_ast_regvar_gpu(node->l, name, i); + break; + case WP_F2: + wp_ast_regvar_gpu(node->l, name, i); + wp_ast_regvar_gpu(node->r, name, i); + break; + case WP_ADD_VP: + case WP_SUB_VP: + case WP_MUL_VP: + case WP_DIV_VP: + wp_ast_regvar_gpu(node->r, name, i); + node->rip.i = ((struct wp_symbol*)(node->r))->ip.i; + break; + case WP_NEG_P: + wp_ast_regvar_gpu(node->l, name, i); + node->lvp.ip.i = ((struct wp_symbol*)(node->l))->ip.i; + break; + case WP_ADD_PP: + case WP_SUB_PP: + case WP_MUL_PP: + case WP_DIV_PP: + wp_ast_regvar_gpu(node->l, name, i); + wp_ast_regvar_gpu(node->r, name, i); + node->lvp.ip.i = ((struct wp_symbol*)(node->l))->ip.i; + node->rip.i = ((struct wp_symbol*)(node->r))->ip.i; + break; + default: + yyerror("wp_ast_regvar_gpu: unknown node type %d\n", node->type); + exit(1); + } +} + void wp_ast_setconst (struct wp_node* node, char const* name, double c) { switch (node->type) @@ -1040,6 +1105,12 @@ wp_parser_regvar (struct wp_parser* parser, char const* name, double* p) } void +wp_parser_regvar_gpu (struct wp_parser* parser, char const* name, int i) +{ + wp_ast_regvar_gpu(parser->ast, name, i); +} + +void wp_parser_setconst (struct wp_parser* parser, char const* name, double c) { wp_ast_setconst(parser->ast, name, c); diff --git a/Source/Parser/wp_parser_y.h b/Source/Parser/wp_parser_y.h index 4a3aeda40..8c9f8e4e4 100644 --- a/Source/Parser/wp_parser_y.h +++ b/Source/Parser/wp_parser_y.h @@ -1,6 +1,8 @@ #ifndef WP_PARSER_Y_H_ #define WP_PARSER_Y_H_ +#include <AMReX_GpuQualifiers.H> + #ifdef __cplusplus #include <cstdlib> extern "C" { @@ -73,17 +75,22 @@ enum wp_node_t { * wp_node_t type can be safely checked to determine their real type. */ -union wp_vp { - double v; +union wp_ip { + int i; double* p; }; +union wp_vp { + double v; + union wp_ip ip; +}; + struct wp_node { enum wp_node_t type; struct wp_node* l; struct wp_node* r; union wp_vp lvp; // After optimization, this may store left value/pointer. - double* rp; // this may store right pointer. + union wp_ip rip; // this may store right pointer. }; struct wp_number { @@ -94,7 +101,7 @@ struct wp_number { struct wp_symbol { enum wp_node_t type; char* name; - double* pointer; + union wp_ip ip; }; struct wp_f1 { /* Builtin functions with one argument */ @@ -124,6 +131,7 @@ struct wp_node* wp_newf1 (enum wp_f1_t ftype, struct wp_node* l); struct wp_node* wp_newf2 (enum wp_f2_t ftype, struct wp_node* l, struct wp_node* r); +AMREX_GPU_HOST_DEVICE void yyerror (char const *s, ...); /*******************************************************************/ @@ -146,6 +154,7 @@ struct wp_parser* wp_parser_dup (struct wp_parser* source); struct wp_node* wp_parser_ast_dup (struct wp_parser* parser, struct wp_node* src, int move); void wp_parser_regvar (struct wp_parser* parser, char const* name, double* p); +void wp_parser_regvar_gpu (struct wp_parser* parser, char const* name, int i); void wp_parser_setconst (struct wp_parser* parser, char const* name, double c); /* We need to walk the tree in these functions */ @@ -153,10 +162,11 @@ void wp_ast_optimize (struct wp_node* node); size_t wp_ast_size (struct wp_node* node); void wp_ast_print (struct wp_node* node); void wp_ast_regvar (struct wp_node* node, char const* name, double* p); +void wp_ast_regvar_gpu (struct wp_node* node, char const* name, int i); void wp_ast_setconst (struct wp_node* node, char const* name, double c); -double wp_call_f1 (enum wp_f1_t type, double a); -double wp_call_f2 (enum wp_f2_t type, double a, double b); +AMREX_GPU_HOST_DEVICE double wp_call_f1 (enum wp_f1_t type, double a); +AMREX_GPU_HOST_DEVICE double wp_call_f2 (enum wp_f2_t type, double a, double b); #ifdef __cplusplus } diff --git a/Source/Particles/PhysicalParticleContainer.H b/Source/Particles/PhysicalParticleContainer.H index 33572c87d..b80619733 100644 --- a/Source/Particles/PhysicalParticleContainer.H +++ b/Source/Particles/PhysicalParticleContainer.H @@ -107,11 +107,8 @@ public: // Inject particles in Box 'part_box' virtual void AddParticles (int lev); + void AddPlasma(int lev, amrex::RealBox part_realbox = amrex::RealBox()); - void AddPlasmaCPU (int lev, amrex::RealBox part_realbox); -#ifdef AMREX_USE_GPU - void AddPlasmaGPU (int lev, amrex::RealBox part_realbox); -#endif void MapParticletoBoostedFrame(amrex::Real& x, amrex::Real& y, amrex::Real& z, std::array<amrex::Real, 3>& u); @@ -140,16 +137,8 @@ protected: bool boost_adjust_transverse_positions = false; bool do_backward_propagation = false; - long NumParticlesToAdd (const amrex::Box& overlap_box, - const amrex::RealBox& overlap_realbox, - const amrex::RealBox& tile_real_box, - const amrex::RealBox& particle_real_box); - - int GetRefineFac(const amrex::Real x, const amrex::Real y, const amrex::Real z); - std::unique_ptr<amrex::IArrayBox> m_refined_injection_mask = nullptr; - // Inject particles during the whole simulation - void ContinuousInjection(const amrex::RealBox& injection_box) override; + void ContinuousInjection (const amrex::RealBox& injection_box) override; }; diff --git a/Source/Particles/PhysicalParticleContainer.cpp b/Source/Particles/PhysicalParticleContainer.cpp index d1f399f9a..7d63bd8e7 100644 --- a/Source/Particles/PhysicalParticleContainer.cpp +++ b/Source/Particles/PhysicalParticleContainer.cpp @@ -17,62 +17,6 @@ using namespace amrex; -long PhysicalParticleContainer:: -NumParticlesToAdd(const Box& overlap_box, const RealBox& overlap_realbox, - const RealBox& tile_realbox, const RealBox& particle_real_box) -{ - const int lev = 0; - const Geometry& geom = Geom(lev); - int num_ppc = plasma_injector->num_particles_per_cell; - const Real* dx = geom.CellSize(); - - long np = 0; - const auto& overlap_corner = overlap_realbox.lo(); - for (IntVect iv = overlap_box.smallEnd(); iv <= overlap_box.bigEnd(); overlap_box.next(iv)) - { - int fac; - if (do_continuous_injection) { -#if ( AMREX_SPACEDIM == 3 ) - Real x = overlap_corner[0] + (iv[0] + 0.5)*dx[0]; - Real y = overlap_corner[1] + (iv[1] + 0.5)*dx[1]; - Real z = overlap_corner[2] + (iv[2] + 0.5)*dx[2]; -#elif ( AMREX_SPACEDIM == 2 ) - Real x = overlap_corner[0] + (iv[0] + 0.5)*dx[0]; - Real y = 0; - Real z = overlap_corner[1] + (iv[1] + 0.5)*dx[1]; -#endif - fac = GetRefineFac(x, y, z); - } else { - fac = 1.0; - } - - int ref_num_ppc = num_ppc * AMREX_D_TERM(fac, *fac, *fac); - for (int i_part=0; i_part<ref_num_ppc;i_part++) { - std::array<Real, 3> r; - plasma_injector->getPositionUnitBox(r, i_part, fac); -#if ( AMREX_SPACEDIM == 3 ) - Real x = overlap_corner[0] + (iv[0] + r[0])*dx[0]; - Real y = overlap_corner[1] + (iv[1] + r[1])*dx[1]; - Real z = overlap_corner[2] + (iv[2] + r[2])*dx[2]; -#elif ( AMREX_SPACEDIM == 2 ) - Real x = overlap_corner[0] + (iv[0] + r[0])*dx[0]; - Real y = 0; - Real z = overlap_corner[1] + (iv[1] + r[1])*dx[1]; -#endif - // If the new particle is not inside the tile box, - // go to the next generated particle. -#if ( AMREX_SPACEDIM == 3 ) - if(!tile_realbox.contains( RealVect{x, y, z} )) continue; -#elif ( AMREX_SPACEDIM == 2 ) - if(!tile_realbox.contains( RealVect{x, z} )) continue; -#endif - ++np; - } - } - - return np; -} - PhysicalParticleContainer::PhysicalParticleContainer (AmrCore* amr_core, int ispecies, const std::string& name) : WarpXParticleContainer(amr_core, ispecies), @@ -134,9 +78,7 @@ PhysicalParticleContainer::PhysicalParticleContainer (AmrCore* amr_core) void PhysicalParticleContainer::InitData() { AddParticles(0); // Note - add on level 0 - if (maxLevel() > 0) { - Redistribute(); // We then redistribute - } + Redistribute(); // We then redistribute } void PhysicalParticleContainer::MapParticletoBoostedFrame(Real& x, Real& y, Real& z, std::array<Real, 3>& u) @@ -200,8 +142,6 @@ PhysicalParticleContainer::AddGaussianBeam(Real x_m, Real y_m, Real z_m, std::normal_distribution<double> distz(z_m, z_rms); if (ParallelDescriptor::IOProcessor()) { - std::array<Real, 3> u; - Real weight; // If do_symmetrize, create 4x fewer particles, and // Replicate each particle 4 times (x,y) (-x,y) (x,-y) (-x,-y) if (do_symmetrize){ @@ -209,36 +149,29 @@ PhysicalParticleContainer::AddGaussianBeam(Real x_m, Real y_m, Real z_m, } for (long i = 0; i < npart; ++i) { #if ( AMREX_SPACEDIM == 3 | WARPX_RZ) - weight = q_tot/npart/charge; + Real weight = q_tot/npart/charge; Real x = distx(mt); Real y = disty(mt); Real z = distz(mt); #elif ( AMREX_SPACEDIM == 2 ) - weight = q_tot/npart/charge/y_rms; + Real weight = q_tot/npart/charge/y_rms; Real x = distx(mt); Real y = 0.; Real z = distz(mt); #endif if (plasma_injector->insideBounds(x, y, z)) { - plasma_injector->getMomentum(u, x, y, z); + XDim3 u = plasma_injector->getMomentum(x, y, z); + u.x *= PhysConst::c; + u.y *= PhysConst::c; + u.z *= PhysConst::c; if (do_symmetrize){ - std::array<Real, 3> u_tmp; - Real x_tmp, y_tmp; // Add four particles to the beam: - // (x,ux,y,uy) (-x,-ux,y,uy) (x,ux,-y,-uy) (-x,-ux,-y,-uy) - for (int ix=0; ix<2; ix++){ - for (int iy=0; iy<2; iy++){ - u_tmp = u; - x_tmp = x*std::pow(-1,ix); - u_tmp[0] *= std::pow(-1,ix); - y_tmp = y*std::pow(-1,iy); - u_tmp[1] *= std::pow(-1,iy); - CheckAndAddParticle(x_tmp, y_tmp, z, - u_tmp, weight/4); - } - } + CheckAndAddParticle( x, y, z, { u.x, u.y, u.z}, weight/4. ); + CheckAndAddParticle( x,-y, z, { u.x,-u.y, u.z}, weight/4. ); + CheckAndAddParticle(-x, y, z, {-u.x, u.y, u.z}, weight/4. ); + CheckAndAddParticle(-x,-y, z, {-u.x,-u.y, u.z}, weight/4. ); } else { - CheckAndAddParticle(x, y, z, u, weight); + CheckAndAddParticle(x, y, z, {u.x,u.y,u.z}, weight); } } } @@ -329,17 +262,7 @@ PhysicalParticleContainer::AddParticles (int lev) void PhysicalParticleContainer::AddPlasma (int lev, RealBox part_realbox) { -#ifdef AMREX_USE_GPU - AddPlasmaGPU(lev, part_realbox); -#else - AddPlasmaCPU(lev, part_realbox); -#endif -} - -void -PhysicalParticleContainer::AddPlasmaCPU (int lev, RealBox part_realbox) -{ - BL_PROFILE("PhysicalParticleContainer::AddPlasmaCPU"); + BL_PROFILE("PhysicalParticleContainer::AddPlasma"); // If no part_realbox is provided, initialize particles in the whole domain const Geometry& geom = Geom(lev); @@ -350,7 +273,8 @@ PhysicalParticleContainer::AddPlasmaCPU (int lev, RealBox part_realbox) Real rmax = std::min(plasma_injector->xmax, part_realbox.hi(0)); #endif - const Real* dx = geom.CellSize(); + const auto dx = geom.CellSizeArray(); + const auto problo = geom.ProbLoArray(); Real scale_fac; #if AMREX_SPACEDIM==3 @@ -365,490 +289,341 @@ PhysicalParticleContainer::AddPlasmaCPU (int lev, RealBox part_realbox) const int grid_id = mfi.index(); const int tile_id = mfi.LocalTileIndex(); GetParticles(lev)[std::make_pair(grid_id, tile_id)]; + if (WarpX::do_boosted_frame_diagnostic && do_boosted_frame_diags) { + DefineAndReturnParticleTile(lev, grid_id, tile_id); + } } #endif MultiFab* cost = WarpX::getCosts(lev); - if ( (not m_refined_injection_mask) and WarpX::do_moving_window) + const int nlevs = numLevels(); + static bool refine_injection = false; + static Box fine_injection_box; + static int rrfac = 1; + // This does not work if the mesh is dynamic. But in that case, we should + // not use refined injected either. We also assume there is only one fine level. + if (WarpX::do_moving_window and WarpX::refine_plasma + and do_continuous_injection and nlevs == 2) { - Box mask_box = geom.Domain(); - mask_box.setSmall(WarpX::moving_window_dir, 0); - mask_box.setBig(WarpX::moving_window_dir, 0); - m_refined_injection_mask.reset( new IArrayBox(mask_box)); - m_refined_injection_mask->setVal(-1); + refine_injection = true; + fine_injection_box = ParticleBoxArray(1).minimalBox(); + fine_injection_box.setSmall(WarpX::moving_window_dir, std::numeric_limits<int>::lowest()); + fine_injection_box.setBig(WarpX::moving_window_dir, std::numeric_limits<int>::max()); + rrfac = m_gdb->refRatio(0)[0]; + fine_injection_box.coarsen(rrfac); } + InjectorPosition* inj_pos = plasma_injector->getInjectorPosition(); + InjectorDensity* inj_rho = plasma_injector->getInjectorDensity(); + InjectorMomentum* inj_mom = plasma_injector->getInjectorMomentum(); + Real gamma_boost = WarpX::gamma_boost; + Real beta_boost = WarpX::beta_boost; + Real t = WarpX::GetInstance().gett_new(lev); + Real density_min = plasma_injector->density_min; + Real density_max = plasma_injector->density_max; + +#ifdef WARPX_RZ + bool radially_weighted = plasma_injector->radially_weighted; +#endif + MFItInfo info; - if (do_tiling) { + if (do_tiling && Gpu::notInLaunchRegion()) { info.EnableTiling(tile_size); } - info.SetDynamic(true); - #ifdef _OPENMP + info.SetDynamic(true); #pragma omp parallel if (not WarpX::serialize_ics) #endif + for (MFIter mfi = MakeMFIter(lev, info); mfi.isValid(); ++mfi) { - std::array<Real,PIdx::nattribs> attribs; - attribs.fill(0.0); - - // Loop through the tiles - for (MFIter mfi = MakeMFIter(lev, info); mfi.isValid(); ++mfi) { - - Real wt = amrex::second(); - - const Box& tile_box = mfi.tilebox(); - const RealBox tile_realbox = WarpX::getRealBox(tile_box, lev); - - // Find the cells of part_box that overlap with tile_realbox - // If there is no overlap, just go to the next tile in the loop - RealBox overlap_realbox; - Box overlap_box; - Real ncells_adjust; - bool no_overlap = 0; - - for (int dir=0; dir<AMREX_SPACEDIM; dir++) { - if ( tile_realbox.lo(dir) <= part_realbox.hi(dir) ) { - ncells_adjust = std::floor( (tile_realbox.lo(dir) - part_realbox.lo(dir))/dx[dir] ); - overlap_realbox.setLo( dir, part_realbox.lo(dir) + std::max(ncells_adjust, 0.) * dx[dir]); - } else { - no_overlap = 1; break; - } - if ( tile_realbox.hi(dir) >= part_realbox.lo(dir) ) { - ncells_adjust = std::floor( (part_realbox.hi(dir) - tile_realbox.hi(dir))/dx[dir] ); - overlap_realbox.setHi( dir, part_realbox.hi(dir) - std::max(ncells_adjust, 0.) * dx[dir]); - } else { - no_overlap = 1; break; - } - // Count the number of cells in this direction in overlap_realbox - overlap_box.setSmall( dir, 0 ); - overlap_box.setBig( dir, - int( round((overlap_realbox.hi(dir)-overlap_realbox.lo(dir))/dx[dir] )) - 1); + Real wt = amrex::second(); + + const Box& tile_box = mfi.tilebox(); + const RealBox tile_realbox = WarpX::getRealBox(tile_box, lev); + + // Find the cells of part_box that overlap with tile_realbox + // If there is no overlap, just go to the next tile in the loop + RealBox overlap_realbox; + Box overlap_box; + IntVect shifted; + bool no_overlap = false; + + for (int dir=0; dir<AMREX_SPACEDIM; dir++) { + if ( tile_realbox.lo(dir) <= part_realbox.hi(dir) ) { + Real ncells_adjust = std::floor( (tile_realbox.lo(dir) - part_realbox.lo(dir))/dx[dir] ); + overlap_realbox.setLo( dir, part_realbox.lo(dir) + std::max(ncells_adjust, 0.) * dx[dir]); + } else { + no_overlap = true; break; } - if (no_overlap == 1) { - continue; // Go to the next tile + if ( tile_realbox.hi(dir) >= part_realbox.lo(dir) ) { + Real ncells_adjust = std::floor( (part_realbox.hi(dir) - tile_realbox.hi(dir))/dx[dir] ); + overlap_realbox.setHi( dir, part_realbox.hi(dir) - std::max(ncells_adjust, 0.) * dx[dir]); + } else { + no_overlap = true; break; } + // Count the number of cells in this direction in overlap_realbox + overlap_box.setSmall( dir, 0 ); + overlap_box.setBig( dir, + int( std::round((overlap_realbox.hi(dir)-overlap_realbox.lo(dir)) + /dx[dir] )) - 1); + shifted[dir] = std::round((overlap_realbox.lo(dir)-problo[dir])/dx[dir]); + // shifted is exact in non-moving-window direction. That's all we care. + } + if (no_overlap == 1) { + continue; // Go to the next tile + } - const int grid_id = mfi.index(); - const int tile_id = mfi.LocalTileIndex(); - - // Loop through the cells of overlap_box and inject - // the corresponding particles - const auto& overlap_corner = overlap_realbox.lo(); - for (IntVect iv = overlap_box.smallEnd(); iv <= overlap_box.bigEnd(); overlap_box.next(iv)) - { - int fac; - if (do_continuous_injection) { -#if ( AMREX_SPACEDIM == 3 ) - Real x = overlap_corner[0] + (iv[0] + 0.5)*dx[0]; - Real y = overlap_corner[1] + (iv[1] + 0.5)*dx[1]; - Real z = overlap_corner[2] + (iv[2] + 0.5)*dx[2]; -#elif ( AMREX_SPACEDIM == 2 ) - Real x = overlap_corner[0] + (iv[0] + 0.5)*dx[0]; - Real y = 0; - Real z = overlap_corner[1] + (iv[1] + 0.5)*dx[1]; -#endif - fac = GetRefineFac(x, y, z); - } else { - fac = 1.0; - } - - int ref_num_ppc = num_ppc * AMREX_D_TERM(fac, *fac, *fac); - for (int i_part=0; i_part<ref_num_ppc;i_part++) { - std::array<Real, 3> r; - plasma_injector->getPositionUnitBox(r, i_part, fac); -#if ( AMREX_SPACEDIM == 3 ) - Real x = overlap_corner[0] + (iv[0] + r[0])*dx[0]; - Real y = overlap_corner[1] + (iv[1] + r[1])*dx[1]; - Real z = overlap_corner[2] + (iv[2] + r[2])*dx[2]; -#elif ( AMREX_SPACEDIM == 2 ) - Real x = overlap_corner[0] + (iv[0] + r[0])*dx[0]; - Real y = 0; - Real z = overlap_corner[1] + (iv[1] + r[1])*dx[1]; -#endif - // If the new particle is not inside the tile box, - // go to the next generated particle. -#if ( AMREX_SPACEDIM == 3 ) - if(!tile_realbox.contains( RealVect{x, y, z} )) continue; -#elif ( AMREX_SPACEDIM == 2 ) - if(!tile_realbox.contains( RealVect{x, z} )) continue; -#endif - - // Save the x and y values to use in the insideBounds checks. - // This is needed with WARPX_RZ since x and y are modified. - Real xb = x; - Real yb = y; + const int grid_id = mfi.index(); + const int tile_id = mfi.LocalTileIndex(); -#ifdef WARPX_RZ - // Replace the x and y, choosing the angle randomly. - // These x and y are used to get the momentum and density - Real theta = 2.*MathConst::pi*amrex::Random(); - y = x*std::sin(theta); - x = x*std::cos(theta); -#endif + // Max number of new particles, if particles are created in the whole + // overlap_box. All of them are created, and invalid ones are then + // discaded + int max_new_particles = overlap_box.numPts() * num_ppc; - Real dens; - std::array<Real, 3> u; - if (WarpX::gamma_boost == 1.){ - // Lab-frame simulation - // If the particle is not within the species's - // xmin, xmax, ymin, ymax, zmin, zmax, go to - // the next generated particle. - if (!plasma_injector->insideBounds(xb, yb, z)) continue; - plasma_injector->getMomentum(u, x, y, z); - dens = plasma_injector->getDensity(x, y, z); - } else { - // Boosted-frame simulation - Real c = PhysConst::c; - Real gamma_boost = WarpX::gamma_boost; - Real beta_boost = WarpX::beta_boost; - // Since the user provides the density distribution - // at t_lab=0 and in the lab-frame coordinates, - // we need to find the lab-frame position of this - // particle at t_lab=0, from its boosted-frame coordinates - // Assuming ballistic motion, this is given by: - // z0_lab = gamma*( z_boost*(1-beta*betaz_lab) - ct_boost*(betaz_lab-beta) ) - // where betaz_lab is the speed of the particle in the lab frame - // - // In order for this equation to be solvable, betaz_lab - // is explicitly assumed to have no dependency on z0_lab - plasma_injector->getMomentum(u, x, y, 0.); // No z0_lab dependency - // At this point u is the lab-frame momentum - // => Apply the above formula for z0_lab - Real gamma_lab = std::sqrt( 1 + (u[0]*u[0] + u[1]*u[1] + u[2]*u[2])/(c*c) ); - Real betaz_lab = u[2]/gamma_lab/c; - Real t = WarpX::GetInstance().gett_new(lev); - Real z0_lab = gamma_boost * ( z*(1-beta_boost*betaz_lab) - c*t*(betaz_lab-beta_boost) ); - // If the particle is not within the lab-frame zmin, zmax, etc. - // go to the next generated particle. - if (!plasma_injector->insideBounds(xb, yb, z0_lab)) continue; - // call `getDensity` with lab-frame parameters - dens = plasma_injector->getDensity(x, y, z0_lab); - // At this point u and dens are the lab-frame quantities - // => Perform Lorentz transform - dens = gamma_boost * dens * ( 1 - beta_boost*betaz_lab ); - u[2] = gamma_boost * ( u[2] -beta_boost*c*gamma_lab ); - } - Real weight = dens * scale_fac / (AMREX_D_TERM(fac, *fac, *fac)); -#ifdef WARPX_RZ - if (plasma_injector->radially_weighted) { - weight *= 2*MathConst::pi*xb; - } else { - // This is not correct since it might shift the particle - // out of the local grid - x = std::sqrt(xb*rmax); - weight *= dx[0]; - } -#endif - attribs[PIdx::w ] = weight; - attribs[PIdx::ux] = u[0]; - attribs[PIdx::uy] = u[1]; - attribs[PIdx::uz] = u[2]; - - if (WarpX::do_boosted_frame_diagnostic && do_boosted_frame_diags) - { - auto& particle_tile = DefineAndReturnParticleTile(lev, grid_id, tile_id); - particle_tile.push_back_real(particle_comps["xold"], x); - particle_tile.push_back_real(particle_comps["yold"], y); - particle_tile.push_back_real(particle_comps["zold"], z); - - particle_tile.push_back_real(particle_comps["uxold"], u[0]); - particle_tile.push_back_real(particle_comps["uyold"], u[1]); - particle_tile.push_back_real(particle_comps["uzold"], u[2]); - } - - AddOneParticle(lev, grid_id, tile_id, x, y, z, attribs); + // If refine injection, build pointer dp_cellid that holds pointer to + // array of refined cell IDs. + Vector<int> cellid_v; + if (refine_injection and lev == 0) + { + // then how many new particles will be injected is not that simple + // We have to shift fine_injection_box because overlap_box has been shifted. + Box fine_overlap_box = overlap_box & amrex::shift(fine_injection_box,shifted); + max_new_particles += fine_overlap_box.numPts() * num_ppc + * (AMREX_D_TERM(rrfac,*rrfac,*rrfac)-1); + for (int icell = 0, ncells = overlap_box.numPts(); icell < ncells; ++icell) { + IntVect iv = overlap_box.atOffset(icell); + int r = (fine_overlap_box.contains(iv)) ? AMREX_D_TERM(rrfac,*rrfac,*rrfac) : 1; + for (int ipart = 0; ipart < r; ++ipart) { + cellid_v.push_back(icell); + cellid_v.push_back(ipart); } } + } + int const* hp_cellid = (cellid_v.empty()) ? nullptr : cellid_v.data(); + amrex::AsyncArray<int> cellid_aa(hp_cellid, cellid_v.size()); + int const* dp_cellid = cellid_aa.data(); - if (cost) { - wt = (amrex::second() - wt) / tile_box.d_numPts(); - Array4<Real> const& costarr = cost->array(mfi); - amrex::ParallelFor(tile_box, - [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept - { - costarr(i,j,k) += wt; - }); - } + // Update NextID to include particles created in this function + int pid; +#pragma omp critical (add_plasma_nextid) + { + pid = ParticleType::NextID(); + ParticleType::NextID(pid+max_new_particles); } - } -} + const int cpuid = ParallelDescriptor::MyProc(); -#ifdef AMREX_USE_GPU -void -PhysicalParticleContainer::AddPlasmaGPU (int lev, RealBox part_realbox) -{ - BL_PROFILE("PhysicalParticleContainer::AddPlasmaGPU"); + auto& particle_tile = GetParticles(lev)[std::make_pair(grid_id,tile_id)]; + bool do_boosted = false; + if (WarpX::do_boosted_frame_diagnostic && do_boosted_frame_diags) { + do_boosted = true; + DefineAndReturnParticleTile(lev, grid_id, tile_id); + } + auto old_size = particle_tile.GetArrayOfStructs().size(); + auto new_size = old_size + max_new_particles; + particle_tile.resize(new_size); + + ParticleType* pp = particle_tile.GetArrayOfStructs()().data() + old_size; + auto& soa = particle_tile.GetStructOfArrays(); + GpuArray<Real*,PIdx::nattribs> pa; + for (int ia = 0; ia < PIdx::nattribs; ++ia) { + pa[ia] = soa.GetRealData(ia).data() + old_size; + } + GpuArray<Real*,6> pb; + if (do_boosted) { + pb[0] = soa.GetRealData(particle_comps[ "xold"]).data() + old_size; + pb[1] = soa.GetRealData(particle_comps[ "yold"]).data() + old_size; + pb[2] = soa.GetRealData(particle_comps[ "zold"]).data() + old_size; + pb[3] = soa.GetRealData(particle_comps["uxold"]).data() + old_size; + pb[4] = soa.GetRealData(particle_comps["uyold"]).data() + old_size; + pb[5] = soa.GetRealData(particle_comps["uzold"]).data() + old_size; + } - // If no part_realbox is provided, initialize particles in the whole domain - const Geometry& geom = Geom(lev); - if (!part_realbox.ok()) part_realbox = geom.ProbDomain(); + const GpuArray<Real,AMREX_SPACEDIM> overlap_corner + {AMREX_D_DECL(overlap_realbox.lo(0), + overlap_realbox.lo(1), + overlap_realbox.lo(2))}; - int num_ppc = plasma_injector->num_particles_per_cell; -#ifdef WARPX_RZ - Real rmax = std::min(plasma_injector->xmax, part_realbox.hi(0)); -#endif + std::size_t shared_mem_bytes = plasma_injector->sharedMemoryNeeded(); + int lrrfac = rrfac; - const Real* dx = geom.CellSize(); + // Loop over all new particles and inject them (creates too many + // particles, in particular does not consider xmin, xmax etc.). + // The invalid ones are given negative ID and are deleted during the + // next redistribute. + amrex::For(max_new_particles, [=] AMREX_GPU_DEVICE (int ip) noexcept + { + ParticleType& p = pp[ip]; + p.id() = pid+ip; + p.cpu() = cpuid; + + int cellid, i_part; + Real fac; + if (dp_cellid == nullptr) { + cellid = ip/num_ppc; + i_part = ip - cellid*num_ppc; + fac = 1.0; + } else { + cellid = dp_cellid[2*ip]; + i_part = dp_cellid[2*ip+1]; + fac = lrrfac; + } - Real scale_fac; -#if AMREX_SPACEDIM==3 - scale_fac = dx[0]*dx[1]*dx[2]/num_ppc; -#elif AMREX_SPACEDIM==2 - scale_fac = dx[0]*dx[1]/num_ppc; -#endif + IntVect iv = overlap_box.atOffset(cellid); -#ifdef _OPENMP - // First touch all tiles in the map in serial - for (MFIter mfi = MakeMFIter(lev); mfi.isValid(); ++mfi) { - const int grid_id = mfi.index(); - const int tile_id = mfi.LocalTileIndex(); - GetParticles(lev)[std::make_pair(grid_id, tile_id)]; - } + const XDim3 r = inj_pos->getPositionUnitBox(i_part, fac); +#if (AMREX_SPACEDIM == 3) + Real x = overlap_corner[0] + (iv[0]+r.x)*dx[0]; + Real y = overlap_corner[1] + (iv[1]+r.y)*dx[1]; + Real z = overlap_corner[2] + (iv[2]+r.z)*dx[2]; +#else + Real x = overlap_corner[0] + (iv[0]+r.x)*dx[0]; + Real y = 0.0; + Real z = overlap_corner[1] + (iv[1]+r.y)*dx[1]; #endif - MultiFab* cost = WarpX::getCosts(lev); - - if ( (not m_refined_injection_mask) and WarpX::do_moving_window) - { - Box mask_box = geom.Domain(); - mask_box.setSmall(WarpX::moving_window_dir, 0); - mask_box.setBig(WarpX::moving_window_dir, 0); - m_refined_injection_mask.reset( new IArrayBox(mask_box)); - m_refined_injection_mask->setVal(-1); - } - - MFItInfo info; - if (do_tiling) { - info.EnableTiling(tile_size); - } - info.SetDynamic(true); - -#ifdef _OPENMP -#pragma omp parallel if (not WarpX::serialize_ics) +#if (AMREX_SPACEDIM == 3) + if (!tile_realbox.contains(XDim3{x,y,z})) { + p.id() = -1; + return; + } +#else + if (!tile_realbox.contains(XDim3{x,z,0.0})) { + p.id() = -1; + return; + } #endif - { - std::array<Real,PIdx::nattribs> attribs; - attribs.fill(0.0); - // Loop through the tiles - for (MFIter mfi = MakeMFIter(lev, info); mfi.isValid(); ++mfi) { + // Save the x and y values to use in the insideBounds checks. + // This is needed with WARPX_RZ since x and y are modified. + Real xb = x; + Real yb = y; - Real wt = amrex::second(); - - const Box& tile_box = mfi.tilebox(); - const RealBox tile_realbox = WarpX::getRealBox(tile_box, lev); - - // Find the cells of part_box that overlap with tile_realbox - // If there is no overlap, just go to the next tile in the loop - RealBox overlap_realbox; - Box overlap_box; - Real ncells_adjust; - bool no_overlap = 0; +#ifdef WARPX_RZ + // Replace the x and y, choosing the angle randomly. + // These x and y are used to get the momentum and density + Real theta = 2.*MathConst::pi*amrex::Random(); + x = xb*std::cos(theta); + y = xb*std::sin(theta); +#endif - for (int dir=0; dir<AMREX_SPACEDIM; dir++) { - if ( tile_realbox.lo(dir) <= part_realbox.hi(dir) ) { - ncells_adjust = std::floor( (tile_realbox.lo(dir) - part_realbox.lo(dir))/dx[dir] ); - overlap_realbox.setLo( dir, part_realbox.lo(dir) + std::max(ncells_adjust, 0.) * dx[dir]); - } else { - no_overlap = 1; break; + Real dens; + XDim3 u; + if (gamma_boost == 1.) { + // Lab-frame simulation + // If the particle is not within the species's + // xmin, xmax, ymin, ymax, zmin, zmax, go to + // the next generated particle. + if (!inj_pos->insideBounds(xb, yb, z)) { + p.id() = -1; + return; } - if ( tile_realbox.hi(dir) >= part_realbox.lo(dir) ) { - ncells_adjust = std::floor( (part_realbox.hi(dir) - tile_realbox.hi(dir))/dx[dir] ); - overlap_realbox.setHi( dir, part_realbox.hi(dir) - std::max(ncells_adjust, 0.) * dx[dir]); - } else { - no_overlap = 1; break; + u = inj_mom->getMomentum(x, y, z); + dens = inj_rho->getDensity(x, y, z); + // Remove particle if density below threshold + if ( dens < density_min ){ + p.id() = -1; + return; } - // Count the number of cells in this direction in overlap_realbox - overlap_box.setSmall( dir, 0 ); - overlap_box.setBig( dir, - int( round((overlap_realbox.hi(dir)-overlap_realbox.lo(dir))/dx[dir] )) - 1); - } - if (no_overlap == 1) { - continue; // Go to the next tile - } - - const int grid_id = mfi.index(); - const int tile_id = mfi.LocalTileIndex(); - - Cuda::HostVector<ParticleType> host_particles; - std::array<Cuda::HostVector<Real>, PIdx::nattribs> host_attribs; - - // Loop through the cells of overlap_box and inject - // the corresponding particles - const auto& overlap_corner = overlap_realbox.lo(); - for (IntVect iv = overlap_box.smallEnd(); iv <= overlap_box.bigEnd(); overlap_box.next(iv)) - { - int fac; - if (do_continuous_injection) { -#if ( AMREX_SPACEDIM == 3 ) - Real x = overlap_corner[0] + (iv[0] + 0.5)*dx[0]; - Real y = overlap_corner[1] + (iv[1] + 0.5)*dx[1]; - Real z = overlap_corner[2] + (iv[2] + 0.5)*dx[2]; -#elif ( AMREX_SPACEDIM == 2 ) - Real x = overlap_corner[0] + (iv[0] + 0.5)*dx[0]; - Real y = 0; - Real z = overlap_corner[1] + (iv[1] + 0.5)*dx[1]; -#endif - fac = GetRefineFac(x, y, z); - } else { - fac = 1.0; + // Cut density if above threshold + dens = amrex::min(dens, density_max); + } else { + // Boosted-frame simulation + // Since the user provides the density distribution + // at t_lab=0 and in the lab-frame coordinates, + // we need to find the lab-frame position of this + // particle at t_lab=0, from its boosted-frame coordinates + // Assuming ballistic motion, this is given by: + // z0_lab = gamma*( z_boost*(1-beta*betaz_lab) - ct_boost*(betaz_lab-beta) ) + // where betaz_lab is the speed of the particle in the lab frame + // + // In order for this equation to be solvable, betaz_lab + // is explicitly assumed to have no dependency on z0_lab + u = inj_mom->getMomentum(x, y, 0.); // No z0_lab dependency + // At this point u is the lab-frame momentum + // => Apply the above formula for z0_lab + Real gamma_lab = std::sqrt( 1.+(u.x*u.x+u.y*u.y+u.z*u.z) ); + Real betaz_lab = u.z/(gamma_lab); + Real z0_lab = gamma_boost * ( z*(1-beta_boost*betaz_lab) + - PhysConst::c*t*(betaz_lab-beta_boost) ); + // If the particle is not within the lab-frame zmin, zmax, etc. + // go to the next generated particle. + if (!inj_pos->insideBounds(xb, yb, z0_lab)) { + p.id() = -1; + return; } + // call `getDensity` with lab-frame parameters + dens = inj_rho->getDensity(x, y, z0_lab); + // Remove particle if density below threshold + if ( dens < density_min ){ + p.id() = -1; + return; + } + // Cut density if above threshold + dens = amrex::min(dens, density_max); + // At this point u and dens are the lab-frame quantities + // => Perform Lorentz transform + dens = gamma_boost * dens * ( 1.0 - beta_boost*betaz_lab ); + u.z = gamma_boost * ( u.z -beta_boost*gamma_lab ); + } - int ref_num_ppc = num_ppc * AMREX_D_TERM(fac, *fac, *fac); - for (int i_part=0; i_part<ref_num_ppc;i_part++) { - std::array<Real, 3> r; - plasma_injector->getPositionUnitBox(r, i_part, fac); -#if ( AMREX_SPACEDIM == 3 ) - Real x = overlap_corner[0] + (iv[0] + r[0])*dx[0]; - Real y = overlap_corner[1] + (iv[1] + r[1])*dx[1]; - Real z = overlap_corner[2] + (iv[2] + r[2])*dx[2]; -#elif ( AMREX_SPACEDIM == 2 ) - Real x = overlap_corner[0] + (iv[0] + r[0])*dx[0]; - Real y = 0; - Real z = overlap_corner[1] + (iv[1] + r[1])*dx[1]; -#endif - // If the new particle is not inside the tile box, - // go to the next generated particle. -#if ( AMREX_SPACEDIM == 3 ) - if(!tile_realbox.contains( RealVect{x, y, z} )) continue; -#elif ( AMREX_SPACEDIM == 2 ) - if(!tile_realbox.contains( RealVect{x, z} )) continue; -#endif - - // Save the x and y values to use in the insideBounds checks. - // This is needed with WARPX_RZ since x and y are modified. - Real xb = x; - Real yb = y; + u.x *= PhysConst::c; + u.y *= PhysConst::c; + u.z *= PhysConst::c; + // Real weight = dens * scale_fac / (AMREX_D_TERM(fac, *fac, *fac)); + Real weight = dens * scale_fac; #ifdef WARPX_RZ - // Replace the x and y, choosing the angle randomly. - // These x and y are used to get the momentum and density - Real theta = 2.*MathConst::pi*amrex::Random(); - x = xb*std::cos(theta); - y = xb*std::sin(theta); -#endif - - Real dens; - std::array<Real, 3> u; - if (WarpX::gamma_boost == 1.){ - // Lab-frame simulation - // If the particle is not within the species's - // xmin, xmax, ymin, ymax, zmin, zmax, go to - // the next generated particle. - if (!plasma_injector->insideBounds(xb, yb, z)) continue; - plasma_injector->getMomentum(u, x, y, z); - dens = plasma_injector->getDensity(x, y, z); - } else { - // Boosted-frame simulation - Real c = PhysConst::c; - Real gamma_boost = WarpX::gamma_boost; - Real beta_boost = WarpX::beta_boost; - // Since the user provides the density distribution - // at t_lab=0 and in the lab-frame coordinates, - // we need to find the lab-frame position of this - // particle at t_lab=0, from its boosted-frame coordinates - // Assuming ballistic motion, this is given by: - // z0_lab = gamma*( z_boost*(1-beta*betaz_lab) - ct_boost*(betaz_lab-beta) ) - // where betaz_lab is the speed of the particle in the lab frame - // - // In order for this equation to be solvable, betaz_lab - // is explicitly assumed to have no dependency on z0_lab - plasma_injector->getMomentum(u, x, y, 0.); // No z0_lab dependency - // At this point u is the lab-frame momentum - // => Apply the above formula for z0_lab - Real gamma_lab = std::sqrt( 1 + (u[0]*u[0] + u[1]*u[1] + u[2]*u[2])/(c*c) ); - Real betaz_lab = u[2]/gamma_lab/c; - Real t = WarpX::GetInstance().gett_new(lev); - Real z0_lab = gamma_boost * ( z*(1-beta_boost*betaz_lab) - c*t*(betaz_lab-beta_boost) ); - // If the particle is not within the lab-frame zmin, zmax, etc. - // go to the next generated particle. - if (!plasma_injector->insideBounds(xb, yb, z0_lab)) continue; - // call `getDensity` with lab-frame parameters - dens = plasma_injector->getDensity(x, y, z0_lab); - // At this point u and dens are the lab-frame quantities - // => Perform Lorentz transform - dens = gamma_boost * dens * ( 1 - beta_boost*betaz_lab ); - u[2] = gamma_boost * ( u[2] -beta_boost*c*gamma_lab ); - } - Real weight = dens * scale_fac / (AMREX_D_TERM(fac, *fac, *fac)); -#ifdef WARPX_RZ - if (plasma_injector->radially_weighted) { - weight *= 2*MathConst::pi*xb; - } else { - // This is not correct since it might shift the particle - // out of the local grid - x = std::sqrt(xb*rmax); - weight *= dx[0]; - } + if (radially_weighted) { + weight *= 2.*MathConst::pi*xb; + } else { + // This is not correct since it might shift the particle + // out of the local grid + x = std::sqrt(xb*rmax); + weight *= dx[0]; + } #endif - attribs[PIdx::w ] = weight; - attribs[PIdx::ux] = u[0]; - attribs[PIdx::uy] = u[1]; - attribs[PIdx::uz] = u[2]; - - // note - this will be slow on the GPU, need to revisit - if (WarpX::do_boosted_frame_diagnostic && do_boosted_frame_diags) - { - auto& particle_tile = DefineAndReturnParticleTile(lev, grid_id, tile_id); - particle_tile.push_back_real(particle_comps["xold"], x); - particle_tile.push_back_real(particle_comps["yold"], y); - particle_tile.push_back_real(particle_comps["zold"], z); - - particle_tile.push_back_real(particle_comps["uxold"], u[0]); - particle_tile.push_back_real(particle_comps["uyold"], u[1]); - particle_tile.push_back_real(particle_comps["uzold"], u[2]); - } + pa[PIdx::w ][ip] = weight; + pa[PIdx::ux][ip] = u.x; + pa[PIdx::uy][ip] = u.y; + pa[PIdx::uz][ip] = u.z; + + if (do_boosted) { + pb[0][ip] = x; + pb[1][ip] = y; + pb[2][ip] = z; + pb[3][ip] = u.x; + pb[4][ip] = u.y; + pb[5][ip] = u.z; + } - ParticleType p; - p.id() = ParticleType::NextID(); - p.cpu() = ParallelDescriptor::MyProc(); #if (AMREX_SPACEDIM == 3) - p.pos(0) = x; - p.pos(1) = y; - p.pos(2) = z; + p.pos(0) = x; + p.pos(1) = y; + p.pos(2) = z; #elif (AMREX_SPACEDIM == 2) #ifdef WARPX_RZ - attribs[PIdx::theta] = theta; + pa[PIdx::theta][ip] = theta; #endif - p.pos(0) = xb; - p.pos(1) = z; + p.pos(0) = xb; + p.pos(1) = z; #endif - - host_particles.push_back(p); - for (int kk = 0; kk < PIdx::nattribs; ++kk) - host_attribs[kk].push_back(attribs[kk]); - } - } - - auto& particle_tile = GetParticles(lev)[std::make_pair(grid_id,tile_id)]; - auto old_size = particle_tile.GetArrayOfStructs().size(); - auto new_size = old_size + host_particles.size(); - particle_tile.resize(new_size); - - Cuda::thrust_copy(host_particles.begin(), - host_particles.end(), - particle_tile.GetArrayOfStructs().begin() + old_size); - - for (int kk = 0; kk < PIdx::nattribs; ++kk) { - Cuda::thrust_copy(host_attribs[kk].begin(), - host_attribs[kk].end(), - particle_tile.GetStructOfArrays().GetRealData(kk).begin() + old_size); - } - - if (cost) { - wt = (amrex::second() - wt) / tile_box.d_numPts(); - Array4<Real> const& costarr = cost->array(mfi); - amrex::ParallelFor(tile_box, - [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept - { - costarr(i,j,k) += wt; - }); - } - } + }, shared_mem_bytes); + + if (cost) { + wt = (amrex::second() - wt) / tile_box.d_numPts(); + Array4<Real> const& costarr = cost->array(mfi); + amrex::ParallelFor(tile_box, + [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept + { + costarr(i,j,k) += wt; + }); + } } + + // The function that calls this is responsible for redistributing particles. } -#endif #ifdef WARPX_DO_ELECTROSTATIC void @@ -1882,6 +1657,7 @@ PhysicalParticleContainer::PushP (int lev, Real dt, // pti.GetPosition(m_xp[thread_num], m_yp[thread_num], m_zp[thread_num]); +#ifdef WARPX_RZ const std::array<Real,3>& xyzmin_grid = WarpX::LowerCorner(box, lev); const int* ixyzmin_grid = box.loVect(); @@ -1907,6 +1683,12 @@ PhysicalParticleContainer::PushP (int lev, Real dt, BL_TO_FORTRAN_ANYD(bzfab), &ll4symtry, &WarpX::l_lower_order_in_v, &WarpX::do_nodal, &lvect_fieldgathe, &WarpX::field_gathering_algo); +#else + int e_is_nodal = Ex.is_nodal() and Ey.is_nodal() and Ez.is_nodal(); + FieldGather(pti, Exp, Eyp, Ezp, Bxp, Byp, Bzp, + &exfab, &eyfab, &ezfab, &bxfab, &byfab, &bzfab, + Ex.nGrow(), e_is_nodal, 0, np, thread_num, lev, lev); +#endif // This wraps the momentum advance so that inheritors can modify the call. // Extract pointers to the different particle quantities @@ -2105,74 +1887,6 @@ void PhysicalParticleContainer::GetParticleSlice(const int direction, const Real } } -int PhysicalParticleContainer::GetRefineFac(const Real x, const Real y, const Real z) -{ - if (finestLevel() == 0) return 1; - if (not WarpX::refine_plasma) return 1; - - IntVect iv; - const Geometry& geom = Geom(0); - - std::array<Real, 3> offset; - -#if ( AMREX_SPACEDIM == 3) - offset[0] = geom.ProbLo(0); - offset[1] = geom.ProbLo(1); - offset[2] = geom.ProbLo(2); -#elif ( AMREX_SPACEDIM == 2 ) - offset[0] = geom.ProbLo(0); - offset[1] = 0.0; - offset[2] = geom.ProbLo(1); -#endif - - AMREX_D_TERM(iv[0]=static_cast<int>(floor((x-offset[0])*geom.InvCellSize(0)));, - iv[1]=static_cast<int>(floor((y-offset[1])*geom.InvCellSize(1)));, - iv[2]=static_cast<int>(floor((z-offset[2])*geom.InvCellSize(2)));); - - iv += geom.Domain().smallEnd(); - - const int dir = WarpX::moving_window_dir; - - IntVect iv2 = iv; - iv2[dir] = 0; - - if ( (*m_refined_injection_mask)(iv2) != -1) return (*m_refined_injection_mask)(iv2); - - int ref_fac = 1; - for (int lev = 0; lev < finestLevel(); ++lev) - { - const IntVect rr = m_gdb->refRatio(lev); - const BoxArray& fine_ba = this->ParticleBoxArray(lev+1); - const int num_boxes = fine_ba.size(); - Vector<Box> stretched_boxes; - const int safety_factor = 4; - for (int i = 0; i < num_boxes; ++i) - { - Box bx = fine_ba[i]; - bx.coarsen(ref_fac*rr[dir]); - bx.setSmall(dir, std::numeric_limits<int>::min()/safety_factor); - bx.setBig(dir, std::numeric_limits<int>::max()/safety_factor); - stretched_boxes.push_back(bx); - } - - BoxArray stretched_ba(stretched_boxes.dataPtr(), stretched_boxes.size()); - - const int num_ghost = 0; - if ( stretched_ba.intersects(Box(iv, iv), num_ghost) ) - { - ref_fac *= rr[dir]; - } - else - { - break; - } - } - - (*m_refined_injection_mask)(iv2) = ref_fac; - - return ref_fac; -} - /* \brief Inject particles during the simulation * \param injection_box: domain where particles should be injected. */ diff --git a/Source/Particles/RigidInjectedParticleContainer.cpp b/Source/Particles/RigidInjectedParticleContainer.cpp index 79396e6af..83556e69f 100644 --- a/Source/Particles/RigidInjectedParticleContainer.cpp +++ b/Source/Particles/RigidInjectedParticleContainer.cpp @@ -421,6 +421,7 @@ RigidInjectedParticleContainer::PushP (int lev, Real dt, // pti.GetPosition(m_xp[thread_num], m_yp[thread_num], m_zp[thread_num]); +#ifdef WARPX_RZ const std::array<Real,3>& xyzmin_grid = WarpX::LowerCorner(box, lev); const int* ixyzmin_grid = box.loVect(); @@ -446,6 +447,12 @@ RigidInjectedParticleContainer::PushP (int lev, Real dt, BL_TO_FORTRAN_ANYD(bzfab), &ll4symtry, &WarpX::l_lower_order_in_v, &WarpX::do_nodal, &lvect_fieldgathe, &WarpX::field_gathering_algo); +#else + int e_is_nodal = Ex.is_nodal() and Ey.is_nodal() and Ez.is_nodal(); + FieldGather(pti, Exp, Eyp, Ezp, Bxp, Byp, Bzp, + &exfab, &eyfab, &ezfab, &bxfab, &byfab, &bzfab, + Ex.nGrow(), e_is_nodal, 0, np, thread_num, lev, lev); +#endif // Save the position and momenta, making copies auto uxp_save = uxp; @@ -459,9 +466,6 @@ RigidInjectedParticleContainer::PushP (int lev, Real dt, Real* const AMREX_RESTRICT uxpp = uxp.dataPtr(); Real* const AMREX_RESTRICT uypp = uyp.dataPtr(); Real* const AMREX_RESTRICT uzpp = uzp.dataPtr(); - const Real* const AMREX_RESTRICT uxp_savep = uxp_save.dataPtr(); - const Real* const AMREX_RESTRICT uyp_savep = uyp_save.dataPtr(); - const Real* const AMREX_RESTRICT uzp_savep = uzp_save.dataPtr(); const Real* const AMREX_RESTRICT Expp = Exp.dataPtr(); const Real* const AMREX_RESTRICT Eypp = Eyp.dataPtr(); const Real* const AMREX_RESTRICT Ezpp = Ezp.dataPtr(); |