diff options
Diffstat (limited to 'Source/FieldSolver/SpectralSolver/SpectralHankelTransform')
4 files changed, 35 insertions, 25 deletions
diff --git a/Source/FieldSolver/SpectralSolver/SpectralHankelTransform/BesselRoots.H b/Source/FieldSolver/SpectralSolver/SpectralHankelTransform/BesselRoots.H index c7a816c61..8f569ec4e 100644 --- a/Source/FieldSolver/SpectralSolver/SpectralHankelTransform/BesselRoots.H +++ b/Source/FieldSolver/SpectralSolver/SpectralHankelTransform/BesselRoots.H @@ -51,7 +51,7 @@ void SecantRootFinder(int n, int nitmx, amrex::Real tol, amrex::Real *zeroj, int * abramowitz m. & stegun irene a. * handbook of mathematical functions */ -void GetBesselRoots(int n, int nk, HankelTransform::RealVector& roots, amrex::Vector<int> &ier) { +void GetBesselRoots(int n, int nk, amrex::Vector<amrex::Real>& roots, amrex::Vector<int> &ier) { amrex::Real zeroj; int ierror, ik, k; diff --git a/Source/FieldSolver/SpectralSolver/SpectralHankelTransform/HankelTransform.H b/Source/FieldSolver/SpectralSolver/SpectralHankelTransform/HankelTransform.H index 46fb28447..5a87031c4 100644 --- a/Source/FieldSolver/SpectralSolver/SpectralHankelTransform/HankelTransform.H +++ b/Source/FieldSolver/SpectralSolver/SpectralHankelTransform/HankelTransform.H @@ -21,7 +21,7 @@ class HankelTransform { public: - using RealVector = amrex::Gpu::ManagedVector<amrex::Real>; + using RealVector = amrex::Gpu::DeviceVector<amrex::Real>; // Constructor HankelTransform(const int hankel_order, @@ -43,8 +43,8 @@ class HankelTransform RealVector m_kr; - RealVector invM; - RealVector M; + RealVector m_invM; + RealVector m_M; }; #endif diff --git a/Source/FieldSolver/SpectralSolver/SpectralHankelTransform/HankelTransform.cpp b/Source/FieldSolver/SpectralSolver/SpectralHankelTransform/HankelTransform.cpp index c5249d54f..24e9d7f81 100644 --- a/Source/FieldSolver/SpectralSolver/SpectralHankelTransform/HankelTransform.cpp +++ b/Source/FieldSolver/SpectralSolver/SpectralHankelTransform/HankelTransform.cpp @@ -25,7 +25,7 @@ HankelTransform::HankelTransform (int const hankel_order, AMREX_ALWAYS_ASSERT_WITH_MESSAGE(hankel_order-1 <= azimuthal_mode && azimuthal_mode <= hankel_order+1, "azimuthal_mode must be either hankel_order-1, hankel_order or hankel_order+1"); - RealVector alphas; + amrex::Vector<amrex::Real> alphas; amrex::Vector<int> alpha_errors; GetBesselRoots(azimuthal_mode, m_nk, alphas, alpha_errors); @@ -33,13 +33,13 @@ HankelTransform::HankelTransform (int const hankel_order, AMREX_ALWAYS_ASSERT(std::all_of(alpha_errors.begin(), alpha_errors.end(), [](int i) { return i == 0; })); // Calculate the spectral grid - m_kr.resize(m_nk); + amrex::Vector<amrex::Real> kr(m_nk); for (int ik=0 ; ik < m_nk ; ik++) { - m_kr[ik] = alphas[ik]/rmax; + kr[ik] = alphas[ik]/rmax; } // Calculate the spatial grid (Uniform grid with a half-cell offset) - RealVector rmesh(m_nr); + amrex::Vector<amrex::Real> rmesh(m_nr); amrex::Real dr = rmax/m_nr; for (int ir=0 ; ir < m_nr ; ir++) { rmesh[ir] = dr*(ir + 0.5_rt); @@ -57,22 +57,22 @@ HankelTransform::HankelTransform (int const hankel_order, p_denom = hankel_order; } - RealVector denom(m_nk); + amrex::Vector<amrex::Real> denom(m_nk); for (int ik=0 ; ik < m_nk ; ik++) { const amrex::Real jna = jn(p_denom, alphas[ik]); denom[ik] = MathConst::pi*rmax*rmax*jna*jna; } - RealVector num(m_nk*m_nr); + amrex::Vector<amrex::Real> num(m_nk*m_nr); for (int ir=0 ; ir < m_nr ; ir++) { for (int ik=0 ; ik < m_nk ; ik++) { int const ii = ik + ir*m_nk; - num[ii] = jn(hankel_order, rmesh[ir]*m_kr[ik]); + num[ii] = jn(hankel_order, rmesh[ir]*kr[ik]); } } // Get the inverse matrix - invM.resize(m_nk*m_nr); + amrex::Vector<amrex::Real> invM(m_nk*m_nr); if (azimuthal_mode > 0) { for (int ir=0 ; ir < m_nr ; ir++) { for (int ik=1 ; ik < m_nk ; ik++) { @@ -107,18 +107,20 @@ HankelTransform::HankelTransform (int const hankel_order, } } + amrex::Vector<amrex::Real> M; + // Calculate the matrix M by inverting invM if (azimuthal_mode !=0 && hankel_order != azimuthal_mode-1) { // In this case, invM is singular, thus we calculate the pseudo-inverse. // The Moore-Penrose psuedo-inverse is calculated using the SVD method. M.resize(m_nk*m_nr, 0.); - RealVector invMcopy(invM); - RealVector sdiag(m_nk-1, 0.); - RealVector u((m_nk-1)*(m_nk-1), 0.); - RealVector vt((m_nr)*(m_nr), 0.); - RealVector sp((m_nr)*(m_nk-1), 0.); - RealVector temp((m_nr)*(m_nk-1), 0.); + amrex::Vector<amrex::Real> invMcopy(invM); + amrex::Vector<amrex::Real> sdiag(m_nk-1, 0.); + amrex::Vector<amrex::Real> u((m_nk-1)*(m_nk-1), 0.); + amrex::Vector<amrex::Real> vt((m_nr)*(m_nr), 0.); + amrex::Vector<amrex::Real> sp((m_nr)*(m_nk-1), 0.); + amrex::Vector<amrex::Real> temp((m_nr)*(m_nk-1), 0.); // Calculate the singlular-value-decomposition of invM (leaving out the first row). // invM = u*sdiag*vt @@ -169,6 +171,13 @@ HankelTransform::HankelTransform (int const hankel_order, } + m_kr.resize(kr.size()); + m_invM.resize(invM.size()); + m_M.resize(M.size()); + amrex::Gpu::copyAsync(amrex::Gpu::hostToDevice, kr.begin(), kr.end(), m_kr.begin()); + amrex::Gpu::copyAsync(amrex::Gpu::hostToDevice, invM.begin(), invM.end(), m_invM.begin()); + amrex::Gpu::copyAsync(amrex::Gpu::hostToDevice, M.begin(), M.end(), m_M.begin()); + amrex::Gpu::synchronize(); } void @@ -193,7 +202,7 @@ HankelTransform::HankelForwardTransform (amrex::FArrayBox const& F, int const F_ // Note that M is flagged to be transposed since it has dimensions (m_nr, m_nk) blas::gemm(blas::Layout::ColMajor, blas::Op::Trans, blas::Op::NoTrans, m_nk, nz, m_nr, 1._rt, - M.dataPtr(), m_nk, + m_M.dataPtr(), m_nk, F.dataPtr(F_icomp)+ngr, nrF, 0._rt, G.dataPtr(G_icomp), m_nk); @@ -202,13 +211,13 @@ HankelTransform::HankelForwardTransform (amrex::FArrayBox const& F, int const F_ // It is not clear if the GPU gemm wasn't build properly, it is cycling data out and back // in to the device, or if it is because gemm is launching its own threads. - amrex::Real const * M_arr = M.dataPtr(); + amrex::Real const * M_arr = m_M.dataPtr(); amrex::Array4<const amrex::Real> const & F_arr = F.array(); amrex::Array4< amrex::Real> const & G_arr = G.array(); int const nr = m_nr; - ParallelFor(G_box, + amrex::ParallelFor(G_box, [=] AMREX_GPU_DEVICE(int ik, int iz, int inotused) noexcept { G_arr(ik,iz,G_icomp) = 0.; for (int ir=0 ; ir < nr ; ir++) { @@ -240,10 +249,10 @@ HankelTransform::HankelInverseTransform (amrex::FArrayBox const& G, int const G_ #ifndef AMREX_USE_GPU // On CPU, the blas::gemm is significantly faster - // Note that invM is flagged to be transposed since it has dimensions (m_nk, m_nr) + // Note that m_invM is flagged to be transposed since it has dimensions (m_nk, m_nr) blas::gemm(blas::Layout::ColMajor, blas::Op::Trans, blas::Op::NoTrans, m_nr, nz, m_nk, 1._rt, - invM.dataPtr(), m_nr, + m_invM.dataPtr(), m_nr, G.dataPtr(G_icomp), m_nk, 0._rt, F.dataPtr(F_icomp)+ngr, nrF); @@ -252,13 +261,13 @@ HankelTransform::HankelInverseTransform (amrex::FArrayBox const& G, int const G_ // It is not clear if the GPU gemm wasn't build properly, it is cycling data out and back // in to the device, or if it is because gemm is launching its own threads. - amrex::Real const * invM_arr = invM.dataPtr(); + amrex::Real const * invM_arr = m_invM.dataPtr(); amrex::Array4<const amrex::Real> const & G_arr = G.array(); amrex::Array4< amrex::Real> const & F_arr = F.array(); int const nk = m_nk; - ParallelFor(G_box, + amrex::ParallelFor(G_box, [=] AMREX_GPU_DEVICE(int ir, int iz, int inotused) noexcept { F_arr(ir,iz,F_icomp) = 0.; for (int ik=0 ; ik < nk ; ik++) { diff --git a/Source/FieldSolver/SpectralSolver/SpectralHankelTransform/SpectralHankelTransformer.cpp b/Source/FieldSolver/SpectralSolver/SpectralHankelTransform/SpectralHankelTransformer.cpp index 7dd297418..518eea03e 100644 --- a/Source/FieldSolver/SpectralSolver/SpectralHankelTransform/SpectralHankelTransformer.cpp +++ b/Source/FieldSolver/SpectralSolver/SpectralHankelTransform/SpectralHankelTransformer.cpp @@ -49,6 +49,7 @@ SpectralHankelTransformer::ExtractKrArray () kr_array[ii] = kr_m_array[ir]; }); } + amrex::Gpu::synchronize(); } /* \brief Converts a scalar field from the physical to the spectral space for all modes */ |