aboutsummaryrefslogtreecommitdiff
path: root/Source/FieldSolver/SpectralSolver/SpectralKSpace.cpp
diff options
context:
space:
mode:
authorGravatar WeiqunZhang <WeiqunZhang@lbl.gov> 2020-09-01 14:11:35 -0700
committerGravatar GitHub <noreply@github.com> 2020-09-01 14:11:35 -0700
commit3c2eadfde21745eae87c7d13ebaacb1732a8619d (patch)
tree730bc2878800058a66fa308a67e4ca53c0715e71 /Source/FieldSolver/SpectralSolver/SpectralKSpace.cpp
parent1652cfa796c9f258832254906ef4148a32768c2b (diff)
downloadWarpX-3c2eadfde21745eae87c7d13ebaacb1732a8619d.tar.gz
WarpX-3c2eadfde21745eae87c7d13ebaacb1732a8619d.tar.zst
WarpX-3c2eadfde21745eae87c7d13ebaacb1732a8619d.zip
Remove ManagedVector from spectral solver (#1270)
Diffstat (limited to 'Source/FieldSolver/SpectralSolver/SpectralKSpace.cpp')
-rw-r--r--Source/FieldSolver/SpectralSolver/SpectralKSpace.cpp147
1 files changed, 81 insertions, 66 deletions
diff --git a/Source/FieldSolver/SpectralSolver/SpectralKSpace.cpp b/Source/FieldSolver/SpectralSolver/SpectralKSpace.cpp
index 2c60cdc38..dd12fb2b8 100644
--- a/Source/FieldSolver/SpectralSolver/SpectralKSpace.cpp
+++ b/Source/FieldSolver/SpectralSolver/SpectralKSpace.cpp
@@ -75,17 +75,18 @@ SpectralKSpace::getKComponent( const DistributionMapping& dm,
const int i_dim,
const bool only_positive_k ) const
{
- // Initialize an empty ManagedVector in each box
+ // Initialize an empty DeviceVector in each box
KVectorComponent k_comp(spectralspace_ba, dm);
- // Loop over boxes and allocate the corresponding ManagedVector
+ // Loop over boxes and allocate the corresponding DeviceVector
// for each box owned by the local MPI proc
for ( MFIter mfi(spectralspace_ba, dm); mfi.isValid(); ++mfi ){
Box bx = spectralspace_ba[mfi];
- ManagedVector<Real>& k = k_comp[mfi];
+ DeviceVector<Real>& k = k_comp[mfi];
// Allocate k to the right size
int N = bx.length( i_dim );
k.resize( N );
+ Real* pk = k.data();
// Fill the k vector
IntVect fft_size = realspace_ba[mfi].length();
@@ -97,21 +98,24 @@ SpectralKSpace::getKComponent( const DistributionMapping& dm,
if (only_positive_k){
// Fill the full axis with positive k values
// (typically: first axis, in a real-to-complex FFT)
- for (int i=0; i<N; i++ ){
- k[i] = i*dk;
- }
+ amrex::ParallelFor(N, [=] AMREX_GPU_DEVICE (int i) noexcept
+ {
+ pk[i] = i*dk;
+ });
} else {
const int mid_point = (N+1)/2;
- // Fill positive values of k
- // (FFT conventions: first half is positive)
- for (int i=0; i<mid_point; i++ ){
- k[i] = i*dk;
- }
- // Fill negative values of k
- // (FFT conventions: second half is negative)
- for (int i=mid_point; i<N; i++){
- k[i] = (i-N)*dk;
- }
+ amrex::ParallelFor(N, [=] AMREX_GPU_DEVICE (int i) noexcept
+ {
+ if (i < mid_point) {
+ // Fill positive values of k
+ // (FFT conventions: first half is positive)
+ pk[i] = i*dk;
+ } else {
+ // Fill negative values of k
+ // (FFT conventions: second half is negative)
+ pk[i] = (i-N)*dk;
+ }
+ });
}
}
return k_comp;
@@ -131,16 +135,19 @@ SpectralKSpace::getSpectralShiftFactor( const DistributionMapping& dm,
const int i_dim,
const int shift_type ) const
{
- // Initialize an empty ManagedVector in each box
+ // Initialize an empty DeviceVector in each box
SpectralShiftFactor shift_factor( spectralspace_ba, dm );
- // Loop over boxes and allocate the corresponding ManagedVector
+ // Loop over boxes and allocate the corresponding DeviceVector
// for each box owned by the local MPI proc
for ( MFIter mfi(spectralspace_ba, dm); mfi.isValid(); ++mfi ){
- const ManagedVector<Real>& k = k_vec[i_dim][mfi];
- ManagedVector<Complex>& shift = shift_factor[mfi];
+ const DeviceVector<Real>& k = k_vec[i_dim][mfi];
+ DeviceVector<Complex>& shift = shift_factor[mfi];
// Allocate shift coefficients
- shift.resize( k.size() );
+ const int N = k.size();
+ shift.resize(N);
+ Real const* pk = k.data();
+ Complex* pshift = shift.data();
// Fill the shift coefficients
Real sign = 0;
@@ -149,11 +156,10 @@ SpectralKSpace::getSpectralShiftFactor( const DistributionMapping& dm,
case ShiftType::TransformToCellCentered: sign = 1.;
}
const Complex I{0,1};
- int i = 0;
- for (auto const& kv : k){
- shift[i] = exp( I*sign*kv*0.5_rt*dx[i_dim]);
- i++;
- }
+ amrex::ParallelFor(N, [=] AMREX_GPU_DEVICE (int i) noexcept
+ {
+ pshift[i] = amrex::exp( I*sign*pk[i]*0.5_rt*dx[i_dim]);
+ });
}
return shift_factor;
}
@@ -177,72 +183,81 @@ SpectralKSpace::getModifiedKComponent( const DistributionMapping& dm,
const int n_order,
const bool nodal ) const
{
- // Initialize an empty ManagedVector in each box
+ // Initialize an empty DeviceVector in each box
KVectorComponent modified_k_comp(spectralspace_ba, dm);
if (n_order == -1) { // Infinite-order case
for ( MFIter mfi(spectralspace_ba, dm); mfi.isValid(); ++mfi ){
- const ManagedVector<Real>& k = k_vec[i_dim][mfi];
- ManagedVector<Real>& modified_k = modified_k_comp[mfi];
+ const DeviceVector<Real>& k = k_vec[i_dim][mfi];
+ DeviceVector<Real>& modified_k = modified_k_comp[mfi];
// Allocate modified_k to the same size as k
+ const int N = k.size();
modified_k.resize( k.size() );
// Fill the modified k vector
- int i = 0;
- for (auto const& kv : k){
- modified_k[i] = k[i]; // infinite-order case.
- i++;
- }
+ Gpu::copyAsync(Gpu::deviceToDevice, k.begin(), k.end(), modified_k.begin());
}
} else {
// Compute real-space stencil coefficients
- Vector<Real> stencil_coef = getFonbergStencilCoefficients(n_order, nodal);
+ Vector<Real> h_stencil_coef = getFonbergStencilCoefficients(n_order, nodal);
+ DeviceVector<Real> d_stencil_coef(h_stencil_coef.size());
+ Gpu::copyAsync(Gpu::hostToDevice, h_stencil_coef.begin(), h_stencil_coef.end(),
+ d_stencil_coef.begin());
+ Gpu::synchronize();
+ const int nstencil = d_stencil_coef.size();
+ Real const* p_stencil_coef = d_stencil_coef.data();
- // Loop over boxes and allocate the corresponding ManagedVector
+ // Loop over boxes and allocate the corresponding DeviceVector
// for each box owned by the local MPI proc
for ( MFIter mfi(spectralspace_ba, dm); mfi.isValid(); ++mfi ){
Real delta_x = dx[i_dim];
- const ManagedVector<Real>& k = k_vec[i_dim][mfi];
- ManagedVector<Real>& modified_k = modified_k_comp[mfi];
+ const DeviceVector<Real>& k = k_vec[i_dim][mfi];
+ DeviceVector<Real>& modified_k = modified_k_comp[mfi];
// Allocate modified_k to the same size as k
- modified_k.resize( k.size() );
+ const int N = k.size();
+ modified_k.resize(N);
+ Real const* p_k = k.data();
+ Real * p_modified_k = modified_k.data();
// Fill the modified k vector
- int i = 0;
- for (auto const& kv : k){
- modified_k[i] = 0;
- for (int n=1; n<stencil_coef.size(); n++){
+ amrex::ParallelFor(N, [=] AMREX_GPU_DEVICE (int i) noexcept
+ {
+ p_modified_k[i] = 0;
+ for (int n=1; n<nstencil; n++){
if (nodal){
- modified_k[i] += stencil_coef[n]* \
- std::sin( k[i]*n*delta_x )/( n*delta_x );
+ p_modified_k[i] += p_stencil_coef[n]*
+ std::sin( p_k[i]*n*delta_x )/( n*delta_x );
} else {
- modified_k[i] += stencil_coef[n]* \
- std::sin( k[i]*(n-0.5)*delta_x )/( (n-0.5)*delta_x );
+ p_modified_k[i] += p_stencil_coef[n]* \
+ std::sin( p_k[i]*(n-0.5)*delta_x )/( (n-0.5)*delta_x );
}
}
- i++;
- }
- // By construction, at finite order and for a nodal grid,
- // the *modified* k corresponding to the Nyquist frequency
- // (i.e. highest *real* k) is 0. However, the above calculation
- // based on stencil coefficients does not give 0 to machine precision.
- // Therefore, we need to enforce the fact that the modified k be 0 here.
- if (nodal){
- if (i_dim == 0){
- // Because of the real-to-complex FFTs, the first axis (idim=0)
- // contains only the positive k, and the Nyquist frequency is
- // the last element of the array.
- modified_k[k.size()-1] = 0.0_rt;
- } else {
- // The other axes contains both positive and negative k ;
- // the Nyquist frequency is in the middle of the array.
- modified_k[k.size()/2] = 0.0_rt;
- }
- }
+ // By construction, at finite order and for a nodal grid,
+ // the *modified* k corresponding to the Nyquist frequency
+ // (i.e. highest *real* k) is 0. However, the above calculation
+ // based on stencil coefficients does not give 0 to machine precision.
+ // Therefore, we need to enforce the fact that the modified k be 0 here.
+ if (nodal){
+ if (i_dim == 0){
+ // Because of the real-to-complex FFTs, the first axis (idim=0)
+ // contains only the positive k, and the Nyquist frequency is
+ // the last element of the array.
+ if (i == N-1) {
+ p_modified_k[i] = 0.0_rt;
+ }
+ } else {
+ // The other axes contains both positive and negative k ;
+ // the Nyquist frequency is in the middle of the array.
+ if (i == N/2) {
+ p_modified_k[i] = 0.0_rt;
+ }
+ }
+ }
+ });
}
}
return modified_k_comp;