From 4a34f2ea9ab825a0af92fc0c03017043951032e7 Mon Sep 17 00:00:00 2001 From: Revathi Jambunathan Date: Mon, 6 May 2019 18:13:13 -0400 Subject: Added cuFFT kernels -- debugging error in rho values before forward transform --- .../SpectralSolver/SpectralFieldData.cpp | 79 +++++++++++++++++++++- 1 file changed, 78 insertions(+), 1 deletion(-) (limited to 'Source/FieldSolver/SpectralSolver/SpectralFieldData.cpp') diff --git a/Source/FieldSolver/SpectralSolver/SpectralFieldData.cpp b/Source/FieldSolver/SpectralSolver/SpectralFieldData.cpp index 02fa2015f..7c2061f8d 100644 --- a/Source/FieldSolver/SpectralSolver/SpectralFieldData.cpp +++ b/Source/FieldSolver/SpectralSolver/SpectralFieldData.cpp @@ -54,6 +54,29 @@ SpectralFieldData::SpectralFieldData( const BoxArray& realspace_ba, IntVect fft_size = realspace_ba[mfi].length(); #ifdef AMREX_USE_GPU // Add cuFFT-specific code + // Creating 3D plan for real to complex -- double precision + cufftResult result; + result = cufftPlan3d( &forward_plan[mfi], fft_size[2], + fft_size[1],fft_size[0], CUFFT_D2Z); + if ( result != CUFFT_SUCCESS ) { + amrex::Print() << " cufftplan3d forward failed! \n"; + } + if ( result == CUFFT_SUCCESS ) { + amrex::Print() << " created cufft forward plan\n"; + } + // Add 2D cuFFT-spacific code for D2Z + // Note that D2Z is inherently forward plan + + result = cufftPlan3d( &backward_plan[mfi], fft_size[2], + fft_size[1], fft_size[0], CUFFT_Z2D); + // Add 2D cuFFT-specific code for Z2D + if ( result != CUFFT_SUCCESS ) { + amrex::Print() << " cufftplan3d backward failed! \n"; + } + if ( result == CUFFT_SUCCESS ) { + amrex::Print() << " created cufft backward plan\n"; + } + #else // Create FFTW plans forward_plan[mfi] = @@ -87,6 +110,8 @@ SpectralFieldData::~SpectralFieldData() for ( MFIter mfi(tmpRealField); mfi.isValid(); ++mfi ){ #ifdef AMREX_USE_GPU // Add cuFFT-specific code + cufftDestroy( forward_plan[mfi] ); + cufftDestroy( backward_plan[mfi] ); #else // Destroy FFTW plans fftw_destroy_plan( forward_plan[mfi] ); @@ -129,16 +154,41 @@ SpectralFieldData::ForwardTransform( const MultiFab& mf, Array4 tmp_arr = tmpRealField[mfi].array(); ParallelFor( realspace_bx, [=] AMREX_GPU_DEVICE(int i, int j, int k) noexcept { - tmp_arr(i,j,k) = mf_arr(i,j,k,i_comp); + tmp_arr(i,j,k) = mf_arr(i,j,k,i_comp); }); +#ifdef AMREX_USE_GPU + cudaDeviceSynchronize(); +#endif + amrex::Print() << " in forward trans icomp " << i_comp << " " << tmp_arr(0,0,0) << " mf arr " ; + amrex::Print() << " " << mf_arr(0,0,0,0); + amrex::Print() << " " << mf_arr(15,15,15,0); + amrex::Print() << " " << mf_arr(0,0,0,1); + amrex::Print() << " " << mf_arr(15,15,15,1); + amrex::Print() << "\n"; } // Perform Fourier transform from `tmpRealField` to `tmpSpectralField` #ifdef AMREX_USE_GPU // Add cuFFT-specific code ; make sure that this is done on the same // GPU stream as the above copy + cudaDeviceSynchronize(); + cufftResult result; + //cudaStream_t stream = amrex::Cuda::Device::cudaStream(); + //cufftSetStream ( forward_plan[mfi], stream); + result = cufftExecD2Z( forward_plan[mfi], + tmpRealField[mfi].dataPtr(), + reinterpret_cast( + tmpSpectralField[mfi].dataPtr()) ); + if ( result != CUFFT_SUCCESS ) { + amrex::Print() << " cufftplan3d execute failed ! \n"; + } + if ( result == CUFFT_SUCCESS ) { + amrex::Print() << " created cufft forward transform\n"; + } + cudaDeviceSynchronize(); #else fftw_execute( forward_plan[mfi] ); + amrex::Print() << " forward fft on cpu\n"; #endif // Copy the spectral-space field `tmpSpectralField` to the appropriate @@ -169,6 +219,8 @@ SpectralFieldData::ForwardTransform( const MultiFab& mf, // Copy field into the right index fields_arr(i,j,k,field_index) = spectral_field_value; }); +// amrex::Print() << " in forward trans after D2Z" << fields_arr(0,0,0,0) ; + amrex::Print() << "\n"; } } } @@ -227,8 +279,24 @@ SpectralFieldData::BackwardTransform( MultiFab& mf, #ifdef AMREX_USE_GPU // Add cuFFT-specific code ; make sure that this is done on the same // GPU stream as the above copy + cudaDeviceSynchronize(); + cufftResult result; + //cudaStream_t stream = amrex::Cuda::Device::cudaStream(); + //cufftSetStream ( backward_plan[mfi], stream); + result = cufftExecZ2D( backward_plan[mfi], + reinterpret_cast( + tmpSpectralField[mfi].dataPtr()), + tmpRealField[mfi].dataPtr() ); + if ( result != CUFFT_SUCCESS ) { + amrex::Print() << " cufftplan3d execute inverse failed ! \n"; + } + if ( result == CUFFT_SUCCESS ) { + amrex::Print() << " created cufft inverse transform\n"; + } + cudaDeviceSynchronize(); #else fftw_execute( backward_plan[mfi] ); + amrex::Print() << " cpu inverse done\n"; #endif // Copy the temporary field `tmpRealField` to the real-space field `mf` @@ -245,6 +313,15 @@ SpectralFieldData::BackwardTransform( MultiFab& mf, // Copy and normalize field mf_arr(i,j,k,i_comp) = inv_N*tmp_arr(i,j,k); }); +#ifdef AMREX_USE_GPU + cudaDeviceSynchronize(); +#endif + amrex::Print() << " after backward plan in real space 0,0,0 " << mf_arr(0,0,0,0) << " tmp " << tmp_arr(0,0,0) << "\n"; + amrex::Print() << " after backward plan in real space 15, 15, 15 " << mf_arr(15,15,15,0) << " tmp " << tmp_arr(0,0,0) << "\n"; + amrex::Print() << "\n"; +#ifdef AMREX_USE_GPU + cudaDeviceSynchronize(); +#endif } } } -- cgit v1.2.3 From eb76cadbc5e76111679f6189e6728e462af41f66 Mon Sep 17 00:00:00 2001 From: Revathi Jambunathan Date: Wed, 8 May 2019 16:21:18 -0400 Subject: temporary print files for debugging --- GNUmakefile | 2 +- Source/Evolve/WarpXEvolveEM.cpp | 58 +++--------------- .../SpectralSolver/SpectralFieldData.cpp | 52 ++++++---------- Source/FieldSolver/WarpXFFT.cpp | 70 +++++++--------------- Source/Initialization/WarpXInitData.cpp | 20 ++++--- Source/Parallelization/WarpXComm.cpp | 7 ++- Source/Particles/WarpXParticleContainer.cpp | 2 +- Source/WarpX.cpp | 2 + Source/main.cpp | 3 + 9 files changed, 72 insertions(+), 144 deletions(-) (limited to 'Source/FieldSolver/SpectralSolver/SpectralFieldData.cpp') diff --git a/GNUmakefile b/GNUmakefile index c662281b3..e3ed55d80 100644 --- a/GNUmakefile +++ b/GNUmakefile @@ -29,7 +29,7 @@ USE_ASCENT_INSITU = FALSE WarpxBinDir = Bin -USE_PSATD = TRUE +USE_PSATD = FALSE USE_RZ = FALSE DO_ELECTROSTATIC = FALSE diff --git a/Source/Evolve/WarpXEvolveEM.cpp b/Source/Evolve/WarpXEvolveEM.cpp index 025ff993a..1c5dc0104 100644 --- a/Source/Evolve/WarpXEvolveEM.cpp +++ b/Source/Evolve/WarpXEvolveEM.cpp @@ -69,27 +69,20 @@ WarpX::EvolveEM (int numsteps) // At the beginning, we have B^{n} and E^{n}. // Particles have p^{n} and x^{n}. // is_synchronized is true. + amrex::Print() << " in evolve before fill boundary \n"; if (is_synchronized) { FillBoundaryE(); FillBoundaryB(); UpdateAuxilaryData(); // on first step, push p by -0.5*dt + amrex::Print() << " in evolve before pushP \n"; for (int lev = 0; lev <= finest_level; ++lev) { mypc->PushP(lev, -0.5*dt[lev], *Efield_aux[lev][0],*Efield_aux[lev][1],*Efield_aux[lev][2], *Bfield_aux[lev][0],*Bfield_aux[lev][1],*Bfield_aux[lev][2]); } is_synchronized = false; - - for (MFIter mfi(*rho_fp[0]); mfi.isValid(); ++mfi) - { - MultiFab &mf = *rho_fp[0]; - Box realspace_bx = mf[mfi].box(); // Copy the box - Array4 mf_arr = mf[mfi].array(); - amrex::Print() << " at if synchronized rho " << mf_arr(0,0,0,0) ; - amrex::Print() << " new rho " << mf_arr(0,0,0,1) << "\n"; - } - + amrex::Print() << " in evolve after pushP \n"; } else { // Beyond one step, we have E^{n} and B^{n}. @@ -98,15 +91,8 @@ WarpX::EvolveEM (int numsteps) FillBoundaryB(); UpdateAuxilaryData(); - for (MFIter mfi(*rho_fp[0]); mfi.isValid(); ++mfi) - { - MultiFab &mf = *rho_fp[0]; - Box realspace_bx = mf[mfi].box(); // Copy the box - Array4 mf_arr = mf[mfi].array(); - amrex::Print() << " at if synchronized rho " << mf_arr(0,0,0,0) ; - amrex::Print() << " new rho " << mf_arr(0,0,0,1) << "\n"; - } } + amrex::Print() << " in evolve after fill boundary \n"; if (do_subcycling == 0 || finest_level == 0) { OneStep_nosub(cur_time); @@ -116,6 +102,7 @@ WarpX::EvolveEM (int numsteps) amrex::Print() << "Error: do_subcycling = " << do_subcycling << std::endl; amrex::Abort("Unsupported do_subcycling type"); } + amrex::Print() << " in evolve after onestep no sub \n"; #ifdef WARPX_USE_PY if (warpx_py_beforeEsolve) warpx_py_beforeEsolve(); @@ -279,52 +266,23 @@ WarpX::OneStep_nosub (Real cur_time) if (warpx_py_beforedeposition) warpx_py_beforedeposition(); #endif - for (MFIter mfi(*rho_fp[0]); mfi.isValid(); ++mfi) - { - MultiFab &mf = *rho_fp[0]; - Box realspace_bx = mf[mfi].box(); // Copy the box - Array4 mf_arr = mf[mfi].array(); - amrex::Print() << " at before pushf particles and depose rho " << mf_arr(0,0,0,0) ; - amrex::Print() << " new rho " << mf_arr(0,0,0,1) << "\n"; - } PushParticlesandDepose(cur_time); - for (MFIter mfi(*rho_fp[0]); mfi.isValid(); ++mfi) - { - MultiFab &mf = *rho_fp[0]; - Box realspace_bx = mf[mfi].box(); // Copy the box - Array4 mf_arr = mf[mfi].array(); - amrex::Print() << " at after pushf particles and depose rho " << mf_arr(0,0,0,0) ; - amrex::Print() << " new rho " << mf_arr(0,0,0,1) << "\n"; - } + #ifdef WARPX_USE_PY if (warpx_py_afterdeposition) warpx_py_afterdeposition(); #endif SyncCurrent(); - for (MFIter mfi(*rho_fp[0]); mfi.isValid(); ++mfi) - { - MultiFab &mf = *rho_fp[0]; - Box realspace_bx = mf[mfi].box(); // Copy the box - Array4 mf_arr = mf[mfi].array(); - amrex::Print() << " before sync rho " << mf_arr(0,0,0,0) ; - amrex::Print() << " new rho " << mf_arr(0,0,0,1) << "\n"; - } SyncRho(rho_fp, rho_cp); - for (MFIter mfi(*rho_fp[0]); mfi.isValid(); ++mfi) - { - MultiFab &mf = *rho_fp[0]; - Box realspace_bx = mf[mfi].box(); // Copy the box - Array4 mf_arr = mf[mfi].array(); - amrex::Print() << " after sync rho " << mf_arr(0,0,0,0) ; - amrex::Print() << " new rho " << mf_arr(0,0,0,1) << "\n"; - } // Push E and B from {n} to {n+1} // (And update guard cells immediately afterwards) #ifdef WARPX_USE_PSATD PushPSATD(dt[0]); + amrex::Print() << " before fill bndry E \n"; FillBoundaryE(); + amrex::Print() << " before fill bndry B \n"; FillBoundaryB(); #else EvolveF(0.5*dt[0], DtType::FirstHalf); diff --git a/Source/FieldSolver/SpectralSolver/SpectralFieldData.cpp b/Source/FieldSolver/SpectralSolver/SpectralFieldData.cpp index 7c2061f8d..5998bdd2b 100644 --- a/Source/FieldSolver/SpectralSolver/SpectralFieldData.cpp +++ b/Source/FieldSolver/SpectralSolver/SpectralFieldData.cpp @@ -55,15 +55,13 @@ SpectralFieldData::SpectralFieldData( const BoxArray& realspace_ba, #ifdef AMREX_USE_GPU // Add cuFFT-specific code // Creating 3D plan for real to complex -- double precision + cudaDeviceSynchronize(); cufftResult result; result = cufftPlan3d( &forward_plan[mfi], fft_size[2], fft_size[1],fft_size[0], CUFFT_D2Z); if ( result != CUFFT_SUCCESS ) { amrex::Print() << " cufftplan3d forward failed! \n"; } - if ( result == CUFFT_SUCCESS ) { - amrex::Print() << " created cufft forward plan\n"; - } // Add 2D cuFFT-spacific code for D2Z // Note that D2Z is inherently forward plan @@ -73,9 +71,7 @@ SpectralFieldData::SpectralFieldData( const BoxArray& realspace_ba, if ( result != CUFFT_SUCCESS ) { amrex::Print() << " cufftplan3d backward failed! \n"; } - if ( result == CUFFT_SUCCESS ) { - amrex::Print() << " created cufft backward plan\n"; - } + cudaDeviceSynchronize(); #else // Create FFTW plans @@ -156,15 +152,9 @@ SpectralFieldData::ForwardTransform( const MultiFab& mf, [=] AMREX_GPU_DEVICE(int i, int j, int k) noexcept { tmp_arr(i,j,k) = mf_arr(i,j,k,i_comp); }); -#ifdef AMREX_USE_GPU - cudaDeviceSynchronize(); -#endif - amrex::Print() << " in forward trans icomp " << i_comp << " " << tmp_arr(0,0,0) << " mf arr " ; - amrex::Print() << " " << mf_arr(0,0,0,0); - amrex::Print() << " " << mf_arr(15,15,15,0); - amrex::Print() << " " << mf_arr(0,0,0,1); - amrex::Print() << " " << mf_arr(15,15,15,1); - amrex::Print() << "\n"; +//#ifdef AMREX_USE_GPU +// cudaDeviceSynchronize(); +//#endif } // Perform Fourier transform from `tmpRealField` to `tmpSpectralField` @@ -173,8 +163,9 @@ SpectralFieldData::ForwardTransform( const MultiFab& mf, // GPU stream as the above copy cudaDeviceSynchronize(); cufftResult result; - //cudaStream_t stream = amrex::Cuda::Device::cudaStream(); - //cufftSetStream ( forward_plan[mfi], stream); + cudaStream_t stream = amrex::Cuda::Device::cudaStream(); + amrex::Print() << " stream is " << stream << "\n"; + cufftSetStream ( forward_plan[mfi], stream); result = cufftExecD2Z( forward_plan[mfi], tmpRealField[mfi].dataPtr(), reinterpret_cast( @@ -182,13 +173,9 @@ SpectralFieldData::ForwardTransform( const MultiFab& mf, if ( result != CUFFT_SUCCESS ) { amrex::Print() << " cufftplan3d execute failed ! \n"; } - if ( result == CUFFT_SUCCESS ) { - amrex::Print() << " created cufft forward transform\n"; - } cudaDeviceSynchronize(); #else fftw_execute( forward_plan[mfi] ); - amrex::Print() << " forward fft on cpu\n"; #endif // Copy the spectral-space field `tmpSpectralField` to the appropriate @@ -219,8 +206,6 @@ SpectralFieldData::ForwardTransform( const MultiFab& mf, // Copy field into the right index fields_arr(i,j,k,field_index) = spectral_field_value; }); -// amrex::Print() << " in forward trans after D2Z" << fields_arr(0,0,0,0) ; - amrex::Print() << "\n"; } } } @@ -279,10 +264,11 @@ SpectralFieldData::BackwardTransform( MultiFab& mf, #ifdef AMREX_USE_GPU // Add cuFFT-specific code ; make sure that this is done on the same // GPU stream as the above copy - cudaDeviceSynchronize(); + cudaDeviceSynchronize(); cufftResult result; - //cudaStream_t stream = amrex::Cuda::Device::cudaStream(); - //cufftSetStream ( backward_plan[mfi], stream); + cudaStream_t stream = amrex::Cuda::Device::cudaStream(); + amrex::Print() << " stream is " << stream << "\n"; + cufftSetStream ( backward_plan[mfi], stream); result = cufftExecZ2D( backward_plan[mfi], reinterpret_cast( tmpSpectralField[mfi].dataPtr()), @@ -293,10 +279,9 @@ SpectralFieldData::BackwardTransform( MultiFab& mf, if ( result == CUFFT_SUCCESS ) { amrex::Print() << " created cufft inverse transform\n"; } - cudaDeviceSynchronize(); + cudaDeviceSynchronize(); #else fftw_execute( backward_plan[mfi] ); - amrex::Print() << " cpu inverse done\n"; #endif // Copy the temporary field `tmpRealField` to the real-space field `mf` @@ -313,15 +298,14 @@ SpectralFieldData::BackwardTransform( MultiFab& mf, // Copy and normalize field mf_arr(i,j,k,i_comp) = inv_N*tmp_arr(i,j,k); }); +//#ifdef AMREX_USE_GPU +// cudaDeviceSynchronize(); +//#endif + #ifdef AMREX_USE_GPU cudaDeviceSynchronize(); #endif - amrex::Print() << " after backward plan in real space 0,0,0 " << mf_arr(0,0,0,0) << " tmp " << tmp_arr(0,0,0) << "\n"; - amrex::Print() << " after backward plan in real space 15, 15, 15 " << mf_arr(15,15,15,0) << " tmp " << tmp_arr(0,0,0) << "\n"; - amrex::Print() << "\n"; -#ifdef AMREX_USE_GPU - cudaDeviceSynchronize(); -#endif + amrex::Print() << " divided by 1/N \n"; } } } diff --git a/Source/FieldSolver/WarpXFFT.cpp b/Source/FieldSolver/WarpXFFT.cpp index ffeb237c0..c4e0461f9 100644 --- a/Source/FieldSolver/WarpXFFT.cpp +++ b/Source/FieldSolver/WarpXFFT.cpp @@ -405,20 +405,6 @@ WarpX::PushPSATD (int lev, amrex::Real /* dt */) rho_fp_fft[lev]->ParallelCopy(*rho_fp[lev], 0, 0, 2, 0, 0, period_fp); BL_PROFILE_VAR_STOP(blp_copy); - - for (MFIter mfi(*rho_fp_fft[lev]); mfi.isValid(); ++mfi) - { - MultiFab &mf = *rho_fp_fft[lev]; - Box realspace_bx = mf[mfi].box(); // Copy the box - Array4 mf_arr = mf[mfi].array(); - amrex::Print() << " at begin Push PSATD rho " << mf_arr(0,0,0,0) ; - amrex::Print() << " new rho " << mf_arr(0,0,0,1) << "\n"; - MultiFab &mf_orig = *rho_fp[lev]; - Array4 mforig_arr = mf_orig[mfi].array(); - amrex::Print() << " at begin Push PSATD rho " << mforig_arr(0,0,0,0) ; - amrex::Print() << " new rho " << mforig_arr(0,0,0,1) << "\n"; - } - BL_PROFILE_VAR_START(blp_push_eb); if (fft_hybrid_mpi_decomposition){ if (Efield_fp_fft[lev][0]->local_size() == 1) @@ -465,44 +451,29 @@ WarpX::PushPSATD (int lev, amrex::Real /* dt */) // Not using the hybrid decomposition auto& solver = *spectral_solver_fp[lev]; - //// Perform forward Fourier transform - //amrex::Print() << " FTT of Ex \n"; - //solver.ForwardTransform(*Efield_fp_fft[lev][0], SpectralFieldIndex::Ex); - //amrex::Print() << " FTT of Ey \n"; - //solver.ForwardTransform(*Efield_fp_fft[lev][1], SpectralFieldIndex::Ey); - //amrex::Print() << " FTT of Ez \n"; - //solver.ForwardTransform(*Efield_fp_fft[lev][2], SpectralFieldIndex::Ez); - //amrex::Print() << " FTT of Bx \n"; - //solver.ForwardTransform(*Bfield_fp_fft[lev][0], SpectralFieldIndex::Bx); - //amrex::Print() << " FTT of By \n"; - //solver.ForwardTransform(*Bfield_fp_fft[lev][1], SpectralFieldIndex::By); - //amrex::Print() << " FTT of Bz \n"; - //solver.ForwardTransform(*Bfield_fp_fft[lev][2], SpectralFieldIndex::Bz); - //amrex::Print() << " FTT of Jx \n"; - //solver.ForwardTransform(*current_fp_fft[lev][0], SpectralFieldIndex::Jx); - //amrex::Print() << " FTT of Jy \n"; - //solver.ForwardTransform(*current_fp_fft[lev][1], SpectralFieldIndex::Jy); - //amrex::Print() << " FTT of Jz \n"; - //solver.ForwardTransform(*current_fp_fft[lev][2], SpectralFieldIndex::Jz); + // Perform forward Fourier transform + solver.ForwardTransform(*Efield_fp_fft[lev][0], SpectralFieldIndex::Ex); + solver.ForwardTransform(*Efield_fp_fft[lev][1], SpectralFieldIndex::Ey); + solver.ForwardTransform(*Efield_fp_fft[lev][2], SpectralFieldIndex::Ez); + solver.ForwardTransform(*Bfield_fp_fft[lev][0], SpectralFieldIndex::Bx); + solver.ForwardTransform(*Bfield_fp_fft[lev][1], SpectralFieldIndex::By); + solver.ForwardTransform(*Bfield_fp_fft[lev][2], SpectralFieldIndex::Bz); + solver.ForwardTransform(*current_fp_fft[lev][0], SpectralFieldIndex::Jx); + solver.ForwardTransform(*current_fp_fft[lev][1], SpectralFieldIndex::Jy); + solver.ForwardTransform(*current_fp_fft[lev][2], SpectralFieldIndex::Jz); solver.ForwardTransform(*rho_fp_fft[lev], SpectralFieldIndex::rho_old, 0); solver.ForwardTransform(*rho_fp_fft[lev], SpectralFieldIndex::rho_new, 1); - ////// Advance fields in spectral space - //solver.pushSpectralFields(); - - ////// Perform backward Fourier Transform - //amrex::Print() << " BT of Ex \n"; - //solver.BackwardTransform(*Efield_fp_fft[lev][0], SpectralFieldIndex::Ex); - //amrex::Print() << " BT of Ey \n"; - //solver.BackwardTransform(*Efield_fp_fft[lev][1], SpectralFieldIndex::Ey); - //amrex::Print() << " BT of Ez \n"; - //solver.BackwardTransform(*Efield_fp_fft[lev][2], SpectralFieldIndex::Ez); - //amrex::Print() << " BT of Bx \n"; - //solver.BackwardTransform(*Bfield_fp_fft[lev][0], SpectralFieldIndex::Bx); - //amrex::Print() << " BT of By \n"; - //solver.BackwardTransform(*Bfield_fp_fft[lev][1], SpectralFieldIndex::By); - //amrex::Print() << " BT of Bz \n"; - //solver.BackwardTransform(*Bfield_fp_fft[lev][2], SpectralFieldIndex::Bz); + // Advance fields in spectral space + solver.pushSpectralFields(); + + // Perform backward Fourier Transform + solver.BackwardTransform(*Efield_fp_fft[lev][0], SpectralFieldIndex::Ex); + solver.BackwardTransform(*Efield_fp_fft[lev][1], SpectralFieldIndex::Ey); + solver.BackwardTransform(*Efield_fp_fft[lev][2], SpectralFieldIndex::Ez); + solver.BackwardTransform(*Bfield_fp_fft[lev][0], SpectralFieldIndex::Bx); + solver.BackwardTransform(*Bfield_fp_fft[lev][1], SpectralFieldIndex::By); + solver.BackwardTransform(*Bfield_fp_fft[lev][2], SpectralFieldIndex::Bz); } BL_PROFILE_VAR_STOP(blp_push_eb); @@ -519,4 +490,5 @@ WarpX::PushPSATD (int lev, amrex::Real /* dt */) { amrex::Abort("WarpX::PushPSATD: TODO"); } + amrex::Print() << " coped data from fft to valid \n"; } diff --git a/Source/Initialization/WarpXInitData.cpp b/Source/Initialization/WarpXInitData.cpp index 589ee168c..b9f27f07e 100644 --- a/Source/Initialization/WarpXInitData.cpp +++ b/Source/Initialization/WarpXInitData.cpp @@ -318,16 +318,28 @@ WarpX::InitLevelData (int lev, Real time) void WarpX::InitLevelDataFFT (int lev, Real time) { + + amrex::Print() << " print out pointer " << &Efield_fp_fft[lev][0] << "\n"; Efield_fp_fft[lev][0]->setVal(0.0); + amrex::Print() << " ex set \n"; Efield_fp_fft[lev][1]->setVal(0.0); + amrex::Print() << " ey set \n"; Efield_fp_fft[lev][2]->setVal(0.0); + amrex::Print() << " ez set \n"; Bfield_fp_fft[lev][0]->setVal(0.0); + amrex::Print() << " bx set \n"; Bfield_fp_fft[lev][1]->setVal(0.0); + amrex::Print() << " by set \n"; Bfield_fp_fft[lev][2]->setVal(0.0); + amrex::Print() << " bz set \n"; current_fp_fft[lev][0]->setVal(0.0); + amrex::Print() << " jx set \n"; current_fp_fft[lev][1]->setVal(0.0); + amrex::Print() << " jy set \n"; current_fp_fft[lev][2]->setVal(0.0); + amrex::Print() << " jz set \n"; rho_fp_fft[lev]->setVal(0.0); + amrex::Print() << " rhofp set \n"; if (lev > 0) { @@ -343,14 +355,6 @@ WarpX::InitLevelDataFFT (int lev, Real time) rho_cp_fft[lev]->setVal(0.0); } - for (MFIter mfi(*rho_fp_fft[lev]); mfi.isValid(); ++mfi) - { - MultiFab &mf = *rho_fp_fft[lev]; - Box realspace_bx = mf[mfi].box(); // Copy the box - Array4 mf_arr = mf[mfi].array(); - amrex::Print() << " at initialization rho " << mf_arr(0,0,0,0) ; - amrex::Print() << " new rho " << mf_arr(0,0,0,1) << "\n"; - } } #endif diff --git a/Source/Parallelization/WarpXComm.cpp b/Source/Parallelization/WarpXComm.cpp index 28971eb0c..23d97133b 100644 --- a/Source/Parallelization/WarpXComm.cpp +++ b/Source/Parallelization/WarpXComm.cpp @@ -221,7 +221,9 @@ WarpX::FillBoundaryE () { for (int lev = 0; lev <= finest_level; ++lev) { + amrex::Print() << " lev " << lev << " in fill bndry E \n"; FillBoundaryE(lev); + amrex::Print() << " lev " << lev << " after fill bndry E \n"; } } @@ -245,7 +247,7 @@ void WarpX::FillBoundaryE (int lev, PatchType patch_type) { if (patch_type == PatchType::fine) - { + { if (do_pml && pml[lev]->ok()) { pml[lev]->ExchangeE(patch_type, @@ -270,9 +272,12 @@ WarpX::FillBoundaryE (int lev, PatchType patch_type) pml[lev]->FillBoundaryE(patch_type); } + amrex::Print() << " before defining multifab \n"; const auto& cperiod = Geom(lev-1).periodicity(); Vector mf{Efield_cp[lev][0].get(),Efield_cp[lev][1].get(),Efield_cp[lev][2].get()}; + amrex::Print() << " before amrex fill bndry \n"; amrex::FillBoundary(mf, cperiod); + amrex::Print() << " after amrex fill bndry \n"; } } diff --git a/Source/Particles/WarpXParticleContainer.cpp b/Source/Particles/WarpXParticleContainer.cpp index 695faaa62..94b58ca07 100644 --- a/Source/Particles/WarpXParticleContainer.cpp +++ b/Source/Particles/WarpXParticleContainer.cpp @@ -660,7 +660,7 @@ WarpXParticleContainer::DepositCharge ( WarpXParIter& pti, RealVector& wp, const std::array& cxyzmin_tile = WarpX::LowerCorner(ctilebox, lev-1); #ifdef AMREX_USE_GPU - data_ptr = (*crhomf)[pti].dataPtr(); + data_ptr = (*crhomf)[pti].dataPtr(icomp); auto rholen = (*crhomf)[pti].length(); #else tile_box = amrex::convert(ctilebox, IntVect::TheUnitVector()); diff --git a/Source/WarpX.cpp b/Source/WarpX.cpp index 532858556..5814d6829 100644 --- a/Source/WarpX.cpp +++ b/Source/WarpX.cpp @@ -558,7 +558,9 @@ WarpX::MakeNewLevelFromScratch (int lev, Real time, const BoxArray& new_grids, InitLevelData(lev, time); #ifdef WARPX_USE_PSATD + amrex::Print() << " alloc level data fft \n"; AllocLevelDataFFT(lev); + amrex::Print() << " init level data fft \n"; InitLevelDataFFT(lev, time); #endif } diff --git a/Source/main.cpp b/Source/main.cpp index d89e89e47..2e1a5e9cf 100644 --- a/Source/main.cpp +++ b/Source/main.cpp @@ -30,10 +30,13 @@ int main(int argc, char* argv[]) const Real strt_total = amrex::second(); { + amrex::Print() << " about to construct warpx \n"; WarpX warpx; + amrex::Print() << " call warpx init data \n"; warpx.InitData(); + amrex::Print() << " call warpx evolve \n"; warpx.Evolve(); Real end_total = amrex::second() - strt_total; -- cgit v1.2.3 From 4766b39209bf3ed2849e936c4f2dca7e437f991e Mon Sep 17 00:00:00 2001 From: Revathi Jambunathan Date: Tue, 14 May 2019 18:54:24 -0400 Subject: changed made after merging with lastest dev version --- GNUmakefile | 10 +++++----- Source/Evolve/WarpXEvolveEM.cpp | 9 --------- .../SpectralSolver/SpectralAlgorithms/PsatdAlgorithm.H | 18 +++++++++--------- .../SpectralAlgorithms/PsatdAlgorithm.cpp | 5 ++--- .../FieldSolver/SpectralSolver/SpectralFieldData.cpp | 18 ++++++++++-------- Source/FieldSolver/WarpXFFT.cpp | 11 +++++++++++ Source/Initialization/WarpXInitData.cpp | 11 ----------- Source/WarpX.cpp | 2 -- Source/main.cpp | 3 --- 9 files changed, 37 insertions(+), 50 deletions(-) (limited to 'Source/FieldSolver/SpectralSolver/SpectralFieldData.cpp') diff --git a/GNUmakefile b/GNUmakefile index e3ed55d80..dd80ee161 100644 --- a/GNUmakefile +++ b/GNUmakefile @@ -8,17 +8,17 @@ DEBUG = FALSE #DIM = 2 DIM = 3 -#COMP = gcc +COMP = gcc #COMP = intel -COMP = pgi +#COMP = pgi TINY_PROFILE = TRUE #PROFILE = TRUE #COMM_PROFILE = TRUE #TRACE_PROFILE = TRUE -USE_OMP = FALSE -USE_GPU = TRUE +USE_OMP = TRUE +USE_GPU = FALSE EBASE = main @@ -29,7 +29,7 @@ USE_ASCENT_INSITU = FALSE WarpxBinDir = Bin -USE_PSATD = FALSE +USE_PSATD = TRUE USE_RZ = FALSE DO_ELECTROSTATIC = FALSE diff --git a/Source/Evolve/WarpXEvolveEM.cpp b/Source/Evolve/WarpXEvolveEM.cpp index 02c21529b..6541dd4be 100644 --- a/Source/Evolve/WarpXEvolveEM.cpp +++ b/Source/Evolve/WarpXEvolveEM.cpp @@ -69,20 +69,17 @@ WarpX::EvolveEM (int numsteps) // At the beginning, we have B^{n} and E^{n}. // Particles have p^{n} and x^{n}. // is_synchronized is true. - amrex::Print() << " in evolve before fill boundary \n"; if (is_synchronized) { FillBoundaryE(); FillBoundaryB(); UpdateAuxilaryData(); // on first step, push p by -0.5*dt - amrex::Print() << " in evolve before pushP \n"; for (int lev = 0; lev <= finest_level; ++lev) { mypc->PushP(lev, -0.5*dt[lev], *Efield_aux[lev][0],*Efield_aux[lev][1],*Efield_aux[lev][2], *Bfield_aux[lev][0],*Bfield_aux[lev][1],*Bfield_aux[lev][2]); } is_synchronized = false; - amrex::Print() << " in evolve after pushP \n"; } else { // Beyond one step, we have E^{n} and B^{n}. @@ -92,7 +89,6 @@ WarpX::EvolveEM (int numsteps) UpdateAuxilaryData(); } - amrex::Print() << " in evolve after fill boundary \n"; if (do_subcycling == 0 || finest_level == 0) { OneStep_nosub(cur_time); @@ -102,7 +98,6 @@ WarpX::EvolveEM (int numsteps) amrex::Print() << "Error: do_subcycling = " << do_subcycling << std::endl; amrex::Abort("Unsupported do_subcycling type"); } - amrex::Print() << " in evolve after onestep no sub \n"; if (num_mirrors>0){ applyMirrors(cur_time); @@ -274,9 +269,7 @@ WarpX::OneStep_nosub (Real cur_time) if (warpx_py_particlescraper) warpx_py_particlescraper(); if (warpx_py_beforedeposition) warpx_py_beforedeposition(); #endif - amrex::Print() << " before push \n"; PushParticlesandDepose(cur_time); - amrex::Print() << " after push \n"; #ifdef WARPX_USE_PY if (warpx_py_afterdeposition) warpx_py_afterdeposition(); @@ -290,9 +283,7 @@ WarpX::OneStep_nosub (Real cur_time) // (And update guard cells immediately afterwards) #ifdef WARPX_USE_PSATD PushPSATD(dt[0]); - amrex::Print() << " before fill bndry E \n"; FillBoundaryE(); - amrex::Print() << " before fill bndry B \n"; FillBoundaryB(); #else EvolveF(0.5*dt[0], DtType::FirstHalf); diff --git a/Source/FieldSolver/SpectralSolver/SpectralAlgorithms/PsatdAlgorithm.H b/Source/FieldSolver/SpectralSolver/SpectralAlgorithms/PsatdAlgorithm.H index 34743525e..36d5782e8 100644 --- a/Source/FieldSolver/SpectralSolver/SpectralAlgorithms/PsatdAlgorithm.H +++ b/Source/FieldSolver/SpectralSolver/SpectralAlgorithms/PsatdAlgorithm.H @@ -1,8 +1,8 @@ #ifndef WARPX_PSATD_ALGORITHM_H_ #define WARPX_PSATD_ALGORITHM_H_ -#include -#include +//#include +//#include #include /* \brief Class that updates the field in spectral space @@ -17,19 +17,19 @@ class PsatdAlgorithm : public SpectralBaseAlgorithm const amrex::DistributionMapping& dm, const int norder_x, const int norder_y, const int norder_z, const bool nodal, const amrex::Real dt); - PsatdAlgorithm() = default; // Default constructor - PsatdAlgorithm& operator=(PsatdAlgorithm&& algorithm) = default; - void pushSpectralFields(SpectralFieldData& f) const; + //PsatdAlgorithm() = default; // Default constructor + //PsatdAlgorithm& operator=(PsatdAlgorithm&& algorithm) = default; void InitializeCoefficience(const SpectralKSpace& spectral_kspace, const amrex::DistributionMapping& dm, const amrex::Real dt); + void pushSpectralFields(SpectralFieldData& f) const override final; private: // Modified finite-order vectors - KVectorComponent modified_kx_vec, modified_kz_vec; -#if (AMREX_SPACEDIM==3) - KVectorComponent modified_ky_vec; -#endif +// KVectorComponent modified_kx_vec, modified_kz_vec; +//#if (AMREX_SPACEDIM==3) +// KVectorComponent modified_ky_vec; +//#endif SpectralCoefficients C_coef, S_ck_coef, X1_coef, X2_coef, X3_coef; }; diff --git a/Source/FieldSolver/SpectralSolver/SpectralAlgorithms/PsatdAlgorithm.cpp b/Source/FieldSolver/SpectralSolver/SpectralAlgorithms/PsatdAlgorithm.cpp index 8dd2a830f..3da0ef453 100644 --- a/Source/FieldSolver/SpectralSolver/SpectralAlgorithms/PsatdAlgorithm.cpp +++ b/Source/FieldSolver/SpectralSolver/SpectralAlgorithms/PsatdAlgorithm.cpp @@ -23,6 +23,7 @@ PsatdAlgorithm::PsatdAlgorithm(const SpectralKSpace& spectral_kspace, X3_coef = SpectralCoefficients(ba, dm, 1, 0); InitializeCoefficience(spectral_kspace, dm, dt); +} // // Fill them with the right values: // // Loop over boxes and allocate the corresponding coefficients // // for each box owned by the local MPI proc @@ -76,7 +77,6 @@ PsatdAlgorithm::PsatdAlgorithm(const SpectralKSpace& spectral_kspace, // } // }); // } -}; /* Advance the E and B field in spectral space (stored in `f`) * over one time step */ @@ -173,8 +173,7 @@ void PsatdAlgorithm::InitializeCoefficience(const SpectralKSpace& spectral_kspac // for each box owned by the local MPI proc for (MFIter mfi(ba, dm); mfi.isValid(); ++mfi){ - //const Box& bx = ba[mfi]; - const Box bx = ba[mfi]; + const Box& bx = ba[mfi]; // Extract pointers for the k vectors const Real* modified_kx = modified_kx_vec[mfi].dataPtr(); diff --git a/Source/FieldSolver/SpectralSolver/SpectralFieldData.cpp b/Source/FieldSolver/SpectralSolver/SpectralFieldData.cpp index 5998bdd2b..ca9e87f0f 100644 --- a/Source/FieldSolver/SpectralSolver/SpectralFieldData.cpp +++ b/Source/FieldSolver/SpectralSolver/SpectralFieldData.cpp @@ -152,9 +152,10 @@ SpectralFieldData::ForwardTransform( const MultiFab& mf, [=] AMREX_GPU_DEVICE(int i, int j, int k) noexcept { tmp_arr(i,j,k) = mf_arr(i,j,k,i_comp); }); -//#ifdef AMREX_USE_GPU -// cudaDeviceSynchronize(); -//#endif +#ifdef AMREX_USE_GPU + cudaDeviceSynchronize(); +#endif + amrex::Print() << " before forward transform " << mf_arr(15,15,15,0) << " " << mf_arr(15,15,15,1); } // Perform Fourier transform from `tmpRealField` to `tmpSpectralField` @@ -165,7 +166,7 @@ SpectralFieldData::ForwardTransform( const MultiFab& mf, cufftResult result; cudaStream_t stream = amrex::Cuda::Device::cudaStream(); amrex::Print() << " stream is " << stream << "\n"; - cufftSetStream ( forward_plan[mfi], stream); +// cufftSetStream ( forward_plan[mfi], stream); result = cufftExecD2Z( forward_plan[mfi], tmpRealField[mfi].dataPtr(), reinterpret_cast( @@ -268,7 +269,7 @@ SpectralFieldData::BackwardTransform( MultiFab& mf, cufftResult result; cudaStream_t stream = amrex::Cuda::Device::cudaStream(); amrex::Print() << " stream is " << stream << "\n"; - cufftSetStream ( backward_plan[mfi], stream); +// cufftSetStream ( backward_plan[mfi], stream); result = cufftExecZ2D( backward_plan[mfi], reinterpret_cast( tmpSpectralField[mfi].dataPtr()), @@ -298,9 +299,10 @@ SpectralFieldData::BackwardTransform( MultiFab& mf, // Copy and normalize field mf_arr(i,j,k,i_comp) = inv_N*tmp_arr(i,j,k); }); -//#ifdef AMREX_USE_GPU -// cudaDeviceSynchronize(); -//#endif +#ifdef AMREX_USE_GPU + cudaDeviceSynchronize(); +#endif + amrex::Print() << " mf_arr after BT " << mf_arr(15,15,15,0) << " " << mf_arr(15,15,15,1) << "\n"; #ifdef AMREX_USE_GPU cudaDeviceSynchronize(); diff --git a/Source/FieldSolver/WarpXFFT.cpp b/Source/FieldSolver/WarpXFFT.cpp index c4e0461f9..3c7127d24 100644 --- a/Source/FieldSolver/WarpXFFT.cpp +++ b/Source/FieldSolver/WarpXFFT.cpp @@ -452,16 +452,27 @@ WarpX::PushPSATD (int lev, amrex::Real /* dt */) auto& solver = *spectral_solver_fp[lev]; // Perform forward Fourier transform + amrex::Print() << " FT Ex \n"; solver.ForwardTransform(*Efield_fp_fft[lev][0], SpectralFieldIndex::Ex); + amrex::Print() << " FT Ey \n"; solver.ForwardTransform(*Efield_fp_fft[lev][1], SpectralFieldIndex::Ey); + amrex::Print() << " FT Ez \n"; solver.ForwardTransform(*Efield_fp_fft[lev][2], SpectralFieldIndex::Ez); + amrex::Print() << " FT Bx \n"; solver.ForwardTransform(*Bfield_fp_fft[lev][0], SpectralFieldIndex::Bx); + amrex::Print() << " FT By \n"; solver.ForwardTransform(*Bfield_fp_fft[lev][1], SpectralFieldIndex::By); + amrex::Print() << " FT Bz \n"; solver.ForwardTransform(*Bfield_fp_fft[lev][2], SpectralFieldIndex::Bz); + amrex::Print() << " FT jx \n"; solver.ForwardTransform(*current_fp_fft[lev][0], SpectralFieldIndex::Jx); + amrex::Print() << " FT jy \n"; solver.ForwardTransform(*current_fp_fft[lev][1], SpectralFieldIndex::Jy); + amrex::Print() << " FT jz \n"; solver.ForwardTransform(*current_fp_fft[lev][2], SpectralFieldIndex::Jz); + amrex::Print() << " FT rho old \n"; solver.ForwardTransform(*rho_fp_fft[lev], SpectralFieldIndex::rho_old, 0); + amrex::Print() << " FT rho new \n"; solver.ForwardTransform(*rho_fp_fft[lev], SpectralFieldIndex::rho_new, 1); // Advance fields in spectral space diff --git a/Source/Initialization/WarpXInitData.cpp b/Source/Initialization/WarpXInitData.cpp index b9f27f07e..6a7e79924 100644 --- a/Source/Initialization/WarpXInitData.cpp +++ b/Source/Initialization/WarpXInitData.cpp @@ -319,27 +319,16 @@ void WarpX::InitLevelDataFFT (int lev, Real time) { - amrex::Print() << " print out pointer " << &Efield_fp_fft[lev][0] << "\n"; Efield_fp_fft[lev][0]->setVal(0.0); - amrex::Print() << " ex set \n"; Efield_fp_fft[lev][1]->setVal(0.0); - amrex::Print() << " ey set \n"; Efield_fp_fft[lev][2]->setVal(0.0); - amrex::Print() << " ez set \n"; Bfield_fp_fft[lev][0]->setVal(0.0); - amrex::Print() << " bx set \n"; Bfield_fp_fft[lev][1]->setVal(0.0); - amrex::Print() << " by set \n"; Bfield_fp_fft[lev][2]->setVal(0.0); - amrex::Print() << " bz set \n"; current_fp_fft[lev][0]->setVal(0.0); - amrex::Print() << " jx set \n"; current_fp_fft[lev][1]->setVal(0.0); - amrex::Print() << " jy set \n"; current_fp_fft[lev][2]->setVal(0.0); - amrex::Print() << " jz set \n"; rho_fp_fft[lev]->setVal(0.0); - amrex::Print() << " rhofp set \n"; if (lev > 0) { diff --git a/Source/WarpX.cpp b/Source/WarpX.cpp index 83788d5b9..a3a24897a 100644 --- a/Source/WarpX.cpp +++ b/Source/WarpX.cpp @@ -556,9 +556,7 @@ WarpX::MakeNewLevelFromScratch (int lev, Real time, const BoxArray& new_grids, InitLevelData(lev, time); #ifdef WARPX_USE_PSATD - amrex::Print() << " alloc level data fft \n"; AllocLevelDataFFT(lev); - amrex::Print() << " init level data fft \n"; InitLevelDataFFT(lev, time); #endif } diff --git a/Source/main.cpp b/Source/main.cpp index 2e1a5e9cf..d89e89e47 100644 --- a/Source/main.cpp +++ b/Source/main.cpp @@ -30,13 +30,10 @@ int main(int argc, char* argv[]) const Real strt_total = amrex::second(); { - amrex::Print() << " about to construct warpx \n"; WarpX warpx; - amrex::Print() << " call warpx init data \n"; warpx.InitData(); - amrex::Print() << " call warpx evolve \n"; warpx.Evolve(); Real end_total = amrex::second() - strt_total; -- cgit v1.2.3 From 7cad9dae5a2acf76e2352d428cd2a03c88453a3f Mon Sep 17 00:00:00 2001 From: Revathi Jambunathan Date: Thu, 6 Jun 2019 14:38:11 -0400 Subject: cleaning code for spectral cufft for 3D --- .../SpectralAlgorithms/PsatdAlgorithm.H | 10 ++-- .../SpectralAlgorithms/PsatdAlgorithm.cpp | 58 ++-------------------- .../SpectralSolver/SpectralFieldData.cpp | 50 +++++++------------ .../FieldSolver/SpectralSolver/SpectralKSpace.cpp | 1 + Source/FieldSolver/WarpXFFT.cpp | 23 ++++----- 5 files changed, 35 insertions(+), 107 deletions(-) (limited to 'Source/FieldSolver/SpectralSolver/SpectralFieldData.cpp') diff --git a/Source/FieldSolver/SpectralSolver/SpectralAlgorithms/PsatdAlgorithm.H b/Source/FieldSolver/SpectralSolver/SpectralAlgorithms/PsatdAlgorithm.H index 36d5782e8..0a8614e54 100644 --- a/Source/FieldSolver/SpectralSolver/SpectralAlgorithms/PsatdAlgorithm.H +++ b/Source/FieldSolver/SpectralSolver/SpectralAlgorithms/PsatdAlgorithm.H @@ -17,19 +17,15 @@ class PsatdAlgorithm : public SpectralBaseAlgorithm const amrex::DistributionMapping& dm, const int norder_x, const int norder_y, const int norder_z, const bool nodal, const amrex::Real dt); - //PsatdAlgorithm() = default; // Default constructor - //PsatdAlgorithm& operator=(PsatdAlgorithm&& algorithm) = default; - void InitializeCoefficience(const SpectralKSpace& spectral_kspace, + + void InitializeSpectralCoefficients(const SpectralKSpace& spectral_kspace, const amrex::DistributionMapping& dm, const amrex::Real dt); + void pushSpectralFields(SpectralFieldData& f) const override final; private: // Modified finite-order vectors -// KVectorComponent modified_kx_vec, modified_kz_vec; -//#if (AMREX_SPACEDIM==3) -// KVectorComponent modified_ky_vec; -//#endif SpectralCoefficients C_coef, S_ck_coef, X1_coef, X2_coef, X3_coef; }; diff --git a/Source/FieldSolver/SpectralSolver/SpectralAlgorithms/PsatdAlgorithm.cpp b/Source/FieldSolver/SpectralSolver/SpectralAlgorithms/PsatdAlgorithm.cpp index 3da0ef453..d45b01bda 100644 --- a/Source/FieldSolver/SpectralSolver/SpectralAlgorithms/PsatdAlgorithm.cpp +++ b/Source/FieldSolver/SpectralSolver/SpectralAlgorithms/PsatdAlgorithm.cpp @@ -22,61 +22,8 @@ PsatdAlgorithm::PsatdAlgorithm(const SpectralKSpace& spectral_kspace, X2_coef = SpectralCoefficients(ba, dm, 1, 0); X3_coef = SpectralCoefficients(ba, dm, 1, 0); - InitializeCoefficience(spectral_kspace, dm, dt); + InitializeSpectralCoefficients(spectral_kspace, dm, dt); } -// // Fill them with the right values: -// // Loop over boxes and allocate the corresponding coefficients -// // for each box owned by the local MPI proc -// for (MFIter mfi(ba, dm); mfi.isValid(); ++mfi){ -// -// //const Box& bx = ba[mfi]; -// const Box bx = ba[mfi]; -// -// // Extract pointers for the k vectors -// const Real* modified_kx = modified_kx_vec[mfi].dataPtr(); -//#if (AMREX_SPACEDIM==3) -// const Real* modified_ky = modified_ky_vec[mfi].dataPtr(); -//#endif -// const Real* modified_kz = modified_kz_vec[mfi].dataPtr(); -// // Extract arrays for the coefficients -// Array4 C = C_coef[mfi].array(); -// Array4 S_ck = S_ck_coef[mfi].array(); -// Array4 X1 = X1_coef[mfi].array(); -// Array4 X2 = X2_coef[mfi].array(); -// Array4 X3 = X3_coef[mfi].array(); -// -// // Loop over indices within one box -// ParallelFor(bx, -// [=] AMREX_GPU_DEVICE(int i, int j, int k) noexcept -// { -// // Calculate norm of vector -// const Real k_norm = std::sqrt( -// std::pow(modified_kx[i], 2) + -//#if (AMREX_SPACEDIM==3) -// std::pow(modified_ky[j], 2) + -// std::pow(modified_kz[k], 2)); -//#else -// std::pow(modified_kz[j], 2)); -//#endif -// -// // Calculate coefficients -// constexpr Real c = PhysConst::c; -// constexpr Real ep0 = PhysConst::ep0; -// if (k_norm != 0){ -// C(i,j,k) = std::cos(c*k_norm*dt); -// S_ck(i,j,k) = std::sin(c*k_norm*dt)/(c*k_norm); -// X1(i,j,k) = (1. - C(i,j,k))/(ep0 * c*c * k_norm*k_norm); -// X2(i,j,k) = (1. - S_ck(i,j,k)/dt)/(ep0 * k_norm*k_norm); -// X3(i,j,k) = (C(i,j,k) - S_ck(i,j,k)/dt)/(ep0 * k_norm*k_norm); -// } else { // Handle k_norm = 0, by using the analytical limit -// C(i,j,k) = 1.; -// S_ck(i,j,k) = dt; -// X1(i,j,k) = 0.5 * dt*dt / ep0; -// X2(i,j,k) = c*c * dt*dt / (6.*ep0); -// X3(i,j,k) = - c*c * dt*dt / (3.*ep0); -// } -// }); -// } /* Advance the E and B field in spectral space (stored in `f`) * over one time step */ @@ -139,6 +86,7 @@ PsatdAlgorithm::pushSpectralFields(SpectralFieldData& f) const{ const Real X2 = X2_arr(i,j,k); const Real X3 = X3_arr(i,j,k); + // Update E (see WarpX online documentation: theory section) fields(i,j,k,Idx::Ex) = C*Ex_old + S_ck*(c2*I*(ky*Bz_old - kz*By_old) - inv_ep0*Jx) @@ -163,7 +111,7 @@ PsatdAlgorithm::pushSpectralFields(SpectralFieldData& f) const{ } }; -void PsatdAlgorithm::InitializeCoefficience(const SpectralKSpace& spectral_kspace, +void PsatdAlgorithm::InitializeSpectralCoefficients(const SpectralKSpace& spectral_kspace, const amrex::DistributionMapping& dm, const amrex::Real dt) { diff --git a/Source/FieldSolver/SpectralSolver/SpectralFieldData.cpp b/Source/FieldSolver/SpectralSolver/SpectralFieldData.cpp index ca9e87f0f..cb26b19b7 100644 --- a/Source/FieldSolver/SpectralSolver/SpectralFieldData.cpp +++ b/Source/FieldSolver/SpectralSolver/SpectralFieldData.cpp @@ -55,23 +55,30 @@ SpectralFieldData::SpectralFieldData( const BoxArray& realspace_ba, #ifdef AMREX_USE_GPU // Add cuFFT-specific code // Creating 3D plan for real to complex -- double precision - cudaDeviceSynchronize(); + cufftResult result; +#if (AMREX_SPACEDIM == 3) result = cufftPlan3d( &forward_plan[mfi], fft_size[2], fft_size[1],fft_size[0], CUFFT_D2Z); if ( result != CUFFT_SUCCESS ) { amrex::Print() << " cufftplan3d forward failed! \n"; } +#else // Add 2D cuFFT-spacific code for D2Z // Note that D2Z is inherently forward plan +#endif +#if (AMREX_SPACEDIM == 3) result = cufftPlan3d( &backward_plan[mfi], fft_size[2], fft_size[1], fft_size[0], CUFFT_Z2D); - // Add 2D cuFFT-specific code for Z2D if ( result != CUFFT_SUCCESS ) { amrex::Print() << " cufftplan3d backward failed! \n"; } - cudaDeviceSynchronize(); +#else + // Add 2D cuFFT-specific code for Z2D + // Note that Z2D is inherently backward plan + +#endif #else // Create FFTW plans @@ -152,29 +159,22 @@ SpectralFieldData::ForwardTransform( const MultiFab& mf, [=] AMREX_GPU_DEVICE(int i, int j, int k) noexcept { tmp_arr(i,j,k) = mf_arr(i,j,k,i_comp); }); -#ifdef AMREX_USE_GPU - cudaDeviceSynchronize(); -#endif - amrex::Print() << " before forward transform " << mf_arr(15,15,15,0) << " " << mf_arr(15,15,15,1); } // Perform Fourier transform from `tmpRealField` to `tmpSpectralField` #ifdef AMREX_USE_GPU // Add cuFFT-specific code ; make sure that this is done on the same // GPU stream as the above copy - cudaDeviceSynchronize(); cufftResult result; - cudaStream_t stream = amrex::Cuda::Device::cudaStream(); - amrex::Print() << " stream is " << stream << "\n"; -// cufftSetStream ( forward_plan[mfi], stream); + cudaStream_t stream = amrex::Gpu::Device::cudaStream(); + cufftSetStream ( forward_plan[mfi], stream); result = cufftExecD2Z( forward_plan[mfi], tmpRealField[mfi].dataPtr(), reinterpret_cast( tmpSpectralField[mfi].dataPtr()) ); if ( result != CUFFT_SUCCESS ) { - amrex::Print() << " cufftplan3d execute failed ! \n"; + amrex::Print() << " forward transform using cufftExecD2Z failed ! \n"; } - cudaDeviceSynchronize(); #else fftw_execute( forward_plan[mfi] ); #endif @@ -193,6 +193,7 @@ SpectralFieldData::ForwardTransform( const MultiFab& mf, const Complex* zshift_arr = zshift_FFTfromCell[mfi].dataPtr(); // Loop over indices within one box const Box spectralspace_bx = tmpSpectralField[mfi].box(); + ParallelFor( spectralspace_bx, [=] AMREX_GPU_DEVICE(int i, int j, int k) noexcept { Complex spectral_field_value = tmp_arr(i,j,k); @@ -245,6 +246,7 @@ SpectralFieldData::BackwardTransform( MultiFab& mf, const Complex* zshift_arr = zshift_FFTtoCell[mfi].dataPtr(); // Loop over indices within one box const Box spectralspace_bx = tmpSpectralField[mfi].box(); + ParallelFor( spectralspace_bx, [=] AMREX_GPU_DEVICE(int i, int j, int k) noexcept { Complex spectral_field_value = field_arr(i,j,k,field_index); @@ -265,22 +267,16 @@ SpectralFieldData::BackwardTransform( MultiFab& mf, #ifdef AMREX_USE_GPU // Add cuFFT-specific code ; make sure that this is done on the same // GPU stream as the above copy - cudaDeviceSynchronize(); cufftResult result; - cudaStream_t stream = amrex::Cuda::Device::cudaStream(); - amrex::Print() << " stream is " << stream << "\n"; -// cufftSetStream ( backward_plan[mfi], stream); + cudaStream_t stream = amrex::Gpu::Device::cudaStream(); + cufftSetStream ( backward_plan[mfi], stream); result = cufftExecZ2D( backward_plan[mfi], reinterpret_cast( tmpSpectralField[mfi].dataPtr()), tmpRealField[mfi].dataPtr() ); if ( result != CUFFT_SUCCESS ) { - amrex::Print() << " cufftplan3d execute inverse failed ! \n"; + amrex::Print() << " Backward transform using cufftexecZ2D failed! \n"; } - if ( result == CUFFT_SUCCESS ) { - amrex::Print() << " created cufft inverse transform\n"; - } - cudaDeviceSynchronize(); #else fftw_execute( backward_plan[mfi] ); #endif @@ -294,20 +290,12 @@ SpectralFieldData::BackwardTransform( MultiFab& mf, Array4 tmp_arr = tmpRealField[mfi].array(); // Normalization: divide by the number of points in realspace const Real inv_N = 1./realspace_bx.numPts(); + ParallelFor( realspace_bx, [=] AMREX_GPU_DEVICE(int i, int j, int k) noexcept { // Copy and normalize field mf_arr(i,j,k,i_comp) = inv_N*tmp_arr(i,j,k); }); -#ifdef AMREX_USE_GPU - cudaDeviceSynchronize(); -#endif - amrex::Print() << " mf_arr after BT " << mf_arr(15,15,15,0) << " " << mf_arr(15,15,15,1) << "\n"; - -#ifdef AMREX_USE_GPU - cudaDeviceSynchronize(); -#endif - amrex::Print() << " divided by 1/N \n"; } } } diff --git a/Source/FieldSolver/SpectralSolver/SpectralKSpace.cpp b/Source/FieldSolver/SpectralSolver/SpectralKSpace.cpp index 6a88a52a3..6fe5e3939 100644 --- a/Source/FieldSolver/SpectralSolver/SpectralKSpace.cpp +++ b/Source/FieldSolver/SpectralSolver/SpectralKSpace.cpp @@ -149,6 +149,7 @@ SpectralKSpace::getSpectralShiftFactor( const DistributionMapping& dm, #else shift[i] = std::exp( I*sign*k[i]*0.5*dx[i_dim] ); #endif + } } return shift_factor; diff --git a/Source/FieldSolver/WarpXFFT.cpp b/Source/FieldSolver/WarpXFFT.cpp index d2cb83e60..13d92f6f3 100644 --- a/Source/FieldSolver/WarpXFFT.cpp +++ b/Source/FieldSolver/WarpXFFT.cpp @@ -56,6 +56,7 @@ BuildFFTOwnerMask (const MultiFab& mf, const Geometry& geom) for (const auto& b : bl) { fab.setVal(nonowner, b, 0, 1); } + } return mask; @@ -89,7 +90,7 @@ CopyDataFromFFTToValid (MultiFab& mf, const MultiFab& mf_fft, const BoxArray& ba const FArrayBox& srcfab = mf_fft[mfi]; const Box& srcbox = srcfab.box(); - + if (srcbox.contains(bx)) { // Copy the interior region (without guard cells) @@ -107,6 +108,8 @@ CopyDataFromFFTToValid (MultiFab& mf, const MultiFab& mf_fft, const BoxArray& ba // the cell that has non-zero mask is the one which is retained. mf.setVal(0.0, 0); mf.ParallelAdd(mftmp); + + } } @@ -449,34 +452,23 @@ WarpX::PushPSATD (int lev, amrex::Real /* dt */) amrex::Abort("WarpX::PushPSATD: TODO"); } #else // AMREX_USE_CUDA is defined ; running on GPU - amrex::Abort("The option `psatd.fft_hybrid_mpi_decomposition` does not work on GPU.") + amrex::Abort("The option `psatd.fft_hybrid_mpi_decomposition` does not work on GPU."); #endif } else { // Not using the hybrid decomposition auto& solver = *spectral_solver_fp[lev]; // Perform forward Fourier transform - amrex::Print() << " FT Ex \n"; solver.ForwardTransform(*Efield_fp_fft[lev][0], SpectralFieldIndex::Ex); - amrex::Print() << " FT Ey \n"; solver.ForwardTransform(*Efield_fp_fft[lev][1], SpectralFieldIndex::Ey); - amrex::Print() << " FT Ez \n"; solver.ForwardTransform(*Efield_fp_fft[lev][2], SpectralFieldIndex::Ez); - amrex::Print() << " FT Bx \n"; solver.ForwardTransform(*Bfield_fp_fft[lev][0], SpectralFieldIndex::Bx); - amrex::Print() << " FT By \n"; solver.ForwardTransform(*Bfield_fp_fft[lev][1], SpectralFieldIndex::By); - amrex::Print() << " FT Bz \n"; solver.ForwardTransform(*Bfield_fp_fft[lev][2], SpectralFieldIndex::Bz); - amrex::Print() << " FT jx \n"; solver.ForwardTransform(*current_fp_fft[lev][0], SpectralFieldIndex::Jx); - amrex::Print() << " FT jy \n"; solver.ForwardTransform(*current_fp_fft[lev][1], SpectralFieldIndex::Jy); - amrex::Print() << " FT jz \n"; solver.ForwardTransform(*current_fp_fft[lev][2], SpectralFieldIndex::Jz); - amrex::Print() << " FT rho old \n"; solver.ForwardTransform(*rho_fp_fft[lev], SpectralFieldIndex::rho_old, 0); - amrex::Print() << " FT rho new \n"; solver.ForwardTransform(*rho_fp_fft[lev], SpectralFieldIndex::rho_new, 1); // Advance fields in spectral space @@ -489,6 +481,7 @@ WarpX::PushPSATD (int lev, amrex::Real /* dt */) solver.BackwardTransform(*Bfield_fp_fft[lev][0], SpectralFieldIndex::Bx); solver.BackwardTransform(*Bfield_fp_fft[lev][1], SpectralFieldIndex::By); solver.BackwardTransform(*Bfield_fp_fft[lev][2], SpectralFieldIndex::Bz); + } BL_PROFILE_VAR_STOP(blp_push_eb); @@ -505,5 +498,7 @@ WarpX::PushPSATD (int lev, amrex::Real /* dt */) { amrex::Abort("WarpX::PushPSATD: TODO"); } - amrex::Print() << " coped data from fft to valid \n"; + } + + -- cgit v1.2.3 From e7bc3c34fc838c4ab6ee845de2e3f6354c3cbdec Mon Sep 17 00:00:00 2001 From: Revathi Jambunathan Date: Thu, 6 Jun 2019 17:23:22 -0400 Subject: adding 2D cufft plans --- .../SpectralSolver/SpectralFieldData.cpp | 21 +++++++++++++-------- Source/Parallelization/WarpXComm.cpp | 5 ----- Source/Particles/PhysicalParticleContainer.cpp | 2 -- Source/Particles/WarpXParticleContainer.cpp | 2 -- 4 files changed, 13 insertions(+), 17 deletions(-) (limited to 'Source/FieldSolver/SpectralSolver/SpectralFieldData.cpp') diff --git a/Source/FieldSolver/SpectralSolver/SpectralFieldData.cpp b/Source/FieldSolver/SpectralSolver/SpectralFieldData.cpp index cb26b19b7..6eeb266e7 100644 --- a/Source/FieldSolver/SpectralSolver/SpectralFieldData.cpp +++ b/Source/FieldSolver/SpectralSolver/SpectralFieldData.cpp @@ -55,7 +55,7 @@ SpectralFieldData::SpectralFieldData( const BoxArray& realspace_ba, #ifdef AMREX_USE_GPU // Add cuFFT-specific code // Creating 3D plan for real to complex -- double precision - + // Assuming CUDA is used for programming GPU cufftResult result; #if (AMREX_SPACEDIM == 3) result = cufftPlan3d( &forward_plan[mfi], fft_size[2], @@ -63,21 +63,26 @@ SpectralFieldData::SpectralFieldData( const BoxArray& realspace_ba, if ( result != CUFFT_SUCCESS ) { amrex::Print() << " cufftplan3d forward failed! \n"; } -#else - // Add 2D cuFFT-spacific code for D2Z - // Note that D2Z is inherently forward plan -#endif -#if (AMREX_SPACEDIM == 3) result = cufftPlan3d( &backward_plan[mfi], fft_size[2], fft_size[1], fft_size[0], CUFFT_Z2D); if ( result != CUFFT_SUCCESS ) { amrex::Print() << " cufftplan3d backward failed! \n"; } #else - // Add 2D cuFFT-specific code for Z2D - // Note that Z2D is inherently backward plan + // Add 2D cuFFT-spacific code for D2Z + // Note that D2Z is inherently forward plan + result = cufftPlan2d( &forward_plan[mfi], fft_size[1], + fft_size[0], CUFFT_D2Z ); + if ( result != CUFFT_SUCCESS ) { + amrex::Print() << " cufftplan2d forward failed! \n"; + } + result = cufftPlan2d( &backward_plan[mfi], fft_size[1], + fft_size[0], CUFFT_Z2D ); + if ( result != CUFFT_SUCCESS ) { + amrex::Print() << " cufftplan2d backward failed! \n"; + } #endif #else diff --git a/Source/Parallelization/WarpXComm.cpp b/Source/Parallelization/WarpXComm.cpp index 998fc4de1..00dcb85d0 100644 --- a/Source/Parallelization/WarpXComm.cpp +++ b/Source/Parallelization/WarpXComm.cpp @@ -222,9 +222,7 @@ WarpX::FillBoundaryE () { for (int lev = 0; lev <= finest_level; ++lev) { - amrex::Print() << " lev " << lev << " in fill bndry E \n"; FillBoundaryE(lev); - amrex::Print() << " lev " << lev << " after fill bndry E \n"; } } @@ -273,12 +271,9 @@ WarpX::FillBoundaryE (int lev, PatchType patch_type) pml[lev]->FillBoundaryE(patch_type); } - amrex::Print() << " before defining multifab \n"; const auto& cperiod = Geom(lev-1).periodicity(); Vector mf{Efield_cp[lev][0].get(),Efield_cp[lev][1].get(),Efield_cp[lev][2].get()}; - amrex::Print() << " before amrex fill bndry \n"; amrex::FillBoundary(mf, cperiod); - amrex::Print() << " after amrex fill bndry \n"; } } diff --git a/Source/Particles/PhysicalParticleContainer.cpp b/Source/Particles/PhysicalParticleContainer.cpp index 0826605ec..1f517fccb 100644 --- a/Source/Particles/PhysicalParticleContainer.cpp +++ b/Source/Particles/PhysicalParticleContainer.cpp @@ -1388,9 +1388,7 @@ PhysicalParticleContainer::Evolve (int lev, pti.GetPosition(m_xp[thread_num], m_yp[thread_num], m_zp[thread_num]); BL_PROFILE_VAR_STOP(blp_copy); - amrex::Print() << " before deposit chage \n"; if (rho) DepositCharge(pti, wp, rho, crho, 0, np_current, np, thread_num, lev); - amrex::Print() << " after deposit chage \n"; if (! do_not_push) { diff --git a/Source/Particles/WarpXParticleContainer.cpp b/Source/Particles/WarpXParticleContainer.cpp index 576c53319..47d57294d 100644 --- a/Source/Particles/WarpXParticleContainer.cpp +++ b/Source/Particles/WarpXParticleContainer.cpp @@ -612,7 +612,6 @@ WarpXParticleContainer::DepositCharge ( WarpXParIter& pti, RealVector& wp, const std::array& xyzmin = xyzmin_tile; #ifdef AMREX_USE_GPU - amrex::Print() << " before icomp data ptr " << icomp << "\n"; data_ptr = (*rhomf)[pti].dataPtr(icomp); auto rholen = (*rhomf)[pti].length(); #else @@ -635,7 +634,6 @@ WarpXParticleContainer::DepositCharge ( WarpXParIter& pti, RealVector& wp, const long nz = rholen[1]-1-2*ngRho; #endif BL_PROFILE_VAR_START(blp_pxr_chd); - amrex::Print() << " before warpxcharge deposition \n"; warpx_charge_deposition(data_ptr, &np_current, m_xp[thread_num].dataPtr(), m_yp[thread_num].dataPtr(), -- cgit v1.2.3 From 2569bcd08921227bedc7ccdb1018a5614ab31610 Mon Sep 17 00:00:00 2001 From: Revathi Jambunathan Date: Thu, 6 Jun 2019 19:19:57 -0400 Subject: Included revisions as suggested in the PR --- .../SpectralSolver/SpectralAlgorithms/PsatdAlgorithm.H | 4 ---- Source/FieldSolver/SpectralSolver/SpectralFieldData.H | 1 - Source/FieldSolver/SpectralSolver/SpectralFieldData.cpp | 16 +++++++++------- 3 files changed, 9 insertions(+), 12 deletions(-) (limited to 'Source/FieldSolver/SpectralSolver/SpectralFieldData.cpp') diff --git a/Source/FieldSolver/SpectralSolver/SpectralAlgorithms/PsatdAlgorithm.H b/Source/FieldSolver/SpectralSolver/SpectralAlgorithms/PsatdAlgorithm.H index 0a8614e54..12718e38b 100644 --- a/Source/FieldSolver/SpectralSolver/SpectralAlgorithms/PsatdAlgorithm.H +++ b/Source/FieldSolver/SpectralSolver/SpectralAlgorithms/PsatdAlgorithm.H @@ -1,8 +1,6 @@ #ifndef WARPX_PSATD_ALGORITHM_H_ #define WARPX_PSATD_ALGORITHM_H_ -//#include -//#include #include /* \brief Class that updates the field in spectral space @@ -10,7 +8,6 @@ */ class PsatdAlgorithm : public SpectralBaseAlgorithm { - using SpectralCoefficients = amrex::FabArray< amrex::BaseFab >; public: PsatdAlgorithm(const SpectralKSpace& spectral_kspace, @@ -25,7 +22,6 @@ class PsatdAlgorithm : public SpectralBaseAlgorithm void pushSpectralFields(SpectralFieldData& f) const override final; private: - // Modified finite-order vectors SpectralCoefficients C_coef, S_ck_coef, X1_coef, X2_coef, X3_coef; }; diff --git a/Source/FieldSolver/SpectralSolver/SpectralFieldData.H b/Source/FieldSolver/SpectralSolver/SpectralFieldData.H index 1d64817ef..7954414b8 100644 --- a/Source/FieldSolver/SpectralSolver/SpectralFieldData.H +++ b/Source/FieldSolver/SpectralSolver/SpectralFieldData.H @@ -25,7 +25,6 @@ class SpectralFieldData // (plans are only initialized for the boxes that are owned by // the local MPI rank) #ifdef AMREX_USE_GPU - // Add cuFFT-specific code using FFTplans = amrex::LayoutData; #else using FFTplans = amrex::LayoutData; diff --git a/Source/FieldSolver/SpectralSolver/SpectralFieldData.cpp b/Source/FieldSolver/SpectralSolver/SpectralFieldData.cpp index 6eeb266e7..a2b695568 100644 --- a/Source/FieldSolver/SpectralSolver/SpectralFieldData.cpp +++ b/Source/FieldSolver/SpectralSolver/SpectralFieldData.cpp @@ -53,9 +53,11 @@ SpectralFieldData::SpectralFieldData( const BoxArray& realspace_ba, // the FFT plan, the valid dimensions are those of the real-space box. IntVect fft_size = realspace_ba[mfi].length(); #ifdef AMREX_USE_GPU - // Add cuFFT-specific code + // Create cuFFT plans // Creating 3D plan for real to complex -- double precision // Assuming CUDA is used for programming GPU + // Note that D2Z is inherently forward plan + // and Z2D is inherently backward plan cufftResult result; #if (AMREX_SPACEDIM == 3) result = cufftPlan3d( &forward_plan[mfi], fft_size[2], @@ -70,8 +72,6 @@ SpectralFieldData::SpectralFieldData( const BoxArray& realspace_ba, amrex::Print() << " cufftplan3d backward failed! \n"; } #else - // Add 2D cuFFT-spacific code for D2Z - // Note that D2Z is inherently forward plan result = cufftPlan2d( &forward_plan[mfi], fft_size[1], fft_size[0], CUFFT_D2Z ); if ( result != CUFFT_SUCCESS ) { @@ -117,7 +117,7 @@ SpectralFieldData::~SpectralFieldData() if (tmpRealField.size() > 0){ for ( MFIter mfi(tmpRealField); mfi.isValid(); ++mfi ){ #ifdef AMREX_USE_GPU - // Add cuFFT-specific code + // Destroy cuFFT plans cufftDestroy( forward_plan[mfi] ); cufftDestroy( backward_plan[mfi] ); #else @@ -168,8 +168,9 @@ SpectralFieldData::ForwardTransform( const MultiFab& mf, // Perform Fourier transform from `tmpRealField` to `tmpSpectralField` #ifdef AMREX_USE_GPU - // Add cuFFT-specific code ; make sure that this is done on the same - // GPU stream as the above copy + // Perform Fast Fourier Transform on GPU using cuFFT + // make sure that this is done on the same + // GPU stream as the above copy cufftResult result; cudaStream_t stream = amrex::Gpu::Device::cudaStream(); cufftSetStream ( forward_plan[mfi], stream); @@ -270,7 +271,8 @@ SpectralFieldData::BackwardTransform( MultiFab& mf, // Perform Fourier transform from `tmpSpectralField` to `tmpRealField` #ifdef AMREX_USE_GPU - // Add cuFFT-specific code ; make sure that this is done on the same + // Perform Fast Fourier Transform on GPU using cuFFT. + // make sure that this is done on the same // GPU stream as the above copy cufftResult result; cudaStream_t stream = amrex::Gpu::Device::cudaStream(); -- cgit v1.2.3 From 5266d00087b780ff0eb3c64c6b931d9973f0be6a Mon Sep 17 00:00:00 2001 From: Remi Lehe Date: Mon, 24 Jun 2019 15:22:36 -0700 Subject: Only copy FFT data to the valid box --- .../SpectralSolver/SpectralFieldData.cpp | 43 +++++++++++----------- 1 file changed, 22 insertions(+), 21 deletions(-) (limited to 'Source/FieldSolver/SpectralSolver/SpectralFieldData.cpp') diff --git a/Source/FieldSolver/SpectralSolver/SpectralFieldData.cpp b/Source/FieldSolver/SpectralSolver/SpectralFieldData.cpp index a2b695568..948baf0a6 100644 --- a/Source/FieldSolver/SpectralSolver/SpectralFieldData.cpp +++ b/Source/FieldSolver/SpectralSolver/SpectralFieldData.cpp @@ -55,30 +55,30 @@ SpectralFieldData::SpectralFieldData( const BoxArray& realspace_ba, #ifdef AMREX_USE_GPU // Create cuFFT plans // Creating 3D plan for real to complex -- double precision - // Assuming CUDA is used for programming GPU - // Note that D2Z is inherently forward plan - // and Z2D is inherently backward plan + // Assuming CUDA is used for programming GPU + // Note that D2Z is inherently forward plan + // and Z2D is inherently backward plan cufftResult result; #if (AMREX_SPACEDIM == 3) - result = cufftPlan3d( &forward_plan[mfi], fft_size[2], + result = cufftPlan3d( &forward_plan[mfi], fft_size[2], fft_size[1],fft_size[0], CUFFT_D2Z); if ( result != CUFFT_SUCCESS ) { amrex::Print() << " cufftplan3d forward failed! \n"; } - result = cufftPlan3d( &backward_plan[mfi], fft_size[2], + result = cufftPlan3d( &backward_plan[mfi], fft_size[2], fft_size[1], fft_size[0], CUFFT_Z2D); if ( result != CUFFT_SUCCESS ) { amrex::Print() << " cufftplan3d backward failed! \n"; } #else - result = cufftPlan2d( &forward_plan[mfi], fft_size[1], + result = cufftPlan2d( &forward_plan[mfi], fft_size[1], fft_size[0], CUFFT_D2Z ); if ( result != CUFFT_SUCCESS ) { amrex::Print() << " cufftplan2d forward failed! \n"; } - result = cufftPlan2d( &backward_plan[mfi], fft_size[1], + result = cufftPlan2d( &backward_plan[mfi], fft_size[1], fft_size[0], CUFFT_Z2D ); if ( result != CUFFT_SUCCESS ) { amrex::Print() << " cufftplan2d backward failed! \n"; @@ -162,20 +162,20 @@ SpectralFieldData::ForwardTransform( const MultiFab& mf, Array4 tmp_arr = tmpRealField[mfi].array(); ParallelFor( realspace_bx, [=] AMREX_GPU_DEVICE(int i, int j, int k) noexcept { - tmp_arr(i,j,k) = mf_arr(i,j,k,i_comp); + tmp_arr(i,j,k) = mf_arr(i,j,k,i_comp); }); } // Perform Fourier transform from `tmpRealField` to `tmpSpectralField` #ifdef AMREX_USE_GPU - // Perform Fast Fourier Transform on GPU using cuFFT - // make sure that this is done on the same - // GPU stream as the above copy + // Perform Fast Fourier Transform on GPU using cuFFT + // make sure that this is done on the same + // GPU stream as the above copy cufftResult result; - cudaStream_t stream = amrex::Gpu::Device::cudaStream(); + cudaStream_t stream = amrex::Gpu::Device::cudaStream(); cufftSetStream ( forward_plan[mfi], stream); - result = cufftExecD2Z( forward_plan[mfi], - tmpRealField[mfi].dataPtr(), + result = cufftExecD2Z( forward_plan[mfi], + tmpRealField[mfi].dataPtr(), reinterpret_cast( tmpSpectralField[mfi].dataPtr()) ); if ( result != CUFFT_SUCCESS ) { @@ -271,13 +271,13 @@ SpectralFieldData::BackwardTransform( MultiFab& mf, // Perform Fourier transform from `tmpSpectralField` to `tmpRealField` #ifdef AMREX_USE_GPU - // Perform Fast Fourier Transform on GPU using cuFFT. - // make sure that this is done on the same + // Perform Fast Fourier Transform on GPU using cuFFT. + // make sure that this is done on the same // GPU stream as the above copy cufftResult result; - cudaStream_t stream = amrex::Gpu::Device::cudaStream(); + cudaStream_t stream = amrex::Gpu::Device::cudaStream(); cufftSetStream ( backward_plan[mfi], stream); - result = cufftExecZ2D( backward_plan[mfi], + result = cufftExecZ2D( backward_plan[mfi], reinterpret_cast( tmpSpectralField[mfi].dataPtr()), tmpRealField[mfi].dataPtr() ); @@ -289,16 +289,17 @@ SpectralFieldData::BackwardTransform( MultiFab& mf, #endif // Copy the temporary field `tmpRealField` to the real-space field `mf` - + // (only in the valid cells ; not in the guard cells) // Normalize (divide by 1/N) since the FFT+IFFT results in a factor N { - const Box realspace_bx = tmpRealField[mfi].box(); Array4 mf_arr = mf[mfi].array(); Array4 tmp_arr = tmpRealField[mfi].array(); // Normalization: divide by the number of points in realspace + // (includes the guard cells) + const Box realspace_bx = tmpRealField[mfi].box(); const Real inv_N = 1./realspace_bx.numPts(); - ParallelFor( realspace_bx, + ParallelFor( mfi.validbox(), [=] AMREX_GPU_DEVICE(int i, int j, int k) noexcept { // Copy and normalize field mf_arr(i,j,k,i_comp) = inv_N*tmp_arr(i,j,k); -- cgit v1.2.3