From d6f8ce933459c52552772ae8bba683ec2e8cb3ad Mon Sep 17 00:00:00 2001 From: Axel Huebl Date: Wed, 11 Sep 2019 16:06:07 -0700 Subject: Source: tabs2spaces Manually fix tabs to four spaces and alignments for consistent prepresentation of source code over all machines. --- Source/Parallelization/WarpXComm.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'Source/Parallelization/WarpXComm.cpp') diff --git a/Source/Parallelization/WarpXComm.cpp b/Source/Parallelization/WarpXComm.cpp index 93fc12799..ea939cef3 100644 --- a/Source/Parallelization/WarpXComm.cpp +++ b/Source/Parallelization/WarpXComm.cpp @@ -252,12 +252,12 @@ WarpX::FillBoundaryE (int lev, PatchType patch_type) { if (do_pml && pml[lev]->ok()) { - pml[lev]->ExchangeE(patch_type, + pml[lev]->ExchangeE(patch_type, { Efield_fp[lev][0].get(), Efield_fp[lev][1].get(), Efield_fp[lev][2].get() }, do_pml_in_domain); - pml[lev]->FillBoundaryE(patch_type); + pml[lev]->FillBoundaryE(patch_type); } const auto& period = Geom(lev).periodicity(); @@ -296,7 +296,7 @@ WarpX::FillBoundaryB (int lev, PatchType patch_type) { if (do_pml && pml[lev]->ok()) { - pml[lev]->ExchangeB(patch_type, + pml[lev]->ExchangeB(patch_type, { Bfield_fp[lev][0].get(), Bfield_fp[lev][1].get(), Bfield_fp[lev][2].get() }, -- cgit v1.2.3 From eff5d96ea1bb6bca586376ed48d57e82bd6d84a3 Mon Sep 17 00:00:00 2001 From: Remi Lehe Date: Fri, 13 Sep 2019 12:23:19 -0700 Subject: Correct deposition buffers --- Source/Parallelization/WarpXComm.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'Source/Parallelization/WarpXComm.cpp') diff --git a/Source/Parallelization/WarpXComm.cpp b/Source/Parallelization/WarpXComm.cpp index ea939cef3..1c8c37cad 100644 --- a/Source/Parallelization/WarpXComm.cpp +++ b/Source/Parallelization/WarpXComm.cpp @@ -582,7 +582,7 @@ WarpX::AddCurrentFromFineLevelandSumBoundary (int lev) } else if (current_buf[lev+1][idim]) // but no filter { - MultiFab::Copy(*current_buf[lev+1][idim], + MultiFab::Add(*current_buf[lev+1][idim], *current_cp [lev+1][idim], 0, 0, current_buf[lev+1][idim]->nComp(), current_cp[lev+1][idim]->nGrow()); mf.ParallelAdd(*current_buf[lev+1][idim], 0, 0, current_buf[lev+1][idim]->nComp(), -- cgit v1.2.3 From 2fe4cae1b0da80e04f2cc0a7865ff86ac64e8a39 Mon Sep 17 00:00:00 2001 From: Remi Lehe Date: Sat, 14 Sep 2019 10:52:08 -0700 Subject: Fix charge deposition with MR --- Source/Parallelization/WarpXComm.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'Source/Parallelization/WarpXComm.cpp') diff --git a/Source/Parallelization/WarpXComm.cpp b/Source/Parallelization/WarpXComm.cpp index 1c8c37cad..e24dd772c 100644 --- a/Source/Parallelization/WarpXComm.cpp +++ b/Source/Parallelization/WarpXComm.cpp @@ -688,7 +688,7 @@ WarpX::AddRhoFromFineLevelandSumBoundary(int lev, int icomp, int ncomp) } else if (charge_buf[lev+1]) // but no filter { - MultiFab::Copy(*charge_buf[lev+1], + MultiFab::Add(*charge_buf[lev+1], *rho_cp[lev+1], icomp, icomp, ncomp, rho_cp[lev+1]->nGrow()); mf.ParallelAdd(*charge_buf[lev+1], icomp, 0, -- cgit v1.2.3 From 22e42395271564fbca74a5a4da0b13b265cede61 Mon Sep 17 00:00:00 2001 From: Weiqun Zhang Date: Mon, 16 Sep 2019 19:06:03 -0700 Subject: implement the energy conserving way of interploating from coarse to fine for auxilary data and remove the high-order divB-preserving option. --- Source/Parallelization/Make.package | 1 + Source/Parallelization/WarpXComm.cpp | 147 +++++++++++--------------------- Source/Parallelization/WarpXComm_K.H | 161 +++++++++++++++++++++++++++++++++++ 3 files changed, 213 insertions(+), 96 deletions(-) create mode 100644 Source/Parallelization/WarpXComm_K.H (limited to 'Source/Parallelization/WarpXComm.cpp') diff --git a/Source/Parallelization/Make.package b/Source/Parallelization/Make.package index 3d1fcf1da..c74583522 100644 --- a/Source/Parallelization/Make.package +++ b/Source/Parallelization/Make.package @@ -1,6 +1,7 @@ CEXE_sources += WarpXComm.cpp CEXE_sources += WarpXRegrid.cpp CEXE_headers += WarpXSumGuardCells.H +CEXE_headers += WarpXComm_K.H INCLUDE_LOCATIONS += $(WARPX_HOME)/Source/Parallelization VPATH_LOCATIONS += $(WARPX_HOME)/Source/Parallelization diff --git a/Source/Parallelization/WarpXComm.cpp b/Source/Parallelization/WarpXComm.cpp index e24dd772c..990d0f988 100644 --- a/Source/Parallelization/WarpXComm.cpp +++ b/Source/Parallelization/WarpXComm.cpp @@ -1,9 +1,8 @@ +#include #include #include #include -#include - #include #include @@ -52,8 +51,6 @@ WarpX::UpdateAuxilaryData () { BL_PROFILE("UpdateAuxilaryData()"); - const int use_limiter = 0; - for (int lev = 1; lev <= finest_level; ++lev) { const auto& crse_period = Geom(lev-1).periodicity(); @@ -81,57 +78,37 @@ WarpX::UpdateAuxilaryData () MultiFab::Subtract(dBy, *Bfield_cp[lev][1], 0, 0, Bfield_cp[lev][1]->nComp(), ng); MultiFab::Subtract(dBz, *Bfield_cp[lev][2], 0, 0, Bfield_cp[lev][2]->nComp(), ng); - const Real* dx = Geom(lev-1).CellSize(); const int refinement_ratio = refRatio(lev-1)[0]; + AMREX_ALWAYS_ASSERT(refinement_ratio == 2); + #ifdef _OPENMP -#pragma omp parallel +#pragma omp parallel if (Gpu::notInLaunchRegion()) #endif + for (MFIter mfi(*Bfield_aux[lev][0]); mfi.isValid(); ++mfi) { - std::array bfab; - for (MFIter mfi(*Bfield_aux[lev][0]); mfi.isValid(); ++mfi) + Array4 const& bx_aux = Bfield_aux[lev][0]->array(mfi); + Array4 const& by_aux = Bfield_aux[lev][1]->array(mfi); + Array4 const& bz_aux = Bfield_aux[lev][2]->array(mfi); + Array4 const& bx_fp = Bfield_fp[lev][0]->const_array(mfi); + Array4 const& by_fp = Bfield_fp[lev][1]->const_array(mfi); + Array4 const& bz_fp = Bfield_fp[lev][2]->const_array(mfi); + Array4 const& bx_c = dBx.const_array(mfi); + Array4 const& by_c = dBy.const_array(mfi); + Array4 const& bz_c = dBz.const_array(mfi); + + amrex::ParallelFor(Box(bx_aux), Box(by_aux), Box(bz_aux), + [=] AMREX_GPU_DEVICE (int j, int k, int l) noexcept { - Box ccbx = mfi.fabbox(); - ccbx.enclosedCells(); - ccbx.coarsen(refinement_ratio).refine(refinement_ratio); // so that ccbx is coarsenable - - const FArrayBox& cxfab = dBx[mfi]; - const FArrayBox& cyfab = dBy[mfi]; - const FArrayBox& czfab = dBz[mfi]; - bfab[0].resize(amrex::convert(ccbx,Bx_nodal_flag)); - bfab[1].resize(amrex::convert(ccbx,By_nodal_flag)); - bfab[2].resize(amrex::convert(ccbx,Bz_nodal_flag)); - -#if (AMREX_SPACEDIM == 3) - amrex_interp_div_free_bfield(ccbx.loVect(), ccbx.hiVect(), - BL_TO_FORTRAN_ANYD(bfab[0]), - BL_TO_FORTRAN_ANYD(bfab[1]), - BL_TO_FORTRAN_ANYD(bfab[2]), - BL_TO_FORTRAN_ANYD(cxfab), - BL_TO_FORTRAN_ANYD(cyfab), - BL_TO_FORTRAN_ANYD(czfab), - dx, &refinement_ratio,&use_limiter); -#else - amrex_interp_div_free_bfield(ccbx.loVect(), ccbx.hiVect(), - BL_TO_FORTRAN_ANYD(bfab[0]), - BL_TO_FORTRAN_ANYD(bfab[2]), - BL_TO_FORTRAN_ANYD(cxfab), - BL_TO_FORTRAN_ANYD(czfab), - dx, &refinement_ratio,&use_limiter); - amrex_interp_cc_bfield(ccbx.loVect(), ccbx.hiVect(), - BL_TO_FORTRAN_ANYD(bfab[1]), - BL_TO_FORTRAN_ANYD(cyfab), - &refinement_ratio,&use_limiter); -#endif - - for (int idim = 0; idim < 3; ++idim) - { - FArrayBox& aux = (*Bfield_aux[lev][idim])[mfi]; - FArrayBox& fp = (*Bfield_fp[lev][idim])[mfi]; - const Box& bx = aux.box(); - aux.copy(fp, bx, 0, bx, 0, 1); - aux.plus(bfab[idim], bx, bx, 0, 0, 1); - } - } + warpx_interp_bfield_x(j,k,l, bx_aux, bx_fp, bx_c); + }, + [=] AMREX_GPU_DEVICE (int j, int k, int l) noexcept + { + warpx_interp_bfield_y(j,k,l, by_aux, by_fp, by_c); + }, + [=] AMREX_GPU_DEVICE (int j, int k, int l) noexcept + { + warpx_interp_bfield_z(j,k,l, bz_aux, bz_fp, bz_c); + }); } } @@ -156,56 +133,34 @@ WarpX::UpdateAuxilaryData () MultiFab::Subtract(dEy, *Efield_cp[lev][1], 0, 0, Efield_cp[lev][1]->nComp(), ng); MultiFab::Subtract(dEz, *Efield_cp[lev][2], 0, 0, Efield_cp[lev][2]->nComp(), ng); - const int refinement_ratio = refRatio(lev-1)[0]; #ifdef _OPEMP -#pragma omp parallel +#pragma omp parallel if (Gpu::notInLaunchRegion()) #endif + for (MFIter mfi(*Efield_aux[lev][0]); mfi.isValid(); ++mfi) { - std::array efab; - for (MFIter mfi(*Efield_aux[lev][0]); mfi.isValid(); ++mfi) + Array4 const& ex_aux = Efield_aux[lev][0]->array(mfi); + Array4 const& ey_aux = Efield_aux[lev][1]->array(mfi); + Array4 const& ez_aux = Efield_aux[lev][2]->array(mfi); + Array4 const& ex_fp = Efield_fp[lev][0]->const_array(mfi); + Array4 const& ey_fp = Efield_fp[lev][1]->const_array(mfi); + Array4 const& ez_fp = Efield_fp[lev][2]->const_array(mfi); + Array4 const& ex_c = dEx.const_array(mfi); + Array4 const& ey_c = dEy.const_array(mfi); + Array4 const& ez_c = dEz.const_array(mfi); + + amrex::ParallelFor(Box(ex_aux), Box(ey_aux), Box(ez_aux), + [=] AMREX_GPU_DEVICE (int j, int k, int l) noexcept { - Box ccbx = mfi.fabbox(); - ccbx.enclosedCells(); - ccbx.coarsen(refinement_ratio).refine(refinement_ratio); // so that ccbx is coarsenable - - const FArrayBox& cxfab = dEx[mfi]; - const FArrayBox& cyfab = dEy[mfi]; - const FArrayBox& czfab = dEz[mfi]; - efab[0].resize(amrex::convert(ccbx,Ex_nodal_flag)); - efab[1].resize(amrex::convert(ccbx,Ey_nodal_flag)); - efab[2].resize(amrex::convert(ccbx,Ez_nodal_flag)); - -#if (AMREX_SPACEDIM == 3) - amrex_interp_efield(ccbx.loVect(), ccbx.hiVect(), - BL_TO_FORTRAN_ANYD(efab[0]), - BL_TO_FORTRAN_ANYD(efab[1]), - BL_TO_FORTRAN_ANYD(efab[2]), - BL_TO_FORTRAN_ANYD(cxfab), - BL_TO_FORTRAN_ANYD(cyfab), - BL_TO_FORTRAN_ANYD(czfab), - &refinement_ratio,&use_limiter); -#else - amrex_interp_efield(ccbx.loVect(), ccbx.hiVect(), - BL_TO_FORTRAN_ANYD(efab[0]), - BL_TO_FORTRAN_ANYD(efab[2]), - BL_TO_FORTRAN_ANYD(cxfab), - BL_TO_FORTRAN_ANYD(czfab), - &refinement_ratio,&use_limiter); - amrex_interp_nd_efield(ccbx.loVect(), ccbx.hiVect(), - BL_TO_FORTRAN_ANYD(efab[1]), - BL_TO_FORTRAN_ANYD(cyfab), - &refinement_ratio); -#endif - - for (int idim = 0; idim < 3; ++idim) - { - FArrayBox& aux = (*Efield_aux[lev][idim])[mfi]; - FArrayBox& fp = (*Efield_fp[lev][idim])[mfi]; - const Box& bx = aux.box(); - aux.copy(fp, bx, 0, bx, 0, Efield_fp[lev][idim]->nComp()); - aux.plus(efab[idim], bx, bx, 0, 0, Efield_fp[lev][idim]->nComp()); - } - } + warpx_interp_efield_x(j,k,l, ex_aux, ex_fp, ex_c); + }, + [=] AMREX_GPU_DEVICE (int j, int k, int l) noexcept + { + warpx_interp_efield_y(j,k,l, ey_aux, ey_fp, ey_c); + }, + [=] AMREX_GPU_DEVICE (int j, int k, int l) noexcept + { + warpx_interp_efield_z(j,k,l, ez_aux, ez_fp, ez_c); + }); } } } diff --git a/Source/Parallelization/WarpXComm_K.H b/Source/Parallelization/WarpXComm_K.H new file mode 100644 index 000000000..093323ec3 --- /dev/null +++ b/Source/Parallelization/WarpXComm_K.H @@ -0,0 +1,161 @@ +#ifndef WARPX_COMM_K_H_ +#define WARPX_COMM_K_H_ + +#include + +AMREX_GPU_DEVICE AMREX_FORCE_INLINE +void warpx_interp_bfield_x (int j, int k, int l, + amrex::Array4 const& Bxa, + amrex::Array4 const& Bxf, + amrex::Array4 const& Bxc) +{ + using namespace amrex; + + int lg = amrex::coarsen(l,2); + int kg = amrex::coarsen(k,2); + int jg = amrex::coarsen(j,2); + + Real wx = (j == jg*2) ? 0.0 : 0.5; + Real owx = 1.0-wx; + Bxa(j,k,l) = owx * Bxc(jg,kg,lg) + wx * Bxc(jg+1,kg,lg) + Bxf(j,k,l); +} + +AMREX_GPU_DEVICE AMREX_FORCE_INLINE +void warpx_interp_bfield_y (int j, int k, int l, + amrex::Array4 const& Bya, + amrex::Array4 const& Byf, + amrex::Array4 const& Byc) +{ + using namespace amrex; + + int lg = amrex::coarsen(l,2); + int kg = amrex::coarsen(k,2); + int jg = amrex::coarsen(j,2); + + // Note that for 2d, l=0, because the amrex convention is used here. + +#if (AMREX_SPACEDIM == 3) + Real wy = (k == kg*2) ? 0.0 : 0.5; + Real owy = 1.0-wy; + Bya(j,k,l) = owy * Byc(jg,kg,lg) + wy * Byc(jg,kg+1,lg) + Byf(j,k,l); +#else + Bya(j,k,l) = Byc(jg,kg,lg) + Byf(j,k,l); +#endif +} + +AMREX_GPU_DEVICE AMREX_FORCE_INLINE +void warpx_interp_bfield_z (int j, int k, int l, + amrex::Array4 const& Bza, + amrex::Array4 const& Bzf, + amrex::Array4 const& Bzc) +{ + using namespace amrex; + + int lg = amrex::coarsen(l,2); + int kg = amrex::coarsen(k,2); + int jg = amrex::coarsen(j,2); + + // Note that for 2d, l=0, because the amrex convention is used here. + +#if (AMREX_SPACEDIM == 3) + Real wz = (l == lg*2) ? 0.0 : 0.5; + Real owz = 1.0-wz; + Bza(j,k,l) = owz * Bzc(jg,kg,lg) + owz * Bzc(jg,kg,lg+1) + Bzf(j,k,l); +#else + Real wy = (k == kg*2) ? 0.0 : 0.5; + Real owy = 1.0-wy; + Bza(j,k,l) = owy * Bzc(jg,kg,lg) + owy * Bzc(jg,kg+1,lg) + Bzf(j,k,l); +#endif +} + +AMREX_GPU_DEVICE AMREX_FORCE_INLINE +void warpx_interp_efield_x (int j, int k, int l, + amrex::Array4 const& Exa, + amrex::Array4 const& Exf, + amrex::Array4 const& Exc) +{ + using namespace amrex; + + int lg = amrex::coarsen(l,2); + int kg = amrex::coarsen(k,2); + int jg = amrex::coarsen(j,2); + + Real wy = (k == kg*2) ? 0.0 : 0.5; + Real owy = 1.0-wy; + +#if (AMREX_SPACEDIM == 3) + Real wz = (l == lg*2) ? 0.0 : 0.5; + Real owz = 1.0-wz; + Exa(j,k,l) = owy * owz * Exc(jg ,kg ,lg ) + + wy * owz * Exc(jg ,kg+1,lg ) + + owy * wz * Exc(jg ,kg ,lg+1) + + wy * wz * Exc(jg ,kg+1,lg+1) + + Exf(j,k,l); +#else + Exa(j,k,l) = owy * Exc(jg,kg,lg) + wy * Exc(jg,kg+1,lg) + Exf(j,k,l); +#endif +} + +AMREX_GPU_DEVICE AMREX_FORCE_INLINE +void warpx_interp_efield_y (int j, int k, int l, + amrex::Array4 const& Eya, + amrex::Array4 const& Eyf, + amrex::Array4 const& Eyc) +{ + using namespace amrex; + + int lg = amrex::coarsen(l,2); + int kg = amrex::coarsen(k,2); + int jg = amrex::coarsen(j,2); + + Real wx = (j == jg*2) ? 0.0 : 0.5; + Real owx = 1.0-wx; + +#if (AMREX_SPACEDIM == 3) + Real wz = (l == lg*2) ? 0.0 : 0.5; + Real owz = 1.0-wz; + Eya(j,k,l) = owx * owz * Eyc(jg ,kg ,lg ) + + wx * owz * Eyc(jg+1,kg ,lg ) + + owx * wz * Eyc(jg ,kg ,lg+1) + + wx * wz * Eyc(jg+1,kg ,lg+1) + + Eyf(j,k,l); +#else + Real wy = (k == kg*2) ? 0.0 : 0.5; + Real owy = 1.0-wy; + Eya(j,k,l) = owx * owy * Eyc(jg ,kg ,lg) + + wx * owy * Eyc(jg+1,kg ,lg) + + owx * wy * Eyc(jg ,kg+1,lg) + + wx * wy * Eyc(jg+1,kg+1,lg) + + Eyf(j,k,l); +#endif +} + +AMREX_GPU_DEVICE AMREX_FORCE_INLINE +void warpx_interp_efield_z (int j, int k, int l, + amrex::Array4 const& Eza, + amrex::Array4 const& Ezf, + amrex::Array4 const& Ezc) +{ + using namespace amrex; + + int lg = amrex::coarsen(l,2); + int kg = amrex::coarsen(k,2); + int jg = amrex::coarsen(j,2); + + Real wx = (j == jg*2) ? 0.0 : 0.5; + Real owx = 1.0-wx; + +#if (AMREX_SPACEDIM == 3) + Real wy = (k == kg*2) ? 0.0 : 0.5; + Real owy = 1.0-wy; + Eza(j,k,l) = owx * owy * Ezc(jg ,kg ,lg ) + + wx * owy * Ezc(jg+1,kg ,lg ) + + owx * wy * Ezc(jg ,kg+1,lg ) + + wx * wy * Ezc(jg+1,kg+1,lg ) + + Ezf(j,k,l); +#else + Eza(j,k,l) = owx * Ezc(jg,kg,lg) + wx * Ezc(jg+1,kg,lg) + Ezf(j,k,l); +#endif +} + +#endif -- cgit v1.2.3