diff options
author | 2019-04-17 11:04:14 -0700 | |
---|---|---|
committer | 2019-04-17 11:04:14 -0700 | |
commit | 142a17cc95229e355804116b018443a85374f24b (patch) | |
tree | 997075fefcbc2c15aa31c0e93a6a38b98d0cc1ac /Source/Utils/WarpXMovingWindow.cpp | |
parent | 60793d80ea5d7c33dcfe681549c0986322b8004a (diff) | |
download | WarpX-142a17cc95229e355804116b018443a85374f24b.tar.gz WarpX-142a17cc95229e355804116b018443a85374f24b.tar.zst WarpX-142a17cc95229e355804116b018443a85374f24b.zip |
optimize shiftMF for gpu
Diffstat (limited to 'Source/Utils/WarpXMovingWindow.cpp')
-rw-r--r-- | Source/Utils/WarpXMovingWindow.cpp | 48 |
1 files changed, 27 insertions, 21 deletions
diff --git a/Source/Utils/WarpXMovingWindow.cpp b/Source/Utils/WarpXMovingWindow.cpp index 908c70573..05e171f22 100644 --- a/Source/Utils/WarpXMovingWindow.cpp +++ b/Source/Utils/WarpXMovingWindow.cpp @@ -189,7 +189,7 @@ WarpX::shiftMF (MultiFab& mf, const Geometry& geom, int num_shift, int dir) AMREX_ALWAYS_ASSERT(ng.min() >= num_shift); - MultiFab tmpmf(ba, dm, nc, ng); + MultiFab tmpmf(ba, dm, nc, ng, MFInfo().SetDeviceFab(false)); MultiFab::Copy(tmpmf, mf, 0, 0, nc, ng); tmpmf.FillBoundary(geom.periodicity()); @@ -217,29 +217,35 @@ WarpX::shiftMF (MultiFab& mf, const Geometry& geom, int num_shift, int dir) } } + IntVect shiftiv(0); + shiftiv[dir] = num_shift; + Dim3 shift = shiftiv.dim3(); + #ifdef _OPENMP -#pragma omp parallel +#pragma omp parallel if (Gpu::notInLaunchRegion()) #endif for (MFIter mfi(tmpmf); mfi.isValid(); ++mfi ) { - FArrayBox* dstfab = mf.fabPtr(mfi); - FArrayBox* srcfab = tmpmf.fabPtr(mfi); - Box outbox = mfi.fabbox(); - outbox &= adjBox; - AMREX_LAUNCH_HOST_DEVICE_LAMBDA(outbox, toutbox, - { - srcfab->setVal(0.0, toutbox, 0, nc); - }); - Box dstBox = mf[mfi].box(); - if (num_shift > 0) { - dstBox.growHi(dir, -num_shift); - } else { - dstBox.growLo(dir, num_shift); - } - AMREX_LAUNCH_HOST_DEVICE_LAMBDA(dstBox, tdstBox, - { - dstfab->setVal(0.0, tdstBox, 0, nc); - dstfab->copy(*srcfab, amrex::shift(tdstBox,dir,num_shift), 0, tdstBox, 0, nc); - }); + auto const& dstfab = mf.array(mfi); + auto const& srcfab = tmpmf.array(mfi); + + const Box& outbox = mfi.fabbox() & adjBox; + if (outbox.ok()) { + AMREX_PARALLEL_FOR_4D ( outbox, nc, i, j, k, n, + { + srcfab(i,j,k,n) = 0.0; + }); + } + + Box dstBox = mf[mfi].box(); + if (num_shift > 0) { + dstBox.growHi(dir, -num_shift); + } else { + dstBox.growLo(dir, num_shift); + } + AMREX_PARALLEL_FOR_4D ( dstBox, nc, i, j, k, n, + { + dstfab(i,j,k,n) = srcfab(i+shift.x,j+shift.y,k+shift.z,n); + }); } } |