From 69b0a98b3e8495eb2eca4269533516d66f3ff6f3 Mon Sep 17 00:00:00 2001
From: Weiqun Zhang <weiqunzhang@lbl.gov>
Date: Fri, 27 Sep 2019 11:41:49 -0700
Subject: nodal auxilary data

---
 Source/Parallelization/WarpXComm.cpp | 151 +++++++++++++++++++++++++++++++++++
 1 file changed, 151 insertions(+)

(limited to 'Source/Parallelization/WarpXComm.cpp')
diff --git a/Source/Parallelization/WarpXComm.cpp b/Source/Parallelization/WarpXComm.cpp
index 990d0f988..e67acec22 100644
--- a/Source/Parallelization/WarpXComm.cpp
+++ b/Source/Parallelization/WarpXComm.cpp
@@ -51,6 +51,157 @@ WarpX::UpdateAuxilaryData ()
 {
     BL_PROFILE("UpdateAuxilaryData()");
 
+    if (Bfield_aux[0][0]->is_nodal()) {
+        UpdateNodalAuxilaryData();
+    } else {
+        UpdateStagAuxilaryData();
+    }
+}
+
+void
+WarpX::UpdateNodalAuxilaryData ()
+{
+    // For level 0, we only need to do the average.
+#ifdef _OPENMP
+#pragma omp parallel if (Gpu::notInLaunchRegion())
+#endif
+    for (MFIter mfi(*Bfield_aux[0][0]); mfi.isValid(); ++mfi)
+    {
+        Array4<Real> const& bx_aux = Bfield_aux[0][0]->array(mfi);
+        Array4<Real> const& by_aux = Bfield_aux[0][1]->array(mfi);
+        Array4<Real> const& bz_aux = Bfield_aux[0][2]->array(mfi);
+        Array4<Real const> const& bx_fp = Bfield_fp[0][0]->const_array(mfi);
+        Array4<Real const> const& by_fp = Bfield_fp[0][1]->const_array(mfi);
+        Array4<Real const> const& bz_fp = Bfield_fp[0][2]->const_array(mfi);
+
+        Array4<Real> const& ex_aux = Efield_aux[0][0]->array(mfi);
+        Array4<Real> const& ey_aux = Efield_aux[0][1]->array(mfi);
+        Array4<Real> const& ez_aux = Efield_aux[0][2]->array(mfi);
+        Array4<Real const> const& ex_fp = Efield_fp[0][0]->const_array(mfi);
+        Array4<Real const> const& ey_fp = Efield_fp[0][1]->const_array(mfi);
+        Array4<Real const> const& ez_fp = Efield_fp[0][2]->const_array(mfi);
+
+        const Box& bx = mfi.fabbox();
+        amrex::ParallelFor(bx,
+        [=] AMREX_GPU_DEVICE (int j, int k, int l) noexcept
+        {
+            warpx_interp_nd_bfield_x(j,k,l, bx_aux, bx_fp);
+            warpx_interp_nd_bfield_y(j,k,l, by_aux, by_fp);
+            warpx_interp_nd_bfield_z(j,k,l, bz_aux, bz_fp);
+            warpx_interp_nd_efield_x(j,k,l, ex_aux, ex_fp);
+            warpx_interp_nd_efield_y(j,k,l, ey_aux, ey_fp);
+            warpx_interp_nd_efield_z(j,k,l, ez_aux, ez_fp);
+        });
+    }
+
+    for (int lev = 1; lev <= finest_level; ++lev)
+    {
+        BoxArray const& nba = Bfield_aux[lev][0]->boxArray();
+        BoxArray const& cnba = amrex::coarsen(nba,2);
+        DistributionMapping const& dm = Bfield_aux[lev][0]->DistributionMap();
+        auto const& cperiod = Geom(lev-1).periodicity();
+
+        // Bfield
+        {
+            Array<std::unique_ptr<MultiFab>,3> Btmp;
+            if (Bfield_cax[lev][0]) {
+                for (int i = 0; i < 3; ++i) {
+                    Btmp[i].reset(new MultiFab(*Bfield_cax[lev][i], amrex::make_alias, 0, 1));
+                }
+            } else {
+                IntVect ngtmp = Bfield_aux[lev-1][0]->nGrowVect();
+                for (int i = 0; i < 3; ++i) {
+                    Btmp[i].reset(new MultiFab(cnba, dm, 1, ngtmp));
+                }
+            }
+            // ParallelCopy from coarse level
+            for (int i = 0; i < 3; ++i) {
+                IntVect ng = Btmp[i]->nGrowVect();
+                Btmp[i]->ParallelCopy(*Bfield_aux[lev-1][i], 0, 0, 1, ng, ng, cperiod);
+            }
+
+#ifdef _OPENMP
+#pragma omp parallel if (Gpu::notInLaunchRegion())
+#endif
+            for (MFIter mfi(*Bfield_aux[lev][0]); mfi.isValid(); ++mfi)
+            {
+                Array4<Real> const& bx_aux = Bfield_aux[lev][0]->array(mfi);
+                Array4<Real> const& by_aux = Bfield_aux[lev][1]->array(mfi);
+                Array4<Real> const& bz_aux = Bfield_aux[lev][2]->array(mfi);
+                Array4<Real const> const& bx_fp = Bfield_fp[lev][0]->const_array(mfi);
+                Array4<Real const> const& by_fp = Bfield_fp[lev][1]->const_array(mfi);
+                Array4<Real const> const& bz_fp = Bfield_fp[lev][2]->const_array(mfi);
+                Array4<Real const> const& bx_cp = Bfield_cp[lev][0]->const_array(mfi);
+                Array4<Real const> const& by_cp = Bfield_cp[lev][1]->const_array(mfi);
+                Array4<Real const> const& bz_cp = Bfield_cp[lev][2]->const_array(mfi);
+                Array4<Real const> const& bx_c = Btmp[0]->const_array(mfi);
+                Array4<Real const> const& by_c = Btmp[1]->const_array(mfi);
+                Array4<Real const> const& bz_c = Btmp[2]->const_array(mfi);
+
+                const Box& bx = mfi.fabbox();
+                amrex::ParallelFor(bx,
+                [=] AMREX_GPU_DEVICE (int j, int k, int l) noexcept
+                {
+                    warpx_interp_nd_bfield_x(j,k,l, bx_aux, bx_fp, bx_cp, bx_c);
+                    warpx_interp_nd_bfield_y(j,k,l, by_aux, by_fp, by_cp, by_c);
+                    warpx_interp_nd_bfield_z(j,k,l, bz_aux, bz_fp, bz_cp, bz_c);
+                });
+            }
+        }
+
+        // Efield
+        {
+            Array<std::unique_ptr<MultiFab>,3> Etmp;
+            if (Efield_cax[lev][0]) {
+                for (int i = 0; i < 3; ++i) {
+                    Etmp[i].reset(new MultiFab(*Efield_cax[lev][i], amrex::make_alias, 0, 1));
+                }
+            } else {
+                IntVect ngtmp = Efield_aux[lev-1][0]->nGrowVect();
+                for (int i = 0; i < 3; ++i) {
+                    Etmp[i].reset(new MultiFab(cnba, dm, 1, ngtmp));
+                }
+            }
+            // ParallelCopy from coarse level
+            for (int i = 0; i < 3; ++i) {
+                IntVect ng = Etmp[i]->nGrowVect();
+                Etmp[i]->ParallelCopy(*Efield_aux[lev-1][i], 0, 0, 1, ng, ng, cperiod);
+            }
+
+#ifdef _OPENMP
+#pragma omp parallel if (Gpu::notInLaunchRegion())
+#endif
+            for (MFIter mfi(*Efield_aux[lev][0]); mfi.isValid(); ++mfi)
+            {
+                Array4<Real> const& ex_aux = Efield_aux[lev][0]->array(mfi);
+                Array4<Real> const& ey_aux = Efield_aux[lev][1]->array(mfi);
+                Array4<Real> const& ez_aux = Efield_aux[lev][2]->array(mfi);
+                Array4<Real const> const& ex_fp = Efield_fp[lev][0]->const_array(mfi);
+                Array4<Real const> const& ey_fp = Efield_fp[lev][1]->const_array(mfi);
+                Array4<Real const> const& ez_fp = Efield_fp[lev][2]->const_array(mfi);
+                Array4<Real const> const& ex_cp = Efield_cp[lev][0]->const_array(mfi);
+                Array4<Real const> const& ey_cp = Efield_cp[lev][1]->const_array(mfi);
+                Array4<Real const> const& ez_cp = Efield_cp[lev][2]->const_array(mfi);
+                Array4<Real const> const& ex_c = Etmp[0]->const_array(mfi);
+                Array4<Real const> const& ey_c = Etmp[1]->const_array(mfi);
+                Array4<Real const> const& ez_c = Etmp[2]->const_array(mfi);
+
+                const Box& bx = mfi.fabbox();
+                amrex::ParallelFor(bx,
+                [=] AMREX_GPU_DEVICE (int j, int k, int l) noexcept
+                {
+                    warpx_interp_nd_efield_x(j,k,l, ex_aux, ex_fp, ex_cp, ex_c);
+                    warpx_interp_nd_efield_y(j,k,l, ey_aux, ey_fp, ey_cp, ey_c);
+                    warpx_interp_nd_efield_z(j,k,l, ez_aux, ez_fp, ez_cp, ez_c);
+                });
+            }
+        }
+    }
+}
+
+void
+WarpX::UpdateStagAuxilaryData ()
+{
     for (int lev = 1; lev <= finest_level; ++lev)
     {
         const auto& crse_period = Geom(lev-1).periodicity();
-- 
cgit v1.2.3


From 45b6e243030bb72816f79f55aa60f88f7aa7f7bc Mon Sep 17 00:00:00 2001
From: Axel Huebl <axel.huebl@plasma.ninja>
Date: Thu, 26 Sep 2019 18:24:45 -0700
Subject: Rho Synchronize: Port to GPU

Port the charge density synchronize functions to GPU.
---
 Source/FortranInterface/WarpX_f.F90                |  59 ----------
 Source/FortranInterface/WarpX_f.H                  |   9 --
 .../InterpolateDensityFineToCoarse.H               | 120 +++++++++++++++++++++
 Source/Parallelization/WarpXComm.cpp               |  35 +++---
 Source/WarpX.H                                     |   8 +-
 5 files changed, 143 insertions(+), 88 deletions(-)
 create mode 100644 Source/Parallelization/InterpolateDensityFineToCoarse.H

(limited to 'Source/Parallelization/WarpXComm.cpp')

diff --git a/Source/FortranInterface/WarpX_f.F90 b/Source/FortranInterface/WarpX_f.F90
index 2ddf9409d..762ed2cd8 100644
--- a/Source/FortranInterface/WarpX_f.F90
+++ b/Source/FortranInterface/WarpX_f.F90
@@ -51,65 +51,6 @@ contains
 
   end subroutine warpx_compute_E
 
-  subroutine warpx_sync_rho_2d (lo, hi, crse, clo, chi, fine, flo, fhi, nc) &
-       bind(c, name='warpx_sync_rho_2d')
-    integer, intent(in) :: lo(2), hi(2), flo(2), fhi(2), clo(2), chi(2), nc
-    real(amrex_real), intent(in   ) :: fine(flo(1):fhi(1),flo(2):fhi(2),nc)
-    real(amrex_real), intent(inout) :: crse(clo(1):chi(1),clo(2):chi(2),nc)
-
-    integer :: i,j,ii,jj,m
-
-    do m = 1, nc
-       do j = lo(2), hi(2)
-          jj = j*2
-          do i = lo(1), hi(1)
-             ii = i*2
-             crse(i,j,m) = 0.25d0 * &
-                  ( fine(ii,jj,m) + 0.5d0 *(fine(ii-1,jj  ,m)+fine(ii+1,jj  ,m) &
-                  &                       + fine(ii  ,jj-1,m)+fine(ii  ,jj+1,m)) &
-                  &               + 0.25d0*(fine(ii-1,jj-1,m)+fine(ii+1,jj-1,m) &
-                  &                       + fine(ii-1,jj+1,m)+fine(ii+1,jj+1,m)) )
-          end do
-       end do
-    end do
-  end subroutine warpx_sync_rho_2d
-
-  subroutine warpx_sync_rho_3d (lo, hi, crse, clo, chi, fine, flo, fhi, nc) &
-       bind(c, name='warpx_sync_rho_3d')
-    integer, intent(in) :: lo(3), hi(3), flo(3), fhi(3), clo(3), chi(3), nc
-    real(amrex_real), intent(in   ) :: fine(flo(1):fhi(1),flo(2):fhi(2),flo(3):fhi(3),nc)
-    real(amrex_real), intent(inout) :: crse(clo(1):chi(1),clo(2):chi(2),clo(3):chi(3),nc)
-
-    integer :: i,j,k,ii,jj,kk,m
-
-    do m = 1, nc
-       do k = lo(3), hi(3)
-          kk = k*2
-          do j = lo(2), hi(2)
-             jj = j*2
-             do i = lo(1), hi(1)
-                ii = i*2
-                crse(i,j,k,m) = 0.125d0 * &
-                     (fine(ii,jj,kk,m) + 0.5d0  *(fine(ii-1,jj  ,kk  ,m)+fine(ii+1,jj  ,kk  ,m) &
-                     &                          + fine(ii  ,jj-1,kk  ,m)+fine(ii  ,jj+1,kk  ,m) &
-                     &                          + fine(ii  ,jj  ,kk-1,m)+fine(ii  ,jj  ,kk+1,m)) &
-                     &                 + 0.25d0 *(fine(ii-1,jj-1,kk  ,m)+fine(ii+1,jj-1,kk  ,m) &
-                     &                          + fine(ii-1,jj+1,kk  ,m)+fine(ii+1,jj+1,kk  ,m) &
-                     &                          + fine(ii-1,jj  ,kk-1,m)+fine(ii+1,jj  ,kk-1,m) &
-                     &                          + fine(ii-1,jj  ,kk+1,m)+fine(ii+1,jj  ,kk+1,m) &
-                     &                          + fine(ii  ,jj-1,kk-1,m)+fine(ii  ,jj+1,kk-1,m) &
-                     &                          + fine(ii  ,jj-1,kk+1,m)+fine(ii  ,jj+1,kk+1,m)) &
-                     &                 + 0.125d0*(fine(ii-1,jj-1,kk-1,m)+fine(ii-1,jj-1,kk+1,m) &
-                     &                          + fine(ii-1,jj+1,kk-1,m)+fine(ii-1,jj+1,kk+1,m) &
-                     &                          + fine(ii+1,jj-1,kk-1,m)+fine(ii+1,jj-1,kk+1,m) &
-                     &                          + fine(ii+1,jj+1,kk-1,m)+fine(ii+1,jj+1,kk+1,m)))
-             end do
-          end do
-       end do
-    end do
-
-  end subroutine warpx_sync_rho_3d
-
   subroutine warpx_build_buffer_masks (lo, hi, msk, mlo, mhi, gmsk, glo, ghi, ng) &
        bind(c, name='warpx_build_buffer_masks')
     integer, dimension(3), intent(in) :: lo, hi, mlo, mhi, glo, ghi
diff --git a/Source/FortranInterface/WarpX_f.H b/Source/FortranInterface/WarpX_f.H
index 2d5d346e2..b51a83387 100644
--- a/Source/FortranInterface/WarpX_f.H
+++ b/Source/FortranInterface/WarpX_f.H
@@ -16,8 +16,6 @@
 
 #if (AMREX_SPACEDIM == 3)
 
-#define WRPX_SYNC_RHO                    warpx_sync_rho_3d
-
 #define WRPX_SUM_FINE_TO_CRSE_NODAL      warpx_sum_fine_to_crse_nodal_3d
 #define WRPX_ZERO_OUT_BNDRY              warpx_zero_out_bndry_3d
 #define WRPX_BUILD_MASK                  warpx_build_mask_3d
@@ -30,8 +28,6 @@
 
 #elif (AMREX_SPACEDIM == 2)
 
-#define WRPX_SYNC_RHO                    warpx_sync_rho_2d
-
 #define WRPX_SUM_FINE_TO_CRSE_NODAL      warpx_sum_fine_to_crse_nodal_2d
 #define WRPX_ZERO_OUT_BNDRY              warpx_zero_out_bndry_2d
 #define WRPX_BUILD_MASK                  warpx_build_mask_2d
@@ -229,11 +225,6 @@ extern "C"
 #endif
                           const amrex::Real* sigbz, int sigbz_lo, int sigbz_hi);
 
-    void WRPX_SYNC_RHO (const int* lo, const int* hi,
-                        BL_FORT_FAB_ARG_ANYD(crse),
-                        const BL_FORT_FAB_ARG_ANYD(fine),
-                        const int* ncomp);
-
 #ifdef WARPX_USE_PSATD
     void warpx_fft_mpi_init (int fcomm);
     void warpx_fft_domain_decomp (int* warpx_local_nz, int* warpx_local_z0,
diff --git a/Source/Parallelization/InterpolateDensityFineToCoarse.H b/Source/Parallelization/InterpolateDensityFineToCoarse.H
new file mode 100644
index 000000000..947db2aac
--- /dev/null
+++ b/Source/Parallelization/InterpolateDensityFineToCoarse.H
@@ -0,0 +1,120 @@
+/* Copyright 2019 Axel Huebl, Weiqun Zhang
+ *
+ * This file is part of WarpX.
+ *
+ * License: BSD-3-Clause-LBNL
+ */
+
+#ifndef INTERPOLATE_DENSITY_FINE_TO_COARSE_H
+#define INTERPOLATE_DENSITY_FINE_TO_COARSE_H
+
+#include <AMReX_Array4.H>
+#include <AMReX_REAL.H>
+#include <AMReX_BLassert.H>
+#include <AMReX_Extension.H>
+#include <AMReX_GpuQualifiers.H>
+
+#include <utility> // std::move
+
+
+/** Fill a charge density (rho) coarse patch with averaged values from a fine patch
+ *
+ * Fills the values of the charge density on the coarse patch
+ * by averaging the values of the charge density of the fine patch.
+ */
+class InterpolateDensityFineToCoarse
+{
+public:
+    /** Construct with fine and coarse patch and their refinement ratio
+     *
+     * @param[in]     fine read-only fine patch
+     * @param[in,out] coarse overwritten coarse patch
+     * @param[in]     refinement_ratio ratio between coarse and fine patch granularity
+     *                (currently, only a value of is implemented)
+     * @param[in]     number_of_components the number of components
+     */
+    AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
+    InterpolateDensityFineToCoarse(
+        amrex::Array4<amrex::Real const> const fine,
+        amrex::Array4<amrex::Real      > const coarse,
+        int const refinement_ratio,
+        int const number_of_components
+    ) : m_fine(std::move(fine)),
+        m_coarse(std::move(coarse)),
+        m_refinement_ratio(std::move(refinement_ratio)),
+        m_number_of_components(std::move(number_of_components))
+    {
+        //! @note constants and stencils in operator() implementation assume 2x refinement
+        BL_ASSERT(refinement_ratio == 2);
+    }
+
+    AMREX_GPU_DEVICE AMREX_FORCE_INLINE
+    void
+    operator()(
+        int const i,
+        int const j,
+        int const k
+    ) const noexcept
+    {
+        using namespace amrex;
+
+        auto const & fine_unsafe = m_fine;   // out-of-bounds access not secured with zero-values yet
+        auto const & coarse      = m_coarse; // out-of-bounds access not secured but will also not occur
+
+        // return zero for out-of-bounds accesses during interpolation
+        // this is efficiently used as a method to add neutral elements beyond guards in the average below
+        auto const fine = [fine_unsafe] AMREX_GPU_DEVICE (int const j, int const k, int const l, int const m) noexcept
+        {
+            return fine_unsafe.contains(j, k, l) ? fine_unsafe(j, k, l, m) :  amrex::Real{0.};
+        };
+
+        int const ii = i * m_refinement_ratio;
+        int const jj = j * m_refinement_ratio;
+        int const kk = k * m_refinement_ratio;
+        for( int m = 0; m < m_number_of_components; ++m ) {
+#if AMREX_SPACEDIM == 2
+            coarse(i,j,kk,m) = 0.25_rt * (
+                fine(ii,jj,kk,m) +
+                0.5_rt  * (
+                    fine(ii-1,jj  ,kk,m) + fine(ii+1,jj  ,kk,m) +
+                    fine(ii  ,jj-1,kk,m) + fine(ii  ,jj+1,kk,m)
+                ) +
+                0.25_rt * (
+                    fine(ii-1,jj-1,kk,m) + fine(ii+1,jj-1,kk,m) +
+                    fine(ii-1,jj+1,kk,m) + fine(ii+1,jj+1,kk,m)
+                )
+            );
+#elif AMREX_SPACEDIM == 3
+            coarse(i,j,k,m) = 0.125_rt * (
+                fine(ii,jj,kk,m) +
+                0.5_rt   * (
+                    fine(ii-1,jj  ,kk  ,m) + fine(ii+1,jj  ,kk  ,m) +
+                    fine(ii  ,jj-1,kk  ,m) + fine(ii  ,jj+1,kk  ,m) +
+                    fine(ii  ,jj  ,kk-1,m) + fine(ii  ,jj  ,kk+1,m)
+                ) +
+                0.25_rt  * (
+                    fine(ii-1,jj-1,kk  ,m) + fine(ii+1,jj-1,kk  ,m) +
+                    fine(ii-1,jj+1,kk  ,m) + fine(ii+1,jj+1,kk  ,m) +
+                    fine(ii-1,jj  ,kk-1,m) + fine(ii+1,jj  ,kk-1,m) +
+                    fine(ii-1,jj  ,kk+1,m) + fine(ii+1,jj  ,kk+1,m) +
+                    fine(ii  ,jj-1,kk-1,m) + fine(ii  ,jj+1,kk-1,m) +
+                    fine(ii  ,jj-1,kk+1,m) + fine(ii  ,jj+1,kk+1,m)
+                ) +
+                0.125_rt * (
+                    fine(ii-1,jj-1,kk-1,m) + fine(ii-1,jj-1,kk+1,m) +
+                    fine(ii-1,jj+1,kk-1,m) + fine(ii-1,jj+1,kk+1,m) +
+                    fine(ii+1,jj-1,kk-1,m) + fine(ii+1,jj-1,kk+1,m) +
+                    fine(ii+1,jj+1,kk-1,m) + fine(ii+1,jj+1,kk+1,m)
+                )
+            );
+#endif
+        }
+    }
+private:
+    amrex::Array4< amrex::Real const > m_fine;
+    amrex::Array4< amrex::Real       > m_coarse;
+    int m_refinement_ratio;
+    int m_number_of_components;
+};
+
+#endif // INTERPOLATE_DENSITY_FINE_TO_COARSE_H
diff --git a/Source/Parallelization/WarpXComm.cpp b/Source/Parallelization/WarpXComm.cpp
index 4f870e79c..92f0b4f09 100644
--- a/Source/Parallelization/WarpXComm.cpp
+++ b/Source/Parallelization/WarpXComm.cpp
@@ -3,10 +3,12 @@
 #include <WarpX_f.H>
 #include <WarpXSumGuardCells.H>
 #include <Parallelization/InterpolateCurrentFineToCoarse.H>
+#include <Parallelization/InterpolateDensityFineToCoarse.H>
 
 #include <algorithm>
 #include <cstdlib>
 
+
 using namespace amrex;
 
 void
@@ -354,7 +356,7 @@ WarpX::interpolateCurrentFineToCoarse ( std::array< amrex::MultiFab const *, 3 >
                                         std::array< amrex::MultiFab       *, 3 > const & coarse,
                                         int const refinement_ratio)
 {
-    BL_PROFILE("InterpolateCurrentFineToCoarse()");
+    BL_PROFILE("interpolateCurrentFineToCoarse()");
     BL_ASSERT(refinement_ratio == 2);
     const IntVect& ng = (fine[0]->nGrowVect() + 1) / refinement_ratio; // add equivalent no. of guards to coarse patch
 
@@ -386,6 +388,8 @@ WarpX::interpolateCurrentFineToCoarse ( std::array< amrex::MultiFab const *, 3 >
 void
 WarpX::SyncRho ()
 {
+    BL_PROFILE("SyncRho()");
+
     if (!rho_fp[0]) return;
     const int ncomp = rho_fp[0]->nComp();
 
@@ -395,7 +399,7 @@ WarpX::SyncRho ()
     {
         rho_cp[lev]->setVal(0.0);
         const IntVect& refinement_ratio = refRatio(lev-1);
-        SyncRho(*rho_fp[lev], *rho_cp[lev], refinement_ratio[0]);
+        interpolateDensityFineToCoarse(*rho_fp[lev], *rho_cp[lev], refinement_ratio[0]);
     }
 
     // For each level
@@ -408,29 +412,26 @@ WarpX::SyncRho ()
 }
 
 void
-WarpX::SyncRho (const MultiFab& fine, MultiFab& crse, int refinement_ratio)
+WarpX::interpolateDensityFineToCoarse (const MultiFab& fine, MultiFab& coarse, int const refinement_ratio)
 {
+    BL_PROFILE("interpolateDensityFineToCoarse()");
     BL_ASSERT(refinement_ratio == 2);
-    const IntVect& ng = (fine.nGrowVect()+1)/refinement_ratio;
+    const IntVect& ng = (fine.nGrowVect() + 1) / refinement_ratio;  // add equivalent no. of guards to coarse patch
     const int nc = fine.nComp();
 
 #ifdef _OPEMP
 #pragma omp parallel if (Gpu::notInLaunchRegion())
 #endif
     {
-        FArrayBox ffab;
-        for (MFIter mfi(crse,true); mfi.isValid(); ++mfi)
+        // OMP in-box decomposition of coarse into tilebox
+        for (MFIter mfi(coarse, TilingIfNotGPU()); mfi.isValid(); ++mfi)
         {
-            const Box& bx = mfi.growntilebox(ng);
-            Box fbx = amrex::grow(amrex::refine(bx,refinement_ratio),1);
-            ffab.resize(fbx, nc);
-            fbx &= fine[mfi].box();
-            ffab.setVal(0.0);
-            ffab.copy(fine[mfi], fbx, 0, fbx, 0, nc);
-            WRPX_SYNC_RHO(bx.loVect(), bx.hiVect(),
-                          BL_TO_FORTRAN_ANYD(crse[mfi]),
-                          BL_TO_FORTRAN_ANYD(ffab),
-                          &nc);
+            const Box& bx = mfi.growntilebox(ng); // only grow to outer directions of tileboxes for filling guards
+
+            amrex::ParallelFor(
+                bx,
+                InterpolateDensityFineToCoarse(fine.const_array(mfi), coarse.array(mfi), refinement_ratio, nc)
+            );
         }
     }
 }
@@ -562,7 +563,7 @@ WarpX::RestrictRhoFromFineToCoarsePatch (int lev)
     if (rho_fp[lev]) {
         rho_cp[lev]->setVal(0.0);
         const IntVect& refinement_ratio = refRatio(lev-1);
-        SyncRho(*rho_fp[lev], *rho_cp[lev], refinement_ratio[0]);
+        interpolateDensityFineToCoarse(*rho_fp[lev], *rho_cp[lev], refinement_ratio[0]);
     }
 }
 
diff --git a/Source/WarpX.H b/Source/WarpX.H
index c59802427..0da1cf350 100644
--- a/Source/WarpX.H
+++ b/Source/WarpX.H
@@ -436,10 +436,12 @@ private:
      *  averaging the values of the charge density of the fine patch (on the same level).
      *
      * \param[in]  fine             fine patches to interpolate from
-     * \param[out] crse             coarse patches to interpolate to
-     * \param[in]  ref_ratio        integer ratio between the two
+     * \param[out] coarse           coarse patches to interpolate to
+     * \param[in]  refinement_ratio integer ratio between the two
      */
-    void SyncRho (const amrex::MultiFab& fine, amrex::MultiFab& crse, int ref_ratio);
+    void interpolateDensityFineToCoarse (const amrex::MultiFab& fine,
+                                               amrex::MultiFab& coarse,
+                                         int const refinement_ratio);
 
     void ExchangeWithPmlB (int lev);
     void ExchangeWithPmlE (int lev);
-- 
cgit v1.2.3


From 6b70e4abd40d70b48bf4a40cd1a7675a77c8a7ff Mon Sep 17 00:00:00 2001
From: Weiqun Zhang <weiqunzhang@lbl.gov>
Date: Fri, 4 Oct 2019 16:38:41 -0700
Subject: fix for cases with warpx.do_nodal=1

---
 Source/Parallelization/WarpXComm.cpp | 10 +++++-----
 Source/WarpX.H                       |  4 ++--
 2 files changed, 7 insertions(+), 7 deletions(-)

(limited to 'Source/Parallelization/WarpXComm.cpp')

diff --git a/Source/Parallelization/WarpXComm.cpp b/Source/Parallelization/WarpXComm.cpp
index 6128b27a4..c5a8877d3 100644
--- a/Source/Parallelization/WarpXComm.cpp
+++ b/Source/Parallelization/WarpXComm.cpp
@@ -52,15 +52,15 @@ WarpX::UpdateAuxilaryData ()
 {
     BL_PROFILE("UpdateAuxilaryData()");
 
-    if (Bfield_aux[0][0]->is_nodal()) {
-        UpdateNodalAuxilaryData();
+    if (Bfield_aux[0][0]->ixType() == Bfield_fp[0][0]->ixType()) {
+        UpdateAuxilaryDataSameType();
     } else {
-        UpdateStagAuxilaryData();
+        UpdateAuxilaryDataStagToNodal();
     }
 }
 
 void
-WarpX::UpdateNodalAuxilaryData ()
+WarpX::UpdateAuxilaryDataStagToNodal ()
 {
     // For level 0, we only need to do the average.
 #ifdef _OPENMP
@@ -201,7 +201,7 @@ WarpX::UpdateNodalAuxilaryData ()
 }
 
 void
-WarpX::UpdateStagAuxilaryData ()
+WarpX::UpdateAuxilaryDataSameType ()
 {
     for (int lev = 1; lev <= finest_level; ++lev)
     {
diff --git a/Source/WarpX.H b/Source/WarpX.H
index 561e0533d..dd86dfc14 100644
--- a/Source/WarpX.H
+++ b/Source/WarpX.H
@@ -203,8 +203,8 @@ public:
     // This function does aux(lev) = fp(lev) + I(aux(lev-1)-cp(lev)).
     // Caller must make sure fp and cp have ghost cells filled.
     void UpdateAuxilaryData ();
-    void UpdateNodalAuxilaryData ();
-    void UpdateStagAuxilaryData ();
+    void UpdateAuxilaryDataStagToNodal ();
+    void UpdateAuxilaryDataSameType ();
 
     // Fill boundary cells including coarse/fine boundaries
     void FillBoundaryB ();
-- 
cgit v1.2.3


From aea192d6ff5aabfedf09c9f139e831fb5d7bbdd6 Mon Sep 17 00:00:00 2001
From: Remi Lehe <remi.lehe@normalesup.org>
Date: Mon, 11 Nov 2019 20:43:52 -0800
Subject: Update DepositCharge

---
 Source/Parallelization/Make.package         |  1 +
 Source/Parallelization/WarpXComm.cpp        | 14 +++++++-------
 Source/Particles/WarpXParticleContainer.cpp |  9 +++++----
 Source/WarpX.H                              | 23 -----------------------
 4 files changed, 13 insertions(+), 34 deletions(-)

(limited to 'Source/Parallelization/WarpXComm.cpp')

diff --git a/Source/Parallelization/Make.package b/Source/Parallelization/Make.package
index c74583522..7c3c38627 100644
--- a/Source/Parallelization/Make.package
+++ b/Source/Parallelization/Make.package
@@ -2,6 +2,7 @@ CEXE_sources += WarpXComm.cpp
 CEXE_sources += WarpXRegrid.cpp
 CEXE_headers += WarpXSumGuardCells.H
 CEXE_headers += WarpXComm_K.H
+CEXE_headers += WarpXComm.H
 
 INCLUDE_LOCATIONS += $(WARPX_HOME)/Source/Parallelization
 VPATH_LOCATIONS   += $(WARPX_HOME)/Source/Parallelization
diff --git a/Source/Parallelization/WarpXComm.cpp b/Source/Parallelization/WarpXComm.cpp
index 52df3dc25..851b78748 100644
--- a/Source/Parallelization/WarpXComm.cpp
+++ b/Source/Parallelization/WarpXComm.cpp
@@ -1,14 +1,14 @@
+#include "WarpXComm.H"
 #include <WarpXComm_K.H>
 #include <WarpX.H>
 #include <WarpX_f.H>
 #include <WarpXSumGuardCells.H>
-#include <Parallelization/InterpolateCurrentFineToCoarse.H>
-#include <Parallelization/InterpolateDensityFineToCoarse.H>
+#include <InterpolateCurrentFineToCoarse.H>
+#include <InterpolateDensityFineToCoarse.H>
 
 #include <algorithm>
 #include <cstdlib>
 
-
 using namespace amrex;
 
 void
@@ -503,9 +503,9 @@ WarpX::SyncCurrent ()
 }
 
 void
-WarpX::interpolateCurrentFineToCoarse ( std::array< amrex::MultiFab const *, 3 > const & fine,
-                                        std::array< amrex::MultiFab       *, 3 > const & coarse,
-                                        int const refinement_ratio)
+interpolateCurrentFineToCoarse ( std::array< amrex::MultiFab const *, 3 > const & fine,
+                                 std::array< amrex::MultiFab       *, 3 > const & coarse,
+                                 int const refinement_ratio)
 {
     BL_PROFILE("interpolateCurrentFineToCoarse()");
     BL_ASSERT(refinement_ratio == 2);
@@ -563,7 +563,7 @@ WarpX::SyncRho ()
 }
 
 void
-WarpX::interpolateDensityFineToCoarse (const MultiFab& fine, MultiFab& coarse, int const refinement_ratio)
+interpolateDensityFineToCoarse (const MultiFab& fine, MultiFab& coarse, int const refinement_ratio)
 {
     BL_PROFILE("interpolateDensityFineToCoarse()");
     BL_ASSERT(refinement_ratio == 2);
diff --git a/Source/Particles/WarpXParticleContainer.cpp b/Source/Particles/WarpXParticleContainer.cpp
index 6f195fa87..4be339707 100644
--- a/Source/Particles/WarpXParticleContainer.cpp
+++ b/Source/Particles/WarpXParticleContainer.cpp
@@ -7,7 +7,7 @@
 #include <WarpX_f.H>
 #include <WarpX.H>
 #include <WarpXAlgorithmSelection.H>
-
+#include <WarpXComm.H>
 // Import low-level single-particle kernels
 #include <GetAndSetPosition.H>
 #include <UpdatePosition.H>
@@ -505,7 +505,8 @@ WarpXParticleContainer::DepositCharge (Vector<std::unique_ptr<MultiFab> >& rho,
                                         bool local, bool reset)
 {
     // Loop over the refinement levels
-    for (int lev = 0; lev < max_level; ++lev) {
+    int const finest_level = rho.size() - 1;
+    for (int lev = 0; lev < finest_level; ++lev) {
 
         // Reset the `rho` array if `reset` is True
         if (reset) rho[lev]->setVal(0.0, rho[lev]->nGrow());
@@ -557,8 +558,8 @@ WarpXParticleContainer::DepositCharge (Vector<std::unique_ptr<MultiFab> >& rho,
 
         int const refinement_ratio = 2;
 
-        interpolateDensityFineToCoarse( rho[lev+1], coarsened_fine_data, refinement_ratio );
-        rho[lev].ParallelAdd( coarsened_fine_data, m_gdb->Geom(lev).periodicity() );
+        interpolateDensityFineToCoarse( *rho[lev+1], coarsened_fine_data, refinement_ratio );
+        rho[lev]->ParallelAdd( coarsened_fine_data, m_gdb->Geom(lev).periodicity() );
     }
 }
 
diff --git a/Source/WarpX.H b/Source/WarpX.H
index 4275df3c5..d17787c57 100644
--- a/Source/WarpX.H
+++ b/Source/WarpX.H
@@ -419,29 +419,6 @@ private:
 
     std::array<std::unique_ptr<amrex::MultiFab>, 3> getInterpolatedB(int lev) const;
 
-    /** \brief Fills the values of the current on the coarse patch by
-     *  averaging the values of the current of the fine patch (on the same level).
-     *  Also fills the guards of the coarse patch.
-     *
-     * \param[in]  fine             fine patches to interpolate from
-     * \param[out] coarse           coarse patches to interpolate to
-     * \param[in]  refinement_ratio integer ratio between the two
-     */
-    void interpolateCurrentFineToCoarse (std::array< amrex::MultiFab const *, 3 > const & fine,
-                                         std::array< amrex::MultiFab       *, 3 > const & coarse,
-                                         int const refinement_ratio);
-
-    /** \brief Fills the values of the charge density on the coarse patch by
-     *  averaging the values of the charge density of the fine patch (on the same level).
-     *
-     * \param[in]  fine             fine patches to interpolate from
-     * \param[out] coarse           coarse patches to interpolate to
-     * \param[in]  refinement_ratio integer ratio between the two
-     */
-    void interpolateDensityFineToCoarse (const amrex::MultiFab& fine,
-                                               amrex::MultiFab& coarse,
-                                         int const refinement_ratio);
-
     void ExchangeWithPmlB (int lev);
     void ExchangeWithPmlE (int lev);
     void ExchangeWithPmlF (int lev);
-- 
cgit v1.2.3


From 38ffda500337187c6f51422ccdd0ac208b483691 Mon Sep 17 00:00:00 2001
From: Remi Lehe <remi.lehe@normalesup.org>
Date: Tue, 12 Nov 2019 09:35:13 -0800
Subject: Remove Fortran files

---
 Source/Parallelization/WarpXComm.cpp        |   9 +-
 Source/Particles/Make.package               |   1 -
 Source/Particles/WarpXParticleContainer.cpp |   1 +
 Source/Particles/deposit_cic.F90            | 134 ----------------------------
 4 files changed, 6 insertions(+), 139 deletions(-)
 delete mode 100644 Source/Particles/deposit_cic.F90

(limited to 'Source/Parallelization/WarpXComm.cpp')

diff --git a/Source/Parallelization/WarpXComm.cpp b/Source/Parallelization/WarpXComm.cpp
index 52df3dc25..529d52604 100644
--- a/Source/Parallelization/WarpXComm.cpp
+++ b/Source/Parallelization/WarpXComm.cpp
@@ -1,3 +1,4 @@
+#include <WarpXComm.H>
 #include <WarpXComm_K.H>
 #include <WarpX.H>
 #include <WarpX_f.H>
@@ -503,9 +504,9 @@ WarpX::SyncCurrent ()
 }
 
 void
-WarpX::interpolateCurrentFineToCoarse ( std::array< amrex::MultiFab const *, 3 > const & fine,
-                                        std::array< amrex::MultiFab       *, 3 > const & coarse,
-                                        int const refinement_ratio)
+interpolateCurrentFineToCoarse ( std::array< amrex::MultiFab const *, 3 > const & fine,
+                                 std::array< amrex::MultiFab       *, 3 > const & coarse,
+                                 int const refinement_ratio)
 {
     BL_PROFILE("interpolateCurrentFineToCoarse()");
     BL_ASSERT(refinement_ratio == 2);
@@ -563,7 +564,7 @@ WarpX::SyncRho ()
 }
 
 void
-WarpX::interpolateDensityFineToCoarse (const MultiFab& fine, MultiFab& coarse, int const refinement_ratio)
+interpolateDensityFineToCoarse (const MultiFab& fine, MultiFab& coarse, int const refinement_ratio)
 {
     BL_PROFILE("interpolateDensityFineToCoarse()");
     BL_ASSERT(refinement_ratio == 2);
diff --git a/Source/Particles/Make.package b/Source/Particles/Make.package
index a6c124ddd..6d34fa0ef 100644
--- a/Source/Particles/Make.package
+++ b/Source/Particles/Make.package
@@ -1,4 +1,3 @@
-F90EXE_sources += deposit_cic.F90
 F90EXE_sources += interpolate_cic.F90
 F90EXE_sources += push_particles_ES.F90
 
diff --git a/Source/Particles/WarpXParticleContainer.cpp b/Source/Particles/WarpXParticleContainer.cpp
index 51ed84ea7..2a5be6f5d 100644
--- a/Source/Particles/WarpXParticleContainer.cpp
+++ b/Source/Particles/WarpXParticleContainer.cpp
@@ -4,6 +4,7 @@
 #include <MultiParticleContainer.H>
 #include <WarpXParticleContainer.H>
 #include <AMReX_AmrParGDB.H>
+#include <WarpXComm.H>
 #include <WarpX_f.H>
 #include <WarpX.H>
 #include <WarpXAlgorithmSelection.H>
diff --git a/Source/Particles/deposit_cic.F90 b/Source/Particles/deposit_cic.F90
deleted file mode 100644
index 2831ce96b..000000000
--- a/Source/Particles/deposit_cic.F90
+++ /dev/null
@@ -1,134 +0,0 @@
-module warpx_ES_deposit_cic
-
-  use iso_c_binding
-  use amrex_fort_module, only : amrex_real, amrex_particle_real
-
-  implicit none
-
-contains
-
-! This routine computes the charge density due to the particles using cloud-in-cell
-! deposition. The Fab rho is assumed to be node-centered.
-!
-! Arguments:
-!     particles : a pointer to the particle array-of-structs
-!     ns        : the stride length of particle struct (the size of the struct in number of reals)
-!     np        : the number of particles
-!     weights   : the particle weights
-!     charge    : the charge of this particle species
-!     rho       : a Fab that will contain the charge density on exit
-!     lo        : a pointer to the lo corner of this valid box for rho, in index space
-!     hi        : a pointer to the hi corner of this valid box for rho, in index space
-!     plo       : the real position of the left-hand corner of the problem domain
-!     dx        : the mesh spacing
-!     ng        : the number of ghost cells in rho
-!
-  subroutine warpx_deposit_cic_3d(particles, ns, np,                     &
-                                  weights, charge, rho, lo, hi, plo, dx, &
-                                  ng)                                    &
-       bind(c,name='warpx_deposit_cic_3d')
-    integer, value,   intent(in)     :: ns, np
-    real(amrex_particle_real), intent(in)     :: particles(ns,np)
-    real(amrex_particle_real), intent(in)     :: weights(np)
-    real(amrex_real), intent(in)     :: charge
-    integer,          intent(in)     :: lo(3)
-    integer,          intent(in)     :: hi(3)
-    integer,          intent(in)     :: ng
-    real(amrex_real), intent(inout)  :: rho(lo(1)-ng:hi(1)+ng, lo(2)-ng:hi(2)+ng, lo(3)-ng:hi(3)+ng)
-    real(amrex_real), intent(in)     :: plo(3)
-    real(amrex_real), intent(in)     :: dx(3)
-
-    integer i, j, k, n
-    real(amrex_real) wx_lo, wy_lo, wz_lo, wx_hi, wy_hi, wz_hi
-    real(amrex_real) lx, ly, lz
-    real(amrex_real) inv_dx(3)
-    real(amrex_real) qp, inv_vol
-
-    inv_dx = 1.0d0/dx
-
-    inv_vol = inv_dx(1) * inv_dx(2) * inv_dx(3)
-
-    do n = 1, np
-
-       qp = weights(n) * charge * inv_vol
-
-       lx = (particles(1, n) - plo(1))*inv_dx(1)
-       ly = (particles(2, n) - plo(2))*inv_dx(2)
-       lz = (particles(3, n) - plo(3))*inv_dx(3)
-
-       i = floor(lx)
-       j = floor(ly)
-       k = floor(lz)
-
-       wx_hi = lx - i
-       wy_hi = ly - j
-       wz_hi = lz - k
-
-       wx_lo = 1.0d0 - wx_hi
-       wy_lo = 1.0d0 - wy_hi
-       wz_lo = 1.0d0 - wz_hi
-
-       rho(i,   j,   k  ) = rho(i,   j,   k  ) + wx_lo*wy_lo*wz_lo*qp
-       rho(i,   j,   k+1) = rho(i,   j,   k+1) + wx_lo*wy_lo*wz_hi*qp
-       rho(i,   j+1, k  ) = rho(i,   j+1, k  ) + wx_lo*wy_hi*wz_lo*qp
-       rho(i,   j+1, k+1) = rho(i,   j+1, k+1) + wx_lo*wy_hi*wz_hi*qp
-       rho(i+1, j,   k  ) = rho(i+1, j,   k  ) + wx_hi*wy_lo*wz_lo*qp
-       rho(i+1, j,   k+1) = rho(i+1, j,   k+1) + wx_hi*wy_lo*wz_hi*qp
-       rho(i+1, j+1, k  ) = rho(i+1, j+1, k  ) + wx_hi*wy_hi*wz_lo*qp
-       rho(i+1, j+1, k+1) = rho(i+1, j+1, k+1) + wx_hi*wy_hi*wz_hi*qp
-
-    end do
-
-  end subroutine warpx_deposit_cic_3d
-
-  subroutine warpx_deposit_cic_2d(particles, ns, np,                     &
-                                  weights, charge, rho, lo, hi, plo, dx, &
-                                  ng)                                    &
-       bind(c,name='warpx_deposit_cic_2d')
-    integer, value,   intent(in)     :: ns, np
-    real(amrex_particle_real), intent(in)     :: particles(ns,np)
-    real(amrex_particle_real), intent(in)     :: weights(np)
-    real(amrex_real), intent(in)     :: charge
-    integer,          intent(in)     :: lo(2)
-    integer,          intent(in)     :: hi(2)
-    integer,          intent(in)     :: ng
-    real(amrex_real), intent(inout)  :: rho(lo(1)-ng:hi(1)+ng, lo(2)-ng:hi(2)+ng)
-    real(amrex_real), intent(in)     :: plo(2)
-    real(amrex_real), intent(in)     :: dx(2)
-
-    integer i, j, n
-    real(amrex_real) wx_lo, wy_lo, wx_hi, wy_hi
-    real(amrex_real) lx, ly
-    real(amrex_real) inv_dx(2)
-    real(amrex_real) qp, inv_vol
-
-    inv_dx = 1.0d0/dx
-
-    inv_vol = inv_dx(1) * inv_dx(2)
-
-    do n = 1, np
-
-       qp = weights(n) * charge * inv_vol
-
-       lx = (particles(1, n) - plo(1))*inv_dx(1)
-       ly = (particles(2, n) - plo(2))*inv_dx(2)
-
-       i = floor(lx)
-       j = floor(ly)
-
-       wx_hi = lx - i
-       wy_hi = ly - j
-
-       wx_lo = 1.0d0 - wx_hi
-       wy_lo = 1.0d0 - wy_hi
-
-       rho(i,   j  ) = rho(i,   j  ) + wx_lo*wy_lo*qp
-       rho(i,   j+1) = rho(i,   j+1) + wx_lo*wy_hi*qp
-       rho(i+1, j  ) = rho(i+1, j  ) + wx_hi*wy_lo*qp
-       rho(i+1, j+1) = rho(i+1, j+1) + wx_hi*wy_hi*qp
-
-    end do
-
-  end subroutine warpx_deposit_cic_2d
-
-end module warpx_ES_deposit_cic
-- 
cgit v1.2.3