From 2e1f6348c991c06161c788c04fdb57ee9bc130fe Mon Sep 17 00:00:00 2001
From: Edoardo Zoni <59625522+EZoni@users.noreply.github.com>
Date: Fri, 10 Apr 2020 15:57:37 -0700
Subject: Interpolation with coarsening for general staggering (#853)

* Start implementation of new averaging with staggering:

- face-to-cell-center and edge-to-cell-center replaced so far;
- TODO: node-to-cell-center and 1D behavior (AMREX_SPACEDIM=1).

* first implementation of Diags base classes

* add example, temporarily

* Continue implementation of new averaging with staggering:

- new function takes reference to single MultiFab (no vector);
- TODO: node-to-cell-center still in progress.

* Fix small bug and clean up

* Fix bug in loop over n=0,...,ncomp-1 and clean up

* add more functions

* Add Doxygen documentation and clean up

* Small clean-up in Doxygen documentation

* Compile in single precision: add _rt suffix to avoid unnecessary conversions

* Avoid accessing staggering index directly from IntVect in innermost loops

* Replace do-while loop with for loop (default ncomp=1)

* Remove temporary pointer and pass reference to MultiFab (instead of MultiFab*)

* Replace AMREX_LAUNCH_HOST_DEVICE_LAMBDA with ParallelFor

* cleaning and initialize output mf

* use general average routine

* move flush in new class, and implemented the Plotfile derived class

* add comments

* eol

* free memory in destructor

* typo

* typo

* no need to clear MF pointers there

* though shalt not break existing tests

* FlushRaw doesnt have to be virtual for now

* The importance of being constant

* Capability to select fields in output files

* EOL

* revert to old inputs

* const in right place

* avoid brace initializer there

* oops, fix logic error in is_in

* user can choose flush interval, same behavior as plot_int

* Add option to plot raw fields

* eol

* replace ter flush with dump to avoid confusion

* add options

* Diagnostics stores a vector of functors to compute diags on the fly

* eol

* Field gather from diags to sync particle quantities

* New diagnostics handle RZ with same behavior as old ones

* cleaning and doc

* const ref for string

* smarter for loop from Axel and typo fix from Reva

* Functors to compute some fields

* simplify code following Dave's comments

* Create subfolders and add more output options (divE etc.)

* eol

* rename mode_avg to convertRZmodes2cartesian

* Update CellCenterFunctor.H

* fill varnames and vector of functors at the same time

* output rho_new, not rho_old

* WarpX instance not needed here

* add const

* little bit more of reorganization

* Apply suggestions from code review

Co-Authored-By: Axel Huebl <axel.huebl@plasma.ninja>

* add a bunch of const

* Move average_down overload used in WarpX to Average::FineToCoarse

* Write Average::FineToCoarse for cell-centered and nodal data

* Clean up Average::FineToCoarse (WARNING: interface changed)

* Small clean-up

* Replace AMREX_LAUNCH_HOST_DEVICE_LAMBDA with ParallelFor

* Small clean-up

* Change name from Average::FineToCoarse to Average::Coarsen

* Add dcomp to interface of Coarsen (different from scomp in general)

* Remove ASSERT (general input and output staggerings will be accepted)

* Fix averaging from fine nodal to coarse centered

* Clean up Doxygen in old Average::ToCellCenter (likely to be removed)

* Intermediate layers Average::CoarsenLoop and Average::CoarsenKernel

* Revert "Intermediate layers Average::CoarsenLoop and Average::CoarsenKernel"

This reverts commit 91f2beaa20617a3fb25df536472b74899942ffc5.

* Clean-up

* Implement interpolation with coarsening (remove averaging)

* Trigger failing source/style checks on Travis CI

* Add ASSERTS to check coarsening ratio and guard cells

* Input/output MultiFabs can have different number of components

* Use AMReX built-in functions to check if MultiFab is coarsenable

* Add Doxygen documentation for new functions

* Copy starting from correct component in destination MultiFab

* Pass IntVect to BoxArray method 'coarsenable' directly

* Improve comments

* Rename 'ratio' as 'crse_ratio'

* Add _rt suffix when necessary

* Improve comments

* Use method 'toIntVect' instead of 'ixType'

* New names: mf_src, arr_src, stag_src, mf_dst, arr_dst, stag_dst

* More clean-up

* Replace calls to ToCellCenter and fix bug with staggering of temporary BoxArray

* Check if coarsenable on BoxArray with destination staggering

* Replace remaining calls to Average::ToCellCenter

* Remove Average::ToCellCenter

* Use amrex::Gpu::ManagedVector<int> to avoid issues on GPUs

* Add default value IntVect(1) for coarsening ratio

* Add ngrow to possibly fill guard cells too

* Add ngrow to Doxygen documentation

* Count number of points outside innermost loop

* Add number of guard cells required to previous ASSERT and force ngrow=0 with coarsening

Co-authored-by: MaxThevenet <mthevenet@lbl.gov>
Co-authored-by: Axel Huebl <axel.huebl@plasma.ninja>
---
 Source/Utils/Average.cpp | 109 +++++++++++++++++++++++++++++++++++++++++------
 1 file changed, 97 insertions(+), 12 deletions(-)

(limited to 'Source/Utils/Average.cpp')

diff --git a/Source/Utils/Average.cpp b/Source/Utils/Average.cpp
index b98f1e948..ba03bf5b8 100644
--- a/Source/Utils/Average.cpp
+++ b/Source/Utils/Average.cpp
@@ -3,27 +3,112 @@
 using namespace amrex;
 
 void
-Average::ToCellCenter ( MultiFab& mf_out,
-                        const MultiFab& mf_in,
-                        const int dcomp,
-                        const int ngrow,
-                        const int scomp,
-                        const int ncomp )
+Average::CoarsenAndInterpolateLoop ( MultiFab& mf_dst,
+                                     const MultiFab& mf_src,
+                                     const int dcomp,
+                                     const int scomp,
+                                     const int ncomp,
+                                     const int ngrow,
+                                     const IntVect crse_ratio )
 {
+    // Staggering of source fine MultiFab and destination coarse MultiFab
+    const IntVect stag_src = mf_src.boxArray().ixType().toIntVect();
+    const IntVect stag_dst = mf_dst.boxArray().ixType().toIntVect();
+
+    if ( crse_ratio > IntVect(1) ) AMREX_ALWAYS_ASSERT_WITH_MESSAGE( ngrow == 0,
+        "option of filling guard cells of destination MultiFab with coarsening not supported for this interpolation" );
+
+    AMREX_ALWAYS_ASSERT_WITH_MESSAGE( mf_src.nGrowVect() >= stag_dst-stag_src+IntVect(ngrow),
+        "source fine MultiFab does not have enough guard cells for this interpolation" );
+
+    // Auxiliary integer arrays (always 3D)
+    Gpu::ManagedVector<int> sf_gpuarr, sc_gpuarr, cr_gpuarr, np_gpuarr;
+    sf_gpuarr.resize( 3 ); // staggering of source fine MultiFab
+    sc_gpuarr.resize( 3 ); // staggering of destination coarse MultiFab
+    cr_gpuarr.resize( 3 ); // coarsening ratio
+    np_gpuarr.resize( 3 ); // number of points to loop over for interpolation
+
+    sf_gpuarr[0] = stag_src[0];
+    sf_gpuarr[1] = stag_src[1];
+#if   (AMREX_SPACEDIM == 2)
+    sf_gpuarr[2] = 0;
+#elif (AMREX_SPACEDIM == 3)
+    sf_gpuarr[2] = stag_src[2];
+#endif
+
+    sc_gpuarr[0] = stag_dst[0];
+    sc_gpuarr[1] = stag_dst[1];
+#if   (AMREX_SPACEDIM == 2)
+    sc_gpuarr[2] = 0;
+#elif (AMREX_SPACEDIM == 3)
+    sc_gpuarr[2] = stag_dst[2];
+#endif
+
+    cr_gpuarr[0] = crse_ratio[0];
+    cr_gpuarr[1] = crse_ratio[1];
+#if   (AMREX_SPACEDIM == 2)
+    cr_gpuarr[2] = 1;
+#elif (AMREX_SPACEDIM == 3)
+    cr_gpuarr[2] = crse_ratio[2];
+#endif
+
+    int const* const AMREX_RESTRICT sf = sf_gpuarr.data();
+    int const* const AMREX_RESTRICT sc = sc_gpuarr.data();
+    int const* const AMREX_RESTRICT cr = cr_gpuarr.data();
+    int      * const AMREX_RESTRICT np = np_gpuarr.data();
+
+    // Compute number of points to loop over (either 1 or 2) in each direction
+    for ( int l = 0; l < 3; ++l ) {
+        if ( cr[l] == 1 ) np[l] = 1+abs(sf[l]-sc[l]); // no coarsening
+        else              np[l] = 2-sf[l];
+    }
+
 #ifdef _OPENMP
 #pragma omp parallel if (Gpu::notInLaunchRegion())
 #endif
     // Loop over boxes (or tiles if not on GPU)
-    for (MFIter mfi( mf_out, TilingIfNotGPU() ); mfi.isValid(); ++mfi)
+    for (MFIter mfi( mf_dst, TilingIfNotGPU() ); mfi.isValid(); ++mfi)
     {
-        const Box bx = mfi.growntilebox( ngrow );
-        Array4<Real> const& mf_out_arr = mf_out.array( mfi );
-        Array4<Real const> const& mf_in_arr = mf_in.const_array( mfi );
-        const IntVect stag = mf_in.boxArray().ixType().toIntVect();
+        // Tiles defined at the coarse level
+        const Box& bx = mfi.growntilebox( ngrow );
+        Array4<Real> const& arr_dst = mf_dst.array( mfi );
+        Array4<Real const> const& arr_src = mf_src.const_array( mfi );
         ParallelFor( bx, ncomp,
                      [=] AMREX_GPU_DEVICE( int i, int j, int k, int n )
                      {
-                         mf_out_arr(i,j,k,n+dcomp) = Average::ToCellCenter( mf_in_arr, stag, i, j, k, n+scomp );
+                         arr_dst(i,j,k,n+dcomp) = Average::CoarsenAndInterpolateKernel(
+                             arr_src, sf, sc, cr, np, i, j, k, n+scomp );
                      } );
     }
 }
+
+void
+Average::CoarsenAndInterpolate ( MultiFab& mf_dst,
+                                 const MultiFab& mf_src,
+                                 const int dcomp,
+                                 const int scomp,
+                                 const int ncomp,
+                                 const int ngrow,
+                                 const IntVect crse_ratio )
+{
+    BL_PROFILE( "Average::CoarsenAndInterpolate" );
+
+    // Convert BoxArray of source MultiFab to staggering of destination MultiFab and coarsen it (if possible)
+    BoxArray ba_tmp = amrex::convert( mf_src.boxArray(), mf_dst.ixType().toIntVect() );
+    AMREX_ALWAYS_ASSERT_WITH_MESSAGE( ba_tmp.coarsenable( crse_ratio ),
+        "source MultiFab converted to staggering of destination MultiFab is not coarsenable" );
+    ba_tmp.coarsen( crse_ratio );
+
+    if ( ba_tmp == mf_dst.boxArray() and mf_src.DistributionMap() == mf_dst.DistributionMap() )
+        Average::CoarsenAndInterpolateLoop( mf_dst, mf_src, dcomp, scomp, ncomp, ngrow, crse_ratio );
+    else
+    {
+        // Cannot coarsen into MultiFab with different BoxArray or DistributionMapping:
+        // 1) create temporary MultiFab on coarsened version of source BoxArray with same DistributionMapping
+        MultiFab mf_tmp( ba_tmp, mf_src.DistributionMap(), ncomp, 0, MFInfo(), FArrayBoxFactory() );
+        // 2) interpolate from mf_src to mf_tmp (start writing into component 0)
+        Average::CoarsenAndInterpolateLoop( mf_tmp, mf_src, 0, scomp, ncomp, ngrow, crse_ratio );
+        // 3) copy from mf_tmp to mf_dst (with different BoxArray or DistributionMapping)
+        mf_dst.copy( mf_tmp, 0, dcomp, ncomp );
+    }
+}
-- 
cgit v1.2.3