diff options
-rw-r--r-- | Source/Parallelization/WarpXRegrid.cpp | 61 | ||||
-rw-r--r-- | Source/WarpX.H | 18 | ||||
-rw-r--r-- | Source/WarpX.cpp | 1 |
3 files changed, 73 insertions, 7 deletions
diff --git a/Source/Parallelization/WarpXRegrid.cpp b/Source/Parallelization/WarpXRegrid.cpp index 36a1ed776..43a03f5ed 100644 --- a/Source/Parallelization/WarpXRegrid.cpp +++ b/Source/Parallelization/WarpXRegrid.cpp @@ -30,22 +30,33 @@ WarpX::LoadBalance () for (int lev = 0; lev <= nLevels; ++lev) { #ifdef AMREX_USE_MPI - // Parallel reduce the costs_heurisitc + // Parallel reduce the costs amrex::Vector<Real>::iterator it = (*costs[lev]).begin(); amrex::Real* itAddr = &(*it); - ParallelAllReduce::Sum(itAddr, - costs[lev]->size(), - ParallelContext::CommunicatorSub()); + ParallelAllReduce::Sum(itAddr, costs[lev]->size(), ParallelContext::CommunicatorSub()); #endif + // Compute efficiency for the current distribution mapping + const DistributionMapping& currentdm = DistributionMap(lev); + amrex::Real currentEfficiency = 0.0; + if (load_balance_efficiency_ratio_threshold > 0.0) + { + ComputeDistributionMappingEfficiency(currentdm, *costs[lev], + currentEfficiency); + } + const amrex::Real nboxes = costs[lev]->size(); const amrex::Real nprocs = ParallelContext::NProcsSub(); const int nmax = static_cast<int>(std::ceil(nboxes/nprocs*load_balance_knapsack_factor)); + amrex::Real proposedEfficiency = 0.0; const DistributionMapping newdm = (load_balance_with_sfc) - ? DistributionMapping::makeSFC(*costs[lev], boxArray(lev), false) - : DistributionMapping::makeKnapSack(*costs[lev], nmax); + ? DistributionMapping::makeSFC(*costs[lev], boxArray(lev), proposedEfficiency, false) + : DistributionMapping::makeKnapSack(*costs[lev], proposedEfficiency, nmax); - RemakeLevel(lev, t_new[lev], boxArray(lev), newdm); + if (proposedEfficiency > load_balance_efficiency_ratio_threshold*currentEfficiency) + { + RemakeLevel(lev, t_new[lev], boxArray(lev), newdm); + } } mypc->Redistribute(); } @@ -284,3 +295,39 @@ WarpX::ResetCosts () costs[lev]->assign((*costs[lev]).size(), 0.0); } } + +void +WarpX::ComputeDistributionMappingEfficiency (const DistributionMapping& dm, + const Vector<Real>& cost, + Real& efficiency) +{ + const Real nprocs = ParallelDescriptor::NProcs(); + + // Collect costs per fab corresponding to each rank, then collapse into vector + // of total cost per proc + + // This will store mapping from (proc) --> ([cost_FAB_1, cost_FAB_2, ... ]) + // for each proc + std::map<int, Vector<Real>> rankToCosts; + + for (int i=0; i<cost.size(); ++i) + { + rankToCosts[dm[i]].push_back(cost[i]); + } + + Real maxCost = -1.0; + + // This will store mapping from (proc) --> (sum of cost) for each proc + Vector<Real> rankToCost = {0.0}; + rankToCost.resize(nprocs); + for (int i=0; i<nprocs; ++i) { + const Real rwSum = std::accumulate(rankToCosts[i].begin(), + rankToCosts[i].end(), 0.0); + rankToCost[i] = rwSum; + maxCost = std::max(maxCost, rwSum); + } + + // `efficiency` is mean cost per proc + efficiency = (std::accumulate(rankToCost.begin(), + rankToCost.end(), 0.0) / (nprocs*maxCost)); +} diff --git a/Source/WarpX.H b/Source/WarpX.H index 3c66a6cd7..6d209b604 100644 --- a/Source/WarpX.H +++ b/Source/WarpX.H @@ -493,6 +493,18 @@ public: */ void ComputeCostsHeuristic (amrex::Vector<std::unique_ptr<amrex::Vector<amrex::Real> > >& costs); + /** \brief Computes the average cost per MPI rank given a distribution mapping + * and vector of cost for each FAB. + * @param[in] dm distribution mapping (mapping from FAB to MPI processes) + * @param[in] cost vector which gives mapping from FAB index space to the + * respective FAB's cost + * @param[out] efficiency average cost per MPI process computed from the + * given distribution mapping and and cost + */ + void ComputeDistributionMappingEfficiency (const amrex::DistributionMapping& dm, + const amrex::Vector<amrex::Real>& cost, + amrex::Real& efficiency); + protected: /** @@ -708,6 +720,12 @@ private: * `load_balance_knapsack_factor=2` limits the maximum number of boxes that can * be assigned to a rank to 8. */ amrex::Real load_balance_knapsack_factor = 1.24; + /** Threshold value that controls whether to adopt the proposed distribution + * mapping during load balancing. The new distribution mapping is adopted + * if the ratio of proposed distribution mapping efficiency to current + * distribution mapping efficiency is larger than the threshold; 'efficiency' + * here means the average cost per MPI rank. */ + amrex::Real load_balance_efficiency_ratio_threshold = 0; /** Weight factor for cells in `Heuristic` costs update. * Default values on GPU are determined from single-GPU tests on Summit. * The problem setup for these tests is an empty (i.e. no particles) domain diff --git a/Source/WarpX.cpp b/Source/WarpX.cpp index 3adc7c3d2..32c0b1c27 100644 --- a/Source/WarpX.cpp +++ b/Source/WarpX.cpp @@ -663,6 +663,7 @@ WarpX::ReadParameters () pp.query("load_balance_int", load_balance_int); pp.query("load_balance_with_sfc", load_balance_with_sfc); pp.query("load_balance_knapsack_factor", load_balance_knapsack_factor); + pp.query("load_balance_efficiency_ratio_threshold", load_balance_efficiency_ratio_threshold); pp.query("do_dynamic_scheduling", do_dynamic_scheduling); |