|             Line data    Source code 
       1              : #include "Utility/IpplTimings.h"
       2              : #include <type_traits>
       3              : 
       4              : namespace ippl {
       5              : 
       6              :     template <class Field, class Tp>
       7           48 :     void OrthogonalRecursiveBisection<Field, Tp>::initialize(FieldLayout<Dim>& fl, mesh_type& mesh,
       8              :                                                              const Field& rho) {
       9           48 :         bf_m.initialize(mesh, fl);
      10           48 :         bf_m = rho;
      11           48 :     }
      12              : 
      13              :     template <class Field, class Tp>
      14              :     template <typename Attrib>
      15           48 :     bool OrthogonalRecursiveBisection<Field, Tp>::binaryRepartition(
      16              :         const Attrib& R, FieldLayout<Dim>& fl, const bool& isFirstRepartition) {
      17              :         // Timings
      18           48 :         static IpplTimings::TimerRef tbasicOp       = IpplTimings::getTimer("basicOperations");
      19           48 :         static IpplTimings::TimerRef tperpReduction = IpplTimings::getTimer("perpReduction");
      20           48 :         static IpplTimings::TimerRef tallReduce     = IpplTimings::getTimer("allReduce");
      21           48 :         static IpplTimings::TimerRef tscatter       = IpplTimings::getTimer("scatterR");
      22              : 
      23              :         // Scattering of particle positions in field
      24              :         // In case of first repartition we know the density from the
      25              :         // analytical expression and we use that for load balancing
      26              :         // and create particles. Note the particles are created only
      27              :         // after the first repartition and hence we cannot call scatter
      28              :         // before it.
      29           48 :         IpplTimings::startTimer(tscatter);
      30           48 :         if (!isFirstRepartition) {
      31           48 :             scatterR(R);
      32              :         }
      33              : 
      34           48 :         IpplTimings::stopTimer(tscatter);
      35              : 
      36           48 :         IpplTimings::startTimer(tbasicOp);
      37              : 
      38              :         // Get number of ranks
      39           48 :         auto& comm = fl.comm;
      40           48 :         int nprocs = comm.size();
      41              : 
      42              :         // Start with whole domain and total number of nodes
      43           96 :         std::vector<NDIndex<Dim>> domains = {fl.getDomain()};
      44           96 :         std::vector<int> procs            = {nprocs};
      45              : 
      46              :         // Arrays for reduction
      47           48 :         std::vector<Tf> reduced, reducedRank;
      48              : 
      49              :         // Start recursive repartition loop
      50           48 :         unsigned int it = 0;
      51           48 :         int maxprocs    = nprocs;
      52           48 :         IpplTimings::stopTimer(tbasicOp);
      53              : 
      54           96 :         while (maxprocs > 1) {
      55              :             // Find cut axis
      56           48 :             IpplTimings::startTimer(tbasicOp);
      57           48 :             int cutAxis = findCutAxis(domains[it]);
      58           48 :             IpplTimings::stopTimer(tbasicOp);
      59              : 
      60              :             // Reserve space
      61           48 :             IpplTimings::startTimer(tperpReduction);
      62           48 :             reduced.resize(domains[it][cutAxis].length());
      63           48 :             reducedRank.resize(domains[it][cutAxis].length());
      64              : 
      65           48 :             std::fill(reducedRank.begin(), reducedRank.end(), 0.0);
      66           48 :             std::fill(reduced.begin(), reduced.end(), 0.0);
      67              : 
      68              :             // Peform reduction with field of weights and communicate to the other ranks
      69           48 :             perpendicularReduction(reducedRank, cutAxis, domains[it]);
      70           48 :             IpplTimings::stopTimer(tperpReduction);
      71              : 
      72              :             // Communicate to all the reduced weights
      73           48 :             IpplTimings::startTimer(tallReduce);
      74           48 :             comm.allreduce(reducedRank.data(), reduced.data(), reducedRank.size(), std::plus<Tf>());
      75           48 :             IpplTimings::stopTimer(tallReduce);
      76              : 
      77              :             // Find median of reduced weights
      78           48 :             IpplTimings::startTimer(tbasicOp);
      79              :             // Initialize median to some value (1 is lower bound value)
      80           48 :             int median = 1;
      81           48 :             median     = findMedian(reduced);
      82           48 :             IpplTimings::stopTimer(tbasicOp);
      83              : 
      84              :             // Cut domains and procs
      85           48 :             IpplTimings::startTimer(tbasicOp);
      86           48 :             cutDomain(domains, procs, it, cutAxis, median);
      87              : 
      88              :             // Update max procs
      89           48 :             maxprocs = 0;
      90          144 :             for (unsigned int i = 0; i < procs.size(); i++) {
      91           96 :                 if (procs[i] > maxprocs) {
      92           48 :                     maxprocs = procs[i];
      93           48 :                     it       = i;
      94              :                 }
      95              :             }
      96           48 :             IpplTimings::stopTimer(tbasicOp);
      97              : 
      98              :             // Clear all arrays
      99           48 :             IpplTimings::startTimer(tperpReduction);
     100           48 :             reduced.clear();
     101           48 :             reducedRank.clear();
     102           48 :             IpplTimings::stopTimer(tperpReduction);
     103              :         }
     104              : 
     105              :         // Check that no plane was obtained in the repartition
     106           48 :         IpplTimings::startTimer(tbasicOp);
     107          144 :         for (const auto& domain : domains) {
     108          432 :             for (const auto& axis : domain) {
     109          336 :                 if (axis.length() == 1) {
     110            0 :                     return false;
     111              :                 }
     112              :             }
     113              :         }
     114              : 
     115              :         // Update FieldLayout with new indices
     116           48 :         fl.updateLayout(domains);
     117              : 
     118              :         // Update local field with new layout
     119           48 :         bf_m.updateLayout(fl);
     120           48 :         IpplTimings::stopTimer(tbasicOp);
     121              : 
     122           48 :         return true;
     123           48 :     }
     124              : 
     125              :     template <class Field, class Tp>
     126           48 :     int OrthogonalRecursiveBisection<Field, Tp>::findCutAxis(NDIndex<Dim>& dom) {
     127              :         // Find longest domain size
     128           48 :         return std::distance(dom.begin(), std::max_element(dom.begin(), dom.end(),
     129          120 :                                                            [&](const Index& a, const Index& b) {
     130            0 :                                                                return a.length() < b.length();
     131           48 :                                                            }));
     132              :     }
     133              : 
     134              :     template <class Field, class Tp>
     135           48 :     void OrthogonalRecursiveBisection<Field, Tp>::perpendicularReduction(
     136              :         std::vector<Tf>& rankWeights, unsigned int cutAxis, NDIndex<Dim>& dom) {
     137              :         // Check if domains overlap, if not no need for reduction
     138           48 :         NDIndex<Dim> lDom = bf_m.getOwned();
     139           48 :         if (lDom[cutAxis].first() > dom[cutAxis].last()
     140           48 :             || lDom[cutAxis].last() < dom[cutAxis].first()) {
     141            0 :             return;
     142              :         }
     143              : 
     144              :         // Get field's local weights
     145           48 :         int nghost      = bf_m.getNghost();
     146           48 :         const auto data = bf_m.getView();
     147              : 
     148              :         // Determine the iteration bounds of the reduction
     149           48 :         int cutAxisFirst =
     150           48 :             std::max(lDom[cutAxis].first(), dom[cutAxis].first()) - lDom[cutAxis].first() + nghost;
     151           48 :         int cutAxisLast =
     152           48 :             std::min(lDom[cutAxis].last(), dom[cutAxis].last()) - lDom[cutAxis].first() + nghost;
     153              : 
     154              :         // Set iterator for where to write in the reduced array
     155           48 :         unsigned int arrayStart = 0;
     156           48 :         if (dom[cutAxis].first() < lDom[cutAxis].first()) {
     157           24 :             arrayStart = lDom[cutAxis].first() - dom[cutAxis].first();
     158              :         }
     159              : 
     160              :         // Find all the perpendicular axes
     161              :         using exec_space = typename Field::execution_space;
     162              :         using index_type = typename RangePolicy<Dim, exec_space>::index_type;
     163              :         Kokkos::Array<index_type, Dim> begin, end;
     164          216 :         for (unsigned d = 0; d < Dim; d++) {
     165          168 :             if (d == cutAxis) {
     166           48 :                 continue;
     167              :             }
     168              : 
     169          120 :             int inf = std::max(lDom[d].first(), dom[d].first()) - lDom[d].first() + nghost;
     170          120 :             int sup = std::min(lDom[d].last(), dom[d].last()) - lDom[d].first() + nghost;
     171              :             // inf and sup bounds must be within the domain to reduce, if not no need to reduce
     172          120 :             if (sup < inf) {
     173            0 :                 return;
     174              :             }
     175              : 
     176          120 :             begin[d] = inf;
     177              :             // The +1 is for Kokkos loop
     178          120 :             end[d] = sup + 1;
     179              :         }
     180              : 
     181              :         // Iterate along cutAxis
     182          816 :         for (int i = cutAxisFirst; i <= cutAxisLast; i++) {
     183          768 :             begin[cutAxis] = i;
     184          768 :             end[cutAxis]   = i + 1;
     185              : 
     186              :             // Reducing over perpendicular plane defined by cutAxis
     187          768 :             Tf tempRes = Tf(0);
     188              : 
     189              :             using index_array_type = typename RangePolicy<Dim, exec_space>::index_array_type;
     190          768 :             ippl::parallel_reduce(
     191         1536 :                 "ORB weight reduction", createRangePolicy<Dim, exec_space>(begin, end),
     192       956160 :                 KOKKOS_LAMBDA(const index_array_type& args, Tf& weight) {
     193       477312 :                     weight += apply(data, args);
     194              :                 },
     195         1536 :                 Kokkos::Sum<Tf>(tempRes));
     196              : 
     197          768 :             Kokkos::fence();
     198              : 
     199          768 :             rankWeights[arrayStart++] = tempRes;
     200              :         }
     201           48 :     }
     202              : 
     203              :     template <class Field, class Tp>
     204           48 :     int OrthogonalRecursiveBisection<Field, Tp>::findMedian(std::vector<Tf>& w) {
     205              :         // Special case when array must be cut in half in order to not have planes
     206           48 :         if (w.size() == 4) {
     207            0 :             return 1;
     208              :         }
     209              :         // Get total sum of array
     210           48 :         Tf tot = std::accumulate(w.begin(), w.end(), Tf(0));
     211              : 
     212              :         // Find position of median as half of total in array
     213           48 :         Tf half = 0.5 * tot;
     214           48 :         Tf curr = Tf(0);
     215              :         // Do not need to iterate to full extent since it must not give planes
     216          792 :         for (unsigned int i = 0; i < w.size() - 1; i++) {
     217          792 :             curr += w[i];
     218          792 :             if (curr >= half) {
     219              :                 // If all particles are in the first plane, cut at 1 so to have size 2
     220           48 :                 if (i == 0) {
     221            0 :                     return 1;
     222              :                 }
     223           48 :                 Tf previous = curr - w[i];
     224              :                 // curr - half < half - previous
     225           48 :                 if ((curr + previous) <= tot
     226           24 :                     && curr != half) {  // if true then take current i, otherwise i-1
     227           24 :                     if (i == w.size() - 2) {
     228            0 :                         return (i - 1);
     229              :                     } else {
     230           24 :                         return i;
     231              :                     }
     232              :                 } else {
     233           24 :                     return (i > 1) ? (i - 1) : 1;
     234              :                 }
     235              :             }
     236              :         }
     237              :         // If all particles are in the last plane, cut two indices before the end so to have size 2
     238            0 :         return w.size() - 3;
     239              :     }
     240              : 
     241              :     template <class Field, class Tp>
     242           48 :     void OrthogonalRecursiveBisection<Field, Tp>::cutDomain(std::vector<NDIndex<Dim>>& domains,
     243              :                                                             std::vector<int>& procs, int it,
     244              :                                                             int cutAxis, int median) {
     245              :         // Cut domains[it] in half at median along cutAxis
     246           48 :         NDIndex<Dim> leftDom, rightDom;
     247           48 :         domains[it].split(leftDom, rightDom, cutAxis, median + domains[it][cutAxis].first());
     248           48 :         domains[it] = leftDom;
     249           48 :         domains.insert(domains.begin() + it + 1, rightDom);
     250              : 
     251              :         // Cut procs in half
     252           48 :         int temp  = procs[it];
     253           48 :         procs[it] = procs[it] / 2;
     254           48 :         procs.insert(procs.begin() + it + 1, temp - procs[it]);
     255           48 :     }
     256              : 
     257              :     template <class Field, class Tp>
     258              :     template <typename Attrib>
     259           48 :     void OrthogonalRecursiveBisection<Field, Tp>::scatterR(const Attrib& r) {
     260              :         using vector_type = typename mesh_type::vector_type;
     261              :         static_assert(
     262              :             Kokkos::SpaceAccessibility<typename Attrib::memory_space,
     263              :                                        typename Field::memory_space>::accessible,
     264              :             "Particle attribute memory space must be accessible from ORB field memory space");
     265              : 
     266              :         // Reset local field
     267           48 :         bf_m = 0.0;
     268              :         // Get local data
     269           48 :         auto view                      = bf_m.getView();
     270           48 :         const mesh_type& mesh          = bf_m.get_mesh();
     271           48 :         const FieldLayout<Dim>& layout = bf_m.getLayout();
     272           48 :         const NDIndex<Dim>& lDom       = layout.getLocalNDIndex();
     273           48 :         const int nghost               = bf_m.getNghost();
     274              : 
     275              :         // Get spacings
     276           48 :         const vector_type& dx     = mesh.getMeshSpacing();
     277           48 :         const vector_type& origin = mesh.getOrigin();
     278           48 :         const vector_type invdx   = 1.0 / dx;
     279              : 
     280              :         using policy_type = Kokkos::RangePolicy<size_t, typename Field::execution_space>;
     281              : 
     282           48 :         Kokkos::parallel_for(
     283           96 :             "ParticleAttrib::scatterR", policy_type(0, r.getParticleCount()),
     284         3168 :             KOKKOS_LAMBDA(const size_t idx) {
     285              :                 // Find nearest grid point
     286         3072 :                 Vector<Tp, Dim> l      = (r(idx) - origin) * invdx + 0.5;
     287         3072 :                 Vector<int, Dim> index = l;
     288         3072 :                 Vector<Tf, Dim> whi    = l - index;
     289         3072 :                 Vector<Tf, Dim> wlo    = 1.0 - whi;
     290              : 
     291         3072 :                 Vector<size_t, Dim> args = index - lDom.first() + nghost;
     292              :                 
     293              :                 // Scatter
     294         3072 :                 scatterToField(std::make_index_sequence<1 << Dim>{}, view, wlo, whi, args, 1);
     295         3072 :             });
     296              : 
     297           48 :         bf_m.accumulateHalo();
     298           48 :     }
     299              : 
     300              : }  // namespace ippl
         |