LCOV - code coverage report
Current view: top level - src/Decomposition - OrthogonalRecursiveBisection.hpp (source / functions) Coverage Total Hit
Test: final_report.info Lines: 94.7 % 152 144
Test Date: 2025-07-19 00:25:23 Functions: 87.5 % 88 77

            Line data    Source code
       1              : #include "Utility/IpplTimings.h"
       2              : 
       3              : namespace ippl {
       4              : 
       5              :     template <class Field, class Tp>
       6           48 :     void OrthogonalRecursiveBisection<Field, Tp>::initialize(FieldLayout<Dim>& fl, mesh_type& mesh,
       7              :                                                              const Field& rho) {
       8           48 :         bf_m.initialize(mesh, fl);
       9           48 :         bf_m = rho;
      10           48 :     }
      11              : 
      12              :     template <class Field, class Tp>
      13              :     template <typename Attrib>
      14           48 :     bool OrthogonalRecursiveBisection<Field, Tp>::binaryRepartition(
      15              :         const Attrib& R, FieldLayout<Dim>& fl, const bool& isFirstRepartition) {
      16              :         // Timings
      17           48 :         static IpplTimings::TimerRef tbasicOp       = IpplTimings::getTimer("basicOperations");
      18           48 :         static IpplTimings::TimerRef tperpReduction = IpplTimings::getTimer("perpReduction");
      19           48 :         static IpplTimings::TimerRef tallReduce     = IpplTimings::getTimer("allReduce");
      20           48 :         static IpplTimings::TimerRef tscatter       = IpplTimings::getTimer("scatterR");
      21              : 
      22              :         // Scattering of particle positions in field
      23              :         // In case of first repartition we know the density from the
      24              :         // analytical expression and we use that for load balancing
      25              :         // and create particles. Note the particles are created only
      26              :         // after the first repartition and hence we cannot call scatter
      27              :         // before it.
      28           48 :         IpplTimings::startTimer(tscatter);
      29           48 :         if (!isFirstRepartition) {
      30           48 :             scatterR(R);
      31              :         }
      32              : 
      33           48 :         IpplTimings::stopTimer(tscatter);
      34              : 
      35           48 :         IpplTimings::startTimer(tbasicOp);
      36              : 
      37              :         // Get number of ranks
      38           48 :         auto& comm = fl.comm;
      39           48 :         int nprocs = comm.size();
      40              : 
      41              :         // Start with whole domain and total number of nodes
      42           96 :         std::vector<NDIndex<Dim>> domains = {fl.getDomain()};
      43           96 :         std::vector<int> procs            = {nprocs};
      44              : 
      45              :         // Arrays for reduction
      46           48 :         std::vector<Tf> reduced, reducedRank;
      47              : 
      48              :         // Start recursive repartition loop
      49           48 :         unsigned int it = 0;
      50           48 :         int maxprocs    = nprocs;
      51           48 :         IpplTimings::stopTimer(tbasicOp);
      52              : 
      53           96 :         while (maxprocs > 1) {
      54              :             // Find cut axis
      55           48 :             IpplTimings::startTimer(tbasicOp);
      56           48 :             int cutAxis = findCutAxis(domains[it]);
      57           48 :             IpplTimings::stopTimer(tbasicOp);
      58              : 
      59              :             // Reserve space
      60           48 :             IpplTimings::startTimer(tperpReduction);
      61           48 :             reduced.resize(domains[it][cutAxis].length());
      62           48 :             reducedRank.resize(domains[it][cutAxis].length());
      63              : 
      64           48 :             std::fill(reducedRank.begin(), reducedRank.end(), 0.0);
      65           48 :             std::fill(reduced.begin(), reduced.end(), 0.0);
      66              : 
      67              :             // Peform reduction with field of weights and communicate to the other ranks
      68           48 :             perpendicularReduction(reducedRank, cutAxis, domains[it]);
      69           48 :             IpplTimings::stopTimer(tperpReduction);
      70              : 
      71              :             // Communicate to all the reduced weights
      72           48 :             IpplTimings::startTimer(tallReduce);
      73           48 :             comm.allreduce(reducedRank.data(), reduced.data(), reducedRank.size(), std::plus<Tf>());
      74           48 :             IpplTimings::stopTimer(tallReduce);
      75              : 
      76              :             // Find median of reduced weights
      77           48 :             IpplTimings::startTimer(tbasicOp);
      78              :             // Initialize median to some value (1 is lower bound value)
      79           48 :             int median = 1;
      80           48 :             median     = findMedian(reduced);
      81           48 :             IpplTimings::stopTimer(tbasicOp);
      82              : 
      83              :             // Cut domains and procs
      84           48 :             IpplTimings::startTimer(tbasicOp);
      85           48 :             cutDomain(domains, procs, it, cutAxis, median);
      86              : 
      87              :             // Update max procs
      88           48 :             maxprocs = 0;
      89          144 :             for (unsigned int i = 0; i < procs.size(); i++) {
      90           96 :                 if (procs[i] > maxprocs) {
      91           48 :                     maxprocs = procs[i];
      92           48 :                     it       = i;
      93              :                 }
      94              :             }
      95           48 :             IpplTimings::stopTimer(tbasicOp);
      96              : 
      97              :             // Clear all arrays
      98           48 :             IpplTimings::startTimer(tperpReduction);
      99           48 :             reduced.clear();
     100           48 :             reducedRank.clear();
     101           48 :             IpplTimings::stopTimer(tperpReduction);
     102              :         }
     103              : 
     104              :         // Check that no plane was obtained in the repartition
     105           48 :         IpplTimings::startTimer(tbasicOp);
     106          144 :         for (const auto& domain : domains) {
     107          432 :             for (const auto& axis : domain) {
     108          336 :                 if (axis.length() == 1) {
     109            0 :                     return false;
     110              :                 }
     111              :             }
     112              :         }
     113              : 
     114              :         // Update FieldLayout with new indices
     115           48 :         fl.updateLayout(domains);
     116              : 
     117              :         // Update local field with new layout
     118           48 :         bf_m.updateLayout(fl);
     119           48 :         IpplTimings::stopTimer(tbasicOp);
     120              : 
     121           48 :         return true;
     122           48 :     }
     123              : 
     124              :     template <class Field, class Tp>
     125           48 :     int OrthogonalRecursiveBisection<Field, Tp>::findCutAxis(NDIndex<Dim>& dom) {
     126              :         // Find longest domain size
     127           48 :         return std::distance(dom.begin(), std::max_element(dom.begin(), dom.end(),
     128          120 :                                                            [&](const Index& a, const Index& b) {
     129            0 :                                                                return a.length() < b.length();
     130           48 :                                                            }));
     131              :     }
     132              : 
     133              :     template <class Field, class Tp>
     134           48 :     void OrthogonalRecursiveBisection<Field, Tp>::perpendicularReduction(
     135              :         std::vector<Tf>& rankWeights, unsigned int cutAxis, NDIndex<Dim>& dom) {
     136              :         // Check if domains overlap, if not no need for reduction
     137           48 :         NDIndex<Dim> lDom = bf_m.getOwned();
     138           48 :         if (lDom[cutAxis].first() > dom[cutAxis].last()
     139           48 :             || lDom[cutAxis].last() < dom[cutAxis].first()) {
     140            0 :             return;
     141              :         }
     142              : 
     143              :         // Get field's local weights
     144           48 :         int nghost      = bf_m.getNghost();
     145           48 :         const auto data = bf_m.getView();
     146              : 
     147              :         // Determine the iteration bounds of the reduction
     148           48 :         int cutAxisFirst =
     149           48 :             std::max(lDom[cutAxis].first(), dom[cutAxis].first()) - lDom[cutAxis].first() + nghost;
     150           48 :         int cutAxisLast =
     151           48 :             std::min(lDom[cutAxis].last(), dom[cutAxis].last()) - lDom[cutAxis].first() + nghost;
     152              : 
     153              :         // Set iterator for where to write in the reduced array
     154           48 :         unsigned int arrayStart = 0;
     155           48 :         if (dom[cutAxis].first() < lDom[cutAxis].first()) {
     156           24 :             arrayStart = lDom[cutAxis].first() - dom[cutAxis].first();
     157              :         }
     158              : 
     159              :         // Find all the perpendicular axes
     160              :         using exec_space = typename Field::execution_space;
     161              :         using index_type = typename RangePolicy<Dim, exec_space>::index_type;
     162              :         Kokkos::Array<index_type, Dim> begin, end;
     163          216 :         for (unsigned d = 0; d < Dim; d++) {
     164          168 :             if (d == cutAxis) {
     165           48 :                 continue;
     166              :             }
     167              : 
     168          120 :             int inf = std::max(lDom[d].first(), dom[d].first()) - lDom[d].first() + nghost;
     169          120 :             int sup = std::min(lDom[d].last(), dom[d].last()) - lDom[d].first() + nghost;
     170              :             // inf and sup bounds must be within the domain to reduce, if not no need to reduce
     171          120 :             if (sup < inf) {
     172            0 :                 return;
     173              :             }
     174              : 
     175          120 :             begin[d] = inf;
     176              :             // The +1 is for Kokkos loop
     177          120 :             end[d] = sup + 1;
     178              :         }
     179              : 
     180              :         // Iterate along cutAxis
     181          816 :         for (int i = cutAxisFirst; i <= cutAxisLast; i++) {
     182          768 :             begin[cutAxis] = i;
     183          768 :             end[cutAxis]   = i + 1;
     184              : 
     185              :             // Reducing over perpendicular plane defined by cutAxis
     186          768 :             Tf tempRes = Tf(0);
     187              : 
     188              :             using index_array_type = typename RangePolicy<Dim, exec_space>::index_array_type;
     189          768 :             ippl::parallel_reduce(
     190         1536 :                 "ORB weight reduction", createRangePolicy<Dim, exec_space>(begin, end),
     191       956160 :                 KOKKOS_LAMBDA(const index_array_type& args, Tf& weight) {
     192       477312 :                     weight += apply(data, args);
     193              :                 },
     194         1536 :                 Kokkos::Sum<Tf>(tempRes));
     195              : 
     196          768 :             Kokkos::fence();
     197              : 
     198          768 :             rankWeights[arrayStart++] = tempRes;
     199              :         }
     200           48 :     }
     201              : 
     202              :     template <class Field, class Tp>
     203           48 :     int OrthogonalRecursiveBisection<Field, Tp>::findMedian(std::vector<Tf>& w) {
     204              :         // Special case when array must be cut in half in order to not have planes
     205           48 :         if (w.size() == 4) {
     206            0 :             return 1;
     207              :         }
     208              :         // Get total sum of array
     209           48 :         Tf tot = std::accumulate(w.begin(), w.end(), Tf(0));
     210              : 
     211              :         // Find position of median as half of total in array
     212           48 :         Tf half = 0.5 * tot;
     213           48 :         Tf curr = Tf(0);
     214              :         // Do not need to iterate to full extent since it must not give planes
     215          792 :         for (unsigned int i = 0; i < w.size() - 1; i++) {
     216          792 :             curr += w[i];
     217          792 :             if (curr >= half) {
     218              :                 // If all particles are in the first plane, cut at 1 so to have size 2
     219           48 :                 if (i == 0) {
     220            0 :                     return 1;
     221              :                 }
     222           48 :                 Tf previous = curr - w[i];
     223              :                 // curr - half < half - previous
     224           48 :                 if ((curr + previous) <= tot
     225           24 :                     && curr != half) {  // if true then take current i, otherwise i-1
     226           24 :                     if (i == w.size() - 2) {
     227            0 :                         return (i - 1);
     228              :                     } else {
     229           24 :                         return i;
     230              :                     }
     231              :                 } else {
     232           24 :                     return (i > 1) ? (i - 1) : 1;
     233              :                 }
     234              :             }
     235              :         }
     236              :         // If all particles are in the last plane, cut two indices before the end so to have size 2
     237            0 :         return w.size() - 3;
     238              :     }
     239              : 
     240              :     template <class Field, class Tp>
     241           48 :     void OrthogonalRecursiveBisection<Field, Tp>::cutDomain(std::vector<NDIndex<Dim>>& domains,
     242              :                                                             std::vector<int>& procs, int it,
     243              :                                                             int cutAxis, int median) {
     244              :         // Cut domains[it] in half at median along cutAxis
     245           48 :         NDIndex<Dim> leftDom, rightDom;
     246           48 :         domains[it].split(leftDom, rightDom, cutAxis, median + domains[it][cutAxis].first());
     247           48 :         domains[it] = leftDom;
     248           48 :         domains.insert(domains.begin() + it + 1, rightDom);
     249              : 
     250              :         // Cut procs in half
     251           48 :         int temp  = procs[it];
     252           48 :         procs[it] = procs[it] / 2;
     253           48 :         procs.insert(procs.begin() + it + 1, temp - procs[it]);
     254           48 :     }
     255              : 
     256              :     template <class Field, class Tp>
     257              :     template <typename Attrib>
     258           48 :     void OrthogonalRecursiveBisection<Field, Tp>::scatterR(const Attrib& r) {
     259              :         using vector_type = typename mesh_type::vector_type;
     260              :         static_assert(
     261              :             Kokkos::SpaceAccessibility<typename Attrib::memory_space,
     262              :                                        typename Field::memory_space>::accessible,
     263              :             "Particle attribute memory space must be accessible from ORB field memory space");
     264              : 
     265              :         // Reset local field
     266           48 :         bf_m = 0.0;
     267              :         // Get local data
     268           48 :         auto view                      = bf_m.getView();
     269           48 :         const mesh_type& mesh          = bf_m.get_mesh();
     270           48 :         const FieldLayout<Dim>& layout = bf_m.getLayout();
     271           48 :         const NDIndex<Dim>& lDom       = layout.getLocalNDIndex();
     272           48 :         const int nghost               = bf_m.getNghost();
     273              : 
     274              :         // Get spacings
     275           48 :         const vector_type& dx     = mesh.getMeshSpacing();
     276           48 :         const vector_type& origin = mesh.getOrigin();
     277           48 :         const vector_type invdx   = 1.0 / dx;
     278              : 
     279              :         using policy_type = Kokkos::RangePolicy<size_t, typename Field::execution_space>;
     280              : 
     281           48 :         Kokkos::parallel_for(
     282           96 :             "ParticleAttrib::scatterR", policy_type(0, r.getParticleCount()),
     283         3168 :             KOKKOS_LAMBDA(const size_t idx) {
     284              :                 // Find nearest grid point
     285         3072 :                 Vector<Tp, Dim> l      = (r(idx) - origin) * invdx + 0.5;
     286         3072 :                 Vector<int, Dim> index = l;
     287         3072 :                 Vector<Tf, Dim> whi    = l - index;
     288         3072 :                 Vector<Tf, Dim> wlo    = 1.0 - whi;
     289              : 
     290         3072 :                 Vector<size_t, Dim> args = index - lDom.first() + nghost;
     291              : 
     292              :                 // Scatter
     293         3072 :                 scatterToField(std::make_index_sequence<1 << Dim>{}, view, wlo, whi, args);
     294         3072 :             });
     295              : 
     296           48 :         bf_m.accumulateHalo();
     297           48 :     }
     298              : 
     299              : }  // namespace ippl
        

Generated by: LCOV version 2.0-1