LCOV - code coverage report
Current view: top level - src/Decomposition - OrthogonalRecursiveBisection.hpp (source / functions) Coverage Total Hit
Test: final_report.info Lines: 35.5 % 152 54
Test Date: 2025-07-09 17:06:48 Functions: 47.7 % 88 42

            Line data    Source code
       1              : #include "Utility/IpplTimings.h"
       2              : 
       3              : namespace ippl {
       4              : 
       5              :     template <class Field, class Tp>
       6           24 :     void OrthogonalRecursiveBisection<Field, Tp>::initialize(FieldLayout<Dim>& fl, mesh_type& mesh,
       7              :                                                              const Field& rho) {
       8           24 :         bf_m.initialize(mesh, fl);
       9           24 :         bf_m = rho;
      10           24 :     }
      11              : 
      12              :     template <class Field, class Tp>
      13              :     template <typename Attrib>
      14           24 :     bool OrthogonalRecursiveBisection<Field, Tp>::binaryRepartition(
      15              :         const Attrib& R, FieldLayout<Dim>& fl, const bool& isFirstRepartition) {
      16              :         // Timings
      17           24 :         static IpplTimings::TimerRef tbasicOp       = IpplTimings::getTimer("basicOperations");
      18           24 :         static IpplTimings::TimerRef tperpReduction = IpplTimings::getTimer("perpReduction");
      19           24 :         static IpplTimings::TimerRef tallReduce     = IpplTimings::getTimer("allReduce");
      20           24 :         static IpplTimings::TimerRef tscatter       = IpplTimings::getTimer("scatterR");
      21              : 
      22              :         // Scattering of particle positions in field
      23              :         // In case of first repartition we know the density from the
      24              :         // analytical expression and we use that for load balancing
      25              :         // and create particles. Note the particles are created only
      26              :         // after the first repartition and hence we cannot call scatter
      27              :         // before it.
      28           24 :         IpplTimings::startTimer(tscatter);
      29           24 :         if (!isFirstRepartition) {
      30           24 :             scatterR(R);
      31              :         }
      32              : 
      33           24 :         IpplTimings::stopTimer(tscatter);
      34              : 
      35           24 :         IpplTimings::startTimer(tbasicOp);
      36              : 
      37              :         // Get number of ranks
      38           24 :         auto& comm = fl.comm;
      39           24 :         int nprocs = comm.size();
      40              : 
      41              :         // Start with whole domain and total number of nodes
      42           48 :         std::vector<NDIndex<Dim>> domains = {fl.getDomain()};
      43           48 :         std::vector<int> procs            = {nprocs};
      44              : 
      45              :         // Arrays for reduction
      46           24 :         std::vector<Tf> reduced, reducedRank;
      47              : 
      48              :         // Start recursive repartition loop
      49           24 :         unsigned int it = 0;
      50           24 :         int maxprocs    = nprocs;
      51           24 :         IpplTimings::stopTimer(tbasicOp);
      52              : 
      53           24 :         while (maxprocs > 1) {
      54              :             // Find cut axis
      55            0 :             IpplTimings::startTimer(tbasicOp);
      56            0 :             int cutAxis = findCutAxis(domains[it]);
      57            0 :             IpplTimings::stopTimer(tbasicOp);
      58              : 
      59              :             // Reserve space
      60            0 :             IpplTimings::startTimer(tperpReduction);
      61            0 :             reduced.resize(domains[it][cutAxis].length());
      62            0 :             reducedRank.resize(domains[it][cutAxis].length());
      63              : 
      64            0 :             std::fill(reducedRank.begin(), reducedRank.end(), 0.0);
      65            0 :             std::fill(reduced.begin(), reduced.end(), 0.0);
      66              : 
      67              :             // Peform reduction with field of weights and communicate to the other ranks
      68            0 :             perpendicularReduction(reducedRank, cutAxis, domains[it]);
      69            0 :             IpplTimings::stopTimer(tperpReduction);
      70              : 
      71              :             // Communicate to all the reduced weights
      72            0 :             IpplTimings::startTimer(tallReduce);
      73            0 :             comm.allreduce(reducedRank.data(), reduced.data(), reducedRank.size(), std::plus<Tf>());
      74            0 :             IpplTimings::stopTimer(tallReduce);
      75              : 
      76              :             // Find median of reduced weights
      77            0 :             IpplTimings::startTimer(tbasicOp);
      78              :             // Initialize median to some value (1 is lower bound value)
      79            0 :             int median = 1;
      80            0 :             median     = findMedian(reduced);
      81            0 :             IpplTimings::stopTimer(tbasicOp);
      82              : 
      83              :             // Cut domains and procs
      84            0 :             IpplTimings::startTimer(tbasicOp);
      85            0 :             cutDomain(domains, procs, it, cutAxis, median);
      86              : 
      87              :             // Update max procs
      88            0 :             maxprocs = 0;
      89            0 :             for (unsigned int i = 0; i < procs.size(); i++) {
      90            0 :                 if (procs[i] > maxprocs) {
      91            0 :                     maxprocs = procs[i];
      92            0 :                     it       = i;
      93              :                 }
      94              :             }
      95            0 :             IpplTimings::stopTimer(tbasicOp);
      96              : 
      97              :             // Clear all arrays
      98            0 :             IpplTimings::startTimer(tperpReduction);
      99            0 :             reduced.clear();
     100            0 :             reducedRank.clear();
     101            0 :             IpplTimings::stopTimer(tperpReduction);
     102              :         }
     103              : 
     104              :         // Check that no plane was obtained in the repartition
     105           24 :         IpplTimings::startTimer(tbasicOp);
     106           48 :         for (const auto& domain : domains) {
     107          108 :             for (const auto& axis : domain) {
     108           84 :                 if (axis.length() == 1) {
     109            0 :                     return false;
     110              :                 }
     111              :             }
     112              :         }
     113              : 
     114              :         // Update FieldLayout with new indices
     115           24 :         fl.updateLayout(domains);
     116              : 
     117              :         // Update local field with new layout
     118           24 :         bf_m.updateLayout(fl);
     119           24 :         IpplTimings::stopTimer(tbasicOp);
     120              : 
     121           24 :         return true;
     122           24 :     }
     123              : 
     124              :     template <class Field, class Tp>
     125            0 :     int OrthogonalRecursiveBisection<Field, Tp>::findCutAxis(NDIndex<Dim>& dom) {
     126              :         // Find longest domain size
     127            0 :         return std::distance(dom.begin(), std::max_element(dom.begin(), dom.end(),
     128            0 :                                                            [&](const Index& a, const Index& b) {
     129            0 :                                                                return a.length() < b.length();
     130            0 :                                                            }));
     131              :     }
     132              : 
     133              :     template <class Field, class Tp>
     134            0 :     void OrthogonalRecursiveBisection<Field, Tp>::perpendicularReduction(
     135              :         std::vector<Tf>& rankWeights, unsigned int cutAxis, NDIndex<Dim>& dom) {
     136              :         // Check if domains overlap, if not no need for reduction
     137            0 :         NDIndex<Dim> lDom = bf_m.getOwned();
     138            0 :         if (lDom[cutAxis].first() > dom[cutAxis].last()
     139            0 :             || lDom[cutAxis].last() < dom[cutAxis].first()) {
     140            0 :             return;
     141              :         }
     142              : 
     143              :         // Get field's local weights
     144            0 :         int nghost      = bf_m.getNghost();
     145            0 :         const auto data = bf_m.getView();
     146              : 
     147              :         // Determine the iteration bounds of the reduction
     148            0 :         int cutAxisFirst =
     149            0 :             std::max(lDom[cutAxis].first(), dom[cutAxis].first()) - lDom[cutAxis].first() + nghost;
     150            0 :         int cutAxisLast =
     151            0 :             std::min(lDom[cutAxis].last(), dom[cutAxis].last()) - lDom[cutAxis].first() + nghost;
     152              : 
     153              :         // Set iterator for where to write in the reduced array
     154            0 :         unsigned int arrayStart = 0;
     155            0 :         if (dom[cutAxis].first() < lDom[cutAxis].first()) {
     156            0 :             arrayStart = lDom[cutAxis].first() - dom[cutAxis].first();
     157              :         }
     158              : 
     159              :         // Find all the perpendicular axes
     160              :         using exec_space = typename Field::execution_space;
     161              :         using index_type = typename RangePolicy<Dim, exec_space>::index_type;
     162              :         Kokkos::Array<index_type, Dim> begin, end;
     163            0 :         for (unsigned d = 0; d < Dim; d++) {
     164            0 :             if (d == cutAxis) {
     165            0 :                 continue;
     166              :             }
     167              : 
     168            0 :             int inf = std::max(lDom[d].first(), dom[d].first()) - lDom[d].first() + nghost;
     169            0 :             int sup = std::min(lDom[d].last(), dom[d].last()) - lDom[d].first() + nghost;
     170              :             // inf and sup bounds must be within the domain to reduce, if not no need to reduce
     171            0 :             if (sup < inf) {
     172            0 :                 return;
     173              :             }
     174              : 
     175            0 :             begin[d] = inf;
     176              :             // The +1 is for Kokkos loop
     177            0 :             end[d] = sup + 1;
     178              :         }
     179              : 
     180              :         // Iterate along cutAxis
     181            0 :         for (int i = cutAxisFirst; i <= cutAxisLast; i++) {
     182            0 :             begin[cutAxis] = i;
     183            0 :             end[cutAxis]   = i + 1;
     184              : 
     185              :             // Reducing over perpendicular plane defined by cutAxis
     186            0 :             Tf tempRes = Tf(0);
     187              : 
     188              :             using index_array_type = typename RangePolicy<Dim, exec_space>::index_array_type;
     189            0 :             ippl::parallel_reduce(
     190            0 :                 "ORB weight reduction", createRangePolicy<Dim, exec_space>(begin, end),
     191            0 :                 KOKKOS_LAMBDA(const index_array_type& args, Tf& weight) {
     192            0 :                     weight += apply(data, args);
     193              :                 },
     194            0 :                 Kokkos::Sum<Tf>(tempRes));
     195              : 
     196            0 :             Kokkos::fence();
     197              : 
     198            0 :             rankWeights[arrayStart++] = tempRes;
     199              :         }
     200            0 :     }
     201              : 
     202              :     template <class Field, class Tp>
     203            0 :     int OrthogonalRecursiveBisection<Field, Tp>::findMedian(std::vector<Tf>& w) {
     204              :         // Special case when array must be cut in half in order to not have planes
     205            0 :         if (w.size() == 4) {
     206            0 :             return 1;
     207              :         }
     208              :         // Get total sum of array
     209            0 :         Tf tot = std::accumulate(w.begin(), w.end(), Tf(0));
     210              : 
     211              :         // Find position of median as half of total in array
     212            0 :         Tf half = 0.5 * tot;
     213            0 :         Tf curr = Tf(0);
     214              :         // Do not need to iterate to full extent since it must not give planes
     215            0 :         for (unsigned int i = 0; i < w.size() - 1; i++) {
     216            0 :             curr += w[i];
     217            0 :             if (curr >= half) {
     218              :                 // If all particles are in the first plane, cut at 1 so to have size 2
     219            0 :                 if (i == 0) {
     220            0 :                     return 1;
     221              :                 }
     222            0 :                 Tf previous = curr - w[i];
     223              :                 // curr - half < half - previous
     224            0 :                 if ((curr + previous) <= tot
     225            0 :                     && curr != half) {  // if true then take current i, otherwise i-1
     226            0 :                     if (i == w.size() - 2) {
     227            0 :                         return (i - 1);
     228              :                     } else {
     229            0 :                         return i;
     230              :                     }
     231              :                 } else {
     232            0 :                     return (i > 1) ? (i - 1) : 1;
     233              :                 }
     234              :             }
     235              :         }
     236              :         // If all particles are in the last plane, cut two indices before the end so to have size 2
     237            0 :         return w.size() - 3;
     238              :     }
     239              : 
     240              :     template <class Field, class Tp>
     241            0 :     void OrthogonalRecursiveBisection<Field, Tp>::cutDomain(std::vector<NDIndex<Dim>>& domains,
     242              :                                                             std::vector<int>& procs, int it,
     243              :                                                             int cutAxis, int median) {
     244              :         // Cut domains[it] in half at median along cutAxis
     245            0 :         NDIndex<Dim> leftDom, rightDom;
     246            0 :         domains[it].split(leftDom, rightDom, cutAxis, median + domains[it][cutAxis].first());
     247            0 :         domains[it] = leftDom;
     248            0 :         domains.insert(domains.begin() + it + 1, rightDom);
     249              : 
     250              :         // Cut procs in half
     251            0 :         int temp  = procs[it];
     252            0 :         procs[it] = procs[it] / 2;
     253            0 :         procs.insert(procs.begin() + it + 1, temp - procs[it]);
     254            0 :     }
     255              : 
     256              :     template <class Field, class Tp>
     257              :     template <typename Attrib>
     258           24 :     void OrthogonalRecursiveBisection<Field, Tp>::scatterR(const Attrib& r) {
     259              :         using vector_type = typename mesh_type::vector_type;
     260              :         static_assert(
     261              :             Kokkos::SpaceAccessibility<typename Attrib::memory_space,
     262              :                                        typename Field::memory_space>::accessible,
     263              :             "Particle attribute memory space must be accessible from ORB field memory space");
     264              : 
     265              :         // Reset local field
     266           24 :         bf_m = 0.0;
     267              :         // Get local data
     268           24 :         auto view                      = bf_m.getView();
     269           24 :         const mesh_type& mesh          = bf_m.get_mesh();
     270           24 :         const FieldLayout<Dim>& layout = bf_m.getLayout();
     271           24 :         const NDIndex<Dim>& lDom       = layout.getLocalNDIndex();
     272           24 :         const int nghost               = bf_m.getNghost();
     273              : 
     274              :         // Get spacings
     275           24 :         const vector_type& dx     = mesh.getMeshSpacing();
     276           24 :         const vector_type& origin = mesh.getOrigin();
     277           24 :         const vector_type invdx   = 1.0 / dx;
     278              : 
     279              :         using policy_type = Kokkos::RangePolicy<size_t, typename Field::execution_space>;
     280              : 
     281           24 :         Kokkos::parallel_for(
     282           48 :             "ParticleAttrib::scatterR", policy_type(0, r.getParticleCount()),
     283         3120 :             KOKKOS_LAMBDA(const size_t idx) {
     284              :                 // Find nearest grid point
     285         3072 :                 Vector<Tp, Dim> l      = (r(idx) - origin) * invdx + 0.5;
     286         3072 :                 Vector<int, Dim> index = l;
     287         3072 :                 Vector<Tf, Dim> whi    = l - index;
     288         3072 :                 Vector<Tf, Dim> wlo    = 1.0 - whi;
     289              : 
     290         3072 :                 Vector<size_t, Dim> args = index - lDom.first() + nghost;
     291              : 
     292              :                 // Scatter
     293         3072 :                 scatterToField(std::make_index_sequence<1 << Dim>{}, view, wlo, whi, args);
     294         3072 :             });
     295              : 
     296           24 :         bf_m.accumulateHalo();
     297           24 :     }
     298              : 
     299              : }  // namespace ippl
        

Generated by: LCOV version 2.0-1