LCOV - code coverage report
Current view: top level - src/Particle - ParticleSpatialLayout.hpp (source / functions) Coverage Total Hit
Test: final_report.info Lines: 94.2 % 172 162
Test Date: 2025-07-19 00:25:23 Functions: 86.6 % 224 194

            Line data    Source code
       1              : //
       2              : // Class ParticleSpatialLayout
       3              : //   Particle layout based on spatial decomposition.
       4              : //
       5              : //   This is a specialized version of ParticleLayout, which places particles
       6              : //   on processors based on their spatial location relative to a fixed grid.
       7              : //   In particular, this can maintain particles on processors based on a
       8              : //   specified FieldLayout or RegionLayout, so that particles are always on
       9              : //   the same node as the node containing the Field region to which they are
      10              : //   local.  This may also be used if there is no associated Field at all,
      11              : //   in which case a grid is selected based on an even distribution of
      12              : //   particles among processors.
      13              : //
      14              : //   After each 'time step' in a calculation, which is defined as a period
      15              : //   in which the particle positions may change enough to affect the global
      16              : //   layout, the user must call the 'update' routine, which will move
      17              : //   particles between processors, etc.  After the Nth call to update, a
      18              : //   load balancing routine will be called instead.  The user may set the
      19              : //   frequency of load balancing (N), or may supply a function to
      20              : //   determine if load balancing should be done or not.
      21              : //
      22              : #include <memory>
      23              : #include <numeric>
      24              : #include <vector>
      25              : 
      26              : #include "Utility/IpplTimings.h"
      27              : 
      28              : #include "Communicate/Window.h"
      29              : 
      30              : 
      31              : namespace ippl {
      32              : 
      33              :     /*!
      34              :      * We need this struct since Kokkos parallel_scan only accepts
      35              :      * one variable of type ReturnType where to perform the reduction operation.
      36              :      * For more details, see
      37              :      * https://kokkos.github.io/kokkos-core-wiki/API/core/parallel-dispatch/parallel_scan.html.
      38              :      */
      39              :     struct increment_type {
      40              :         size_t count[2];
      41              :         
      42          168 :         KOKKOS_FUNCTION void init() {
      43          168 :             count[0] = 0;
      44          168 :             count[1] = 0;
      45          168 :         }
      46              : 
      47         8448 :         KOKKOS_INLINE_FUNCTION increment_type& operator+=(bool* values) {
      48         8448 :             count[0] += values[0];
      49         8448 :             count[1] += values[1];
      50         8448 :             return *this;
      51              :         }
      52              : 
      53              :         KOKKOS_INLINE_FUNCTION increment_type& operator+=(increment_type values) {
      54              :             count[0] += values.count[0];
      55              :             count[1] += values.count[1];
      56              :             return *this;
      57              :         }
      58              :     };
      59              : 
      60              :     template <typename T, unsigned Dim, class Mesh, typename... Properties>
      61          120 :     ParticleSpatialLayout<T, Dim, Mesh, Properties...>::ParticleSpatialLayout(FieldLayout<Dim>& fl,
      62              :                                                                               Mesh& mesh)
      63          120 :         : rlayout_m(fl, mesh)
      64          240 :         , flayout_m(fl)
      65              :     {   
      66          120 :         nRecvs_m.resize(Comm->size());
      67          120 :         if (Comm->size() > 1) {
      68          120 :             window_m.create(*Comm, nRecvs_m.begin(), nRecvs_m.end());
      69              :         }
      70          120 :     }
      71              : 
      72              :     template <typename T, unsigned Dim, class Mesh, typename... Properties>
      73           48 :     void ParticleSpatialLayout<T, Dim, Mesh, Properties...>::updateLayout(FieldLayout<Dim>& fl,
      74              :                                                                           Mesh& mesh) {
      75              :         //flayout_m = fl;
      76           48 :         rlayout_m.changeDomain(fl, mesh);
      77           48 :     }
      78              : 
      79              :     template <typename T, unsigned Dim, class Mesh, typename... Properties>
      80              :     template <class ParticleContainer>
      81          168 :     void ParticleSpatialLayout<T, Dim, Mesh, Properties...>::update(ParticleContainer& pc) {
      82              : 
      83              :         /* Apply Boundary Conditions */
      84          168 :         static IpplTimings::TimerRef ParticleBCTimer = IpplTimings::getTimer("particleBC");
      85          168 :         IpplTimings::startTimer(ParticleBCTimer);
      86          168 :         this->applyBC(pc.R, rlayout_m.getDomain());
      87          168 :         IpplTimings::stopTimer(ParticleBCTimer);
      88              : 
      89              :         /* Update Timer for the rest of the function */
      90          168 :         static IpplTimings::TimerRef ParticleUpdateTimer = IpplTimings::getTimer("updateParticle");
      91          168 :         IpplTimings::startTimer(ParticleUpdateTimer);
      92              :         
      93          168 :         int nRanks = Comm->size();
      94          168 :         if (nRanks < 2) {
      95            0 :             return;
      96              :         }
      97              : 
      98              :         /* particle MPI exchange:
      99              :          *   1. figure out which particles need to go where -> locateParticles(...)
     100              :          *   2. fill send buffer and send particles
     101              :          *   3. delete invalidated particles
     102              :          *   4. receive particles
     103              :          */
     104              : 
     105              :         // 1.  figure out which particles need to go where -> locateParticles(...) ============= //
     106              :         
     107          168 :         static IpplTimings::TimerRef locateTimer = IpplTimings::getTimer("locateParticles");
     108          168 :         IpplTimings::startTimer(locateTimer);
     109              : 
     110              :         /* Rank-local number of particles */
     111          168 :         size_type localnum = pc.getLocalNum();
     112              : 
     113              :         /* The indices correspond to the indices of the local particles,
     114              :          * the values correspond to the ranks to which the particles need to be sent
     115              :          */        
     116          168 :         locate_type particleRanks("particles' MPI ranks", localnum);
     117              : 
     118              :         /* The indices are the indices of the particles,
     119              :          * the boolean values describe whether the particle has left the current rank 
     120              :          * 0 --> particle valid (inside current rank)
     121              :          * 1 --> particle invalid (left rank)
     122              :          */
     123          168 :         bool_type invalidParticles("validity of particles", localnum);
     124              : 
     125              :         /* The indices are the MPI ranks,
     126              :          * the values are the number of particles are sent to that rank from myrank 
     127              :          */
     128          168 :         locate_type rankSendCount_dview("rankSendCount Device", nRanks);
     129              : 
     130              :         /* The indices have no particluar meaning, 
     131              :          * the values are the MPI ranks to which we need to send
     132              :          */
     133          168 :         locate_type destinationRanks_dview("destinationRanks Device", nRanks);
     134              : 
     135              :         /* nInvalid is the number of invalid particles
     136              :          * nDestinationRanks is the number of MPI ranks we need to send to
     137              :          */ 
     138          168 :         auto [nInvalid, nDestinationRanks] = 
     139          168 :             locateParticles(pc, 
     140              :                 particleRanks, invalidParticles, 
     141              :                 rankSendCount_dview, destinationRanks_dview);
     142              : 
     143              :         /* Host space copy of rankSendCount_dview */
     144          168 :         auto rankSendCount_hview = 
     145          504 :             Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), rankSendCount_dview);
     146              :         
     147              :         /* Host Space copy of destinationRanks_dview */
     148          168 :         auto destinationRanks_hview = 
     149          336 :             Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), destinationRanks_dview);
     150              : 
     151          168 :         IpplTimings::stopTimer(locateTimer);
     152              : 
     153              :         // 2. fill send buffer and send particles =============================================== //
     154              : 
     155              :         // 2.1 Remote Memory Access window for one-sided communication
     156              :         
     157          168 :         static IpplTimings::TimerRef preprocTimer = IpplTimings::getTimer("sendPreprocess");
     158          168 :         IpplTimings::startTimer(preprocTimer);
     159              :        
     160          168 :         std::fill(nRecvs_m.begin(), nRecvs_m.end(), 0); 
     161              : 
     162          168 :         window_m.fence(0);
     163              :         
     164              :         // Prepare RMA window for the ranks we need to send to  
     165          468 :         for(size_t ridx=0; ridx < nDestinationRanks; ridx++){
     166          300 :             int rank = destinationRanks_hview[ridx];
     167          300 :             if (rank == Comm->rank()){
     168              :                 // we do not need to send to ourselves
     169          168 :                 continue;
     170              :             }
     171          264 :             window_m.put<size_type>(rankSendCount_hview(rank), rank, Comm->rank());
     172              :         }
     173          168 :         window_m.fence(0);
     174              : 
     175          168 :         IpplTimings::stopTimer(preprocTimer);
     176              : 
     177              :         // 2.2 Particle Sends 
     178              : 
     179          168 :         static IpplTimings::TimerRef sendTimer = IpplTimings::getTimer("particleSend");
     180          168 :         IpplTimings::startTimer(sendTimer);
     181              :         
     182          168 :         std::vector<MPI_Request> requests(0);
     183              : 
     184          168 :         int tag = Comm->next_tag(mpi::tag::P_SPATIAL_LAYOUT, mpi::tag::P_LAYOUT_CYCLE);
     185              : 
     186          600 :         for(size_t ridx=0; ridx < nDestinationRanks; ridx++){
     187          300 :             int rank = destinationRanks_hview[ridx];
     188          300 :             if(rank == Comm->rank()){
     189          168 :                continue;
     190              :             } 
     191          132 :             hash_type hash("hash", rankSendCount_hview(rank));
     192          132 :             fillHash(rank, particleRanks, hash);
     193          132 :             pc.sendToRank(rank, tag, requests, hash);
     194              :         }
     195              :        
     196          168 :         IpplTimings::stopTimer(sendTimer);
     197              : 
     198              : 
     199              :         // 3. Internal destruction of invalid particles ======================================= //
     200              :         
     201          168 :         static IpplTimings::TimerRef destroyTimer = IpplTimings::getTimer("particleDestroy");
     202          168 :         IpplTimings::startTimer(destroyTimer);
     203              : 
     204          168 :         pc.internalDestroy(invalidParticles, nInvalid);
     205          168 :         Kokkos::fence();
     206              : 
     207          168 :         IpplTimings::stopTimer(destroyTimer);
     208              :         
     209              :         // 4. Receive Particles ================================================================ // 
     210              :         
     211          168 :         static IpplTimings::TimerRef recvTimer = IpplTimings::getTimer("particleRecv");
     212          168 :         IpplTimings::startTimer(recvTimer);
     213              : 
     214          504 :         for (int rank = 0; rank < nRanks; ++rank) {
     215          336 :             if (nRecvs_m[rank] > 0) {
     216          132 :                 pc.recvFromRank(rank, tag, nRecvs_m[rank]);
     217              :             }
     218              :         }
     219          168 :         IpplTimings::stopTimer(recvTimer);
     220              : 
     221              : 
     222          168 :         IpplTimings::startTimer(sendTimer);
     223              : 
     224          168 :         if (requests.size() > 0) {
     225          132 :             MPI_Waitall(requests.size(), requests.data(), MPI_STATUSES_IGNORE);
     226              :         }
     227          168 :         IpplTimings::stopTimer(sendTimer);
     228              : 
     229          168 :         IpplTimings::stopTimer(ParticleUpdateTimer);
     230          168 :     }
     231              : 
     232              :     template <typename T, unsigned Dim, class Mesh, typename... Properties>
     233              :     template <size_t... Idx>
     234              :     KOKKOS_INLINE_FUNCTION constexpr bool
     235       756224 :     ParticleSpatialLayout<T, Dim, Mesh, Properties...>::positionInRegion(
     236              :         const std::index_sequence<Idx...>&, const vector_type& pos, const region_type& region) {
     237       756224 :         return ((pos[Idx] > region[Idx].min()) && ...) && ((pos[Idx] <= region[Idx].max()) && ...);
     238              :     };
     239              : 
     240              :     
     241              :     /* Helper function that evaluates the total number of neighbors for the current rank in Dim dimensions.
     242              :     */
     243              :     template <typename T, unsigned Dim, class Mesh, typename... Properties>
     244          168 :     detail::size_type ParticleSpatialLayout<T, Dim, Mesh, Properties...>::getNeighborSize(
     245              :         const neighbor_list& neighbors) const {
     246          168 :         size_type totalSize = 0;
     247              : 
     248        30576 :         for (const auto& componentNeighbors : neighbors) {
     249        30408 :             totalSize += componentNeighbors.size();
     250              :         }
     251              : 
     252          168 :         return totalSize;
     253              :     }
     254              : 
     255              : 
     256              :     /**
     257              :      * @brief This function determines to which rank particles need to be sent after the iteration step.
     258              :      *        It starts by first scanning direct rank neighbors, and only does a global scan if there are still 
     259              :      *        unfound particles. It then calculates how many particles need to be sent to each rank and how many 
     260              :      *        ranks are sent to in total.
     261              :      *
     262              :      * @param pc           Particle Container
     263              :      * @param ranks        A vector the length of the number of particles on the current rank, where each value refers
     264              :      *                      to the new rank of the particle
     265              :      * @param invalid      A vector marking the particles that need to be sent away, and thus locally deleted
     266              :      * @param nSends_dview Device view the length of number of ranks, where each value determines the number 
     267              :      *                      of particles sent to that rank from the current rank
     268              :      * @param sends_dview  Device view for the number of ranks that are sent to from current rank
     269              :      *
     270              :      * @return tuple with the number of particles sent away and the number of ranks sent to
     271              :      */
     272              :     template <typename T, unsigned Dim, class Mesh, typename... Properties>
     273              :     template <typename ParticleContainer>
     274          168 :     std::pair<detail::size_type, detail::size_type> ParticleSpatialLayout<T, Dim, Mesh, Properties...>::locateParticles(
     275              :         const ParticleContainer& pc, locate_type& ranks, bool_type& invalid, 
     276              :         locate_type& nSends_dview, locate_type& sends_dview) const {
     277              : 
     278          168 :         auto positions           = pc.R.getView();
     279          168 :         region_view_type Regions = rlayout_m.getdLocalRegions();
     280              : 
     281              :         using mdrange_type = Kokkos::MDRangePolicy<Kokkos::Rank<2>, position_execution_space>;
     282              : 
     283          168 :         size_type myRank = Comm->rank();
     284              : 
     285              :         const auto is = std::make_index_sequence<Dim>{};
     286              : 
     287          168 :         const neighbor_list& neighbors = flayout_m.getNeighbors();
     288              : 
     289              :         /// outsideIds: Container of particle IDs that travelled outside of the neighborhood.
     290          168 :         locate_type outsideIds("Particles outside of neighborhood", size_type(pc.getLocalNum()));
     291              : 
     292              :         /// outsideCount: Tracks the number of particles that travelled outside of the neighborhood.
     293          168 :         size_type outsideCount       = 0;
     294              :         /// invalidCount: Tracks the number of particles that need to be sent to other ranks.
     295          168 :         size_type invalidCount       = 0;
     296              : 
     297              :         /// neighborSize: Size of a neighborhood in D dimentions.
     298          168 :         const size_type neighborSize = getNeighborSize(neighbors);
     299              : 
     300              :         /// neighbors_view: Kokkos view with the IDs of the neighboring MPI ranks.
     301          168 :         locate_type neighbors_view("Nearest neighbors IDs", neighborSize);
     302              : 
     303              :         /* red_val: Used to reduce both the number of invalid particles and the number of particles 
     304              :          * outside of the neighborhood (Kokkos::parallel_scan doesn't allow multiple reduction values, so we
     305              :          * use the helper class increment_type). First element updates InvalidCount, second
     306              :          * one updates outsideCount.
     307              :         */
     308              :         increment_type red_val;
     309          168 :         red_val.init();
     310              : 
     311          168 :         auto neighbors_mirror =
     312          336 :             Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), neighbors_view);
     313              : 
     314          168 :         size_t k = 0;
     315              : 
     316        30576 :         for (const auto& componentNeighbors : neighbors) {
     317        42128 :             for (size_t j = 0; j < componentNeighbors.size(); ++j) {
     318        11720 :                 neighbors_mirror(k) = componentNeighbors[j];
     319              :                 //std::cout << "Neighbor: " << neighbors_mirror(k) << std::endl;
     320        11720 :                 k++;
     321              :             }
     322              :         }
     323              : 
     324          168 :         Kokkos::deep_copy(neighbors_view, neighbors_mirror);
     325              : 
     326              :         /*! Begin Kokkos loop:
     327              :          * Step 1: search in current rank
     328              :          * Step 2: search in neighbors
     329              :          * Step 3: save information on whether the particle was located
     330              :          * Step 4: run additional loop on non-located particles
     331              :          */
     332          168 :         static IpplTimings::TimerRef neighborSearch = IpplTimings::getTimer("neighborSearch");
     333          168 :         IpplTimings::startTimer(neighborSearch);
     334              : 
     335          168 :         Kokkos::parallel_scan(
     336              :             "ParticleSpatialLayout::locateParticles()",
     337          336 :             Kokkos::RangePolicy<size_t>(0, ranks.extent(0)),
     338         8784 :             KOKKOS_LAMBDA(const size_type i, increment_type& val, const bool final) {
     339              :                 /* Step 1
     340              :                 * inCurr: True if the particle hasn't left the current MPI rank.
     341              :                 * inNeighbor: True if the particle is found in a neighboring rank.
     342              :                 * found: True either if inCurr = True or inNeighbor = True.
     343              :                 * increment: Helper variable to update red_val.
     344              :                 */
     345         8448 :                 bool inCurr = false;
     346         8448 :                 bool inNeighbor = false;
     347         8448 :                 bool found = false;
     348              :                 bool increment[2];
     349              : 
     350        25344 :                 inCurr = positionInRegion(is, positions(i), Regions(myRank)); 
     351              : 
     352         8448 :                 ranks(i)     = inCurr * myRank;
     353         8448 :                 invalid(i)   = !inCurr;
     354         8448 :                 found =  inCurr || found;
     355              : 
     356              :                 /// Step 2
     357       756224 :                 for (size_t j = 0; j < neighbors_view.extent(0); ++j) {
     358       747776 :                     size_type rank = neighbors_view(j);
     359              : 
     360      2243328 :                     inNeighbor = positionInRegion(is, positions(i), Regions(rank));
     361              : 
     362      1495552 :                     ranks(i) = !(inNeighbor) * ranks(i) + inNeighbor * rank;
     363       747776 :                     found =  inNeighbor || found;
     364              :                 }
     365              :                 /// Step 3
     366              :                 /* isOut: When the last thread has finished the search, checks whether the particle has been found 
     367              :                  * either in the current rank or in a neighboring one.
     368              :                  * Used to avoid race conditions when updating outsideIds.
     369              :                  */
     370         8448 :                 if(final && !found) {
     371            0 :                     outsideIds(val.count[1]) = i;
     372              :                 }
     373              :                 //outsideIds(val.count[1]) = i * isOut;
     374         8448 :                 increment[0] = invalid(i);
     375         8448 :                 increment[1] = !found;
     376         8448 :                 val += increment;
     377              : 
     378              :             },
     379              :             red_val);
     380              : 
     381          168 :         Kokkos::fence();
     382              : 
     383          168 :         invalidCount  = red_val.count[0];
     384          168 :         outsideCount  = red_val.count[1];
     385              : 
     386          168 :         IpplTimings::stopTimer(neighborSearch);
     387              : 
     388              :         /// Step 4 
     389          168 :         static IpplTimings::TimerRef nonNeighboringParticles = IpplTimings::getTimer("nonNeighboringParticles");
     390          168 :         IpplTimings::startTimer(nonNeighboringParticles);
     391          168 :         if (outsideCount > 0) {
     392            0 :             Kokkos::parallel_for(
     393              :                 "ParticleSpatialLayout::leftParticles()",
     394            0 :                 mdrange_type({0, 0}, {outsideCount, Regions.extent(0)}),
     395            0 :                 KOKKOS_LAMBDA(const size_t i, const size_type j) {
     396              :                     /// pID: (local) ID of the particle that is currently being searched.
     397            0 :                     size_type pId = outsideIds(i);
     398              :                     
     399              :                     /// inRegion: Checks whether particle pID is inside region j.
     400            0 :                     bool inRegion = positionInRegion(is, positions(pId), Regions(j));
     401            0 :                     if(inRegion){
     402            0 :                         ranks(pId) = j;
     403              :                     } 
     404              :                 });
     405            0 :             Kokkos::fence();
     406              :         }
     407          168 :         IpplTimings::stopTimer(nonNeighboringParticles);
     408              :         
     409          168 :         Kokkos::parallel_for("Calculate nSends",
     410          336 :             Kokkos::RangePolicy<size_t>(0, ranks.extent(0)),
     411        17232 :             KOKKOS_LAMBDA(const size_t i){
     412         8448 :                 size_type rank = ranks(i); 
     413        16896 :                 Kokkos::atomic_fetch_add(&nSends_dview(rank),1);
     414              :             }
     415              :         ); 
     416              :     
     417              :         // Number of Ranks we need to send to 
     418          168 :         Kokkos::View<size_type> rankSends("Number of Ranks we need to send to");
     419              :         
     420          168 :         Kokkos::parallel_for("Calculate sends",
     421          336 :                Kokkos::RangePolicy<size_t>(0, nSends_dview.extent(0)),
     422         1008 :                KOKKOS_LAMBDA(const size_t rank){
     423          672 :                    if(nSends_dview(rank) != 0){
     424          600 :                        size_type index = Kokkos::atomic_fetch_add(&rankSends(), 1);
     425          600 :                        sends_dview(index) = rank;
     426              :                    }
     427              :                }
     428              :         );
     429              :         size_type temp;
     430          168 :         Kokkos::deep_copy(temp, rankSends);
     431              : 
     432          336 :         return {invalidCount, temp};
     433          168 :     }
     434              : 
     435              :     template <typename T, unsigned Dim, class Mesh, typename... Properties>
     436          132 :     void ParticleSpatialLayout<T, Dim, Mesh, Properties...>::fillHash(int rank,
     437              :                                                                       const locate_type& ranks,
     438              :                                                                       hash_type& hash) {
     439              :         /* Compute the prefix sum and fill the hash
     440              :          */
     441              :         using policy_type = Kokkos::RangePolicy<position_execution_space>;
     442          132 :         Kokkos::parallel_scan(
     443          264 :             "ParticleSpatialLayout::fillHash()", policy_type(0, ranks.extent(0)),
     444        12784 :             KOKKOS_LAMBDA(const size_t i, int& idx, const bool final) {
     445         6260 :                 if (final) {
     446        12520 :                     if (rank == ranks(i)) {
     447         5764 :                         hash(idx) = i;
     448              :                     }
     449              :                 }
     450              : 
     451        12520 :                 if (rank == ranks(i)) {
     452         2882 :                     idx += 1;
     453              :                 }
     454              :             });
     455          132 :         Kokkos::fence();
     456              : 
     457          132 :     }
     458              : 
     459              :     template <typename T, unsigned Dim, class Mesh, typename... Properties>
     460              :     size_t ParticleSpatialLayout<T, Dim, Mesh, Properties...>::numberOfSends(
     461              :         int rank, const locate_type& ranks) {
     462              :         size_t nSends     = 0;
     463              :         using policy_type = Kokkos::RangePolicy<position_execution_space>;
     464              :         Kokkos::parallel_reduce(
     465              :             "ParticleSpatialLayout::numberOfSends()", policy_type(0, ranks.extent(0)),
     466              :             KOKKOS_LAMBDA(const size_t i, size_t& num) { num += size_t(rank == ranks(i)); },
     467              :             nSends);
     468              :         Kokkos::fence();
     469              :         return nSends;
     470              :     }
     471              : 
     472              : }  // namespace ippl
        

Generated by: LCOV version 2.0-1