LCOV - code coverage report
Current view: top level - src/Particle - ParticleSpatialLayout.hpp (source / functions) Coverage Total Hit
Test: final_report.info Lines: 11.1 % 171 19
Test Date: 2025-07-17 08:40:11 Functions: 21.4 % 224 48

            Line data    Source code
       1              : //
       2              : // Class ParticleSpatialLayout
       3              : //   Particle layout based on spatial decomposition.
       4              : //
       5              : //   This is a specialized version of ParticleLayout, which places particles
       6              : //   on processors based on their spatial location relative to a fixed grid.
       7              : //   In particular, this can maintain particles on processors based on a
       8              : //   specified FieldLayout or RegionLayout, so that particles are always on
       9              : //   the same node as the node containing the Field region to which they are
      10              : //   local.  This may also be used if there is no associated Field at all,
      11              : //   in which case a grid is selected based on an even distribution of
      12              : //   particles among processors.
      13              : //
      14              : //   After each 'time step' in a calculation, which is defined as a period
      15              : //   in which the particle positions may change enough to affect the global
      16              : //   layout, the user must call the 'update' routine, which will move
      17              : //   particles between processors, etc.  After the Nth call to update, a
      18              : //   load balancing routine will be called instead.  The user may set the
      19              : //   frequency of load balancing (N), or may supply a function to
      20              : //   determine if load balancing should be done or not.
      21              : //
      22              : #include <memory>
      23              : #include <numeric>
      24              : #include <vector>
      25              : 
      26              : #include "Utility/IpplTimings.h"
      27              : 
      28              : #include "Communicate/Window.h"
      29              : 
      30              : 
      31              : namespace ippl {
      32              : 
      33              :     /*!
      34              :      * We need this struct since Kokkos parallel_scan only accepts
      35              :      * one variable of type ReturnType where to perform the reduction operation.
      36              :      * For more details, see
      37              :      * https://kokkos.github.io/kokkos-core-wiki/API/core/parallel-dispatch/parallel_scan.html.
      38              :      */
      39              :     struct increment_type {
      40              :         size_t count[2];
      41              :         
      42            0 :         KOKKOS_FUNCTION void init() {
      43            0 :             count[0] = 0;
      44            0 :             count[1] = 0;
      45            0 :         }
      46              : 
      47            0 :         KOKKOS_INLINE_FUNCTION increment_type& operator+=(bool* values) {
      48            0 :             count[0] += values[0];
      49            0 :             count[1] += values[1];
      50            0 :             return *this;
      51              :         }
      52              : 
      53              :         KOKKOS_INLINE_FUNCTION increment_type& operator+=(increment_type values) {
      54              :             count[0] += values.count[0];
      55              :             count[1] += values.count[1];
      56              :             return *this;
      57              :         }
      58              :     };
      59              : 
      60              :     template <typename T, unsigned Dim, class Mesh, typename... Properties>
      61           60 :     ParticleSpatialLayout<T, Dim, Mesh, Properties...>::ParticleSpatialLayout(FieldLayout<Dim>& fl,
      62              :                                                                               Mesh& mesh)
      63           60 :         : rlayout_m(fl, mesh)
      64          120 :         , flayout_m(fl)
      65              :     {   
      66           60 :         nRecvs_m.resize(Comm->size());
      67           60 :         window_m.create(*Comm, nRecvs_m.begin(), nRecvs_m.end());
      68           60 :     }
      69              : 
      70              :     template <typename T, unsigned Dim, class Mesh, typename... Properties>
      71           24 :     void ParticleSpatialLayout<T, Dim, Mesh, Properties...>::updateLayout(FieldLayout<Dim>& fl,
      72              :                                                                           Mesh& mesh) {
      73              :         //flayout_m = fl;
      74           24 :         rlayout_m.changeDomain(fl, mesh);
      75           24 :     }
      76              : 
      77              :     template <typename T, unsigned Dim, class Mesh, typename... Properties>
      78              :     template <class ParticleContainer>
      79           84 :     void ParticleSpatialLayout<T, Dim, Mesh, Properties...>::update(ParticleContainer& pc) {
      80              : 
      81              :         /* Apply Boundary Conditions */
      82           84 :         static IpplTimings::TimerRef ParticleBCTimer = IpplTimings::getTimer("particleBC");
      83           84 :         IpplTimings::startTimer(ParticleBCTimer);
      84           84 :         this->applyBC(pc.R, rlayout_m.getDomain());
      85           84 :         IpplTimings::stopTimer(ParticleBCTimer);
      86              : 
      87              :         /* Update Timer for the rest of the function */
      88           84 :         static IpplTimings::TimerRef ParticleUpdateTimer = IpplTimings::getTimer("updateParticle");
      89           84 :         IpplTimings::startTimer(ParticleUpdateTimer);
      90              :         
      91           84 :         int nRanks = Comm->size();
      92           84 :         if (nRanks < 2) {
      93           84 :             return;
      94              :         }
      95              : 
      96              :         /* particle MPI exchange:
      97              :          *   1. figure out which particles need to go where -> locateParticles(...)
      98              :          *   2. fill send buffer and send particles
      99              :          *   3. delete invalidated particles
     100              :          *   4. receive particles
     101              :          */
     102              : 
     103              :         // 1.  figure out which particles need to go where -> locateParticles(...) ============= //
     104              :         
     105            0 :         static IpplTimings::TimerRef locateTimer = IpplTimings::getTimer("locateParticles");
     106            0 :         IpplTimings::startTimer(locateTimer);
     107              : 
     108              :         /* Rank-local number of particles */
     109            0 :         size_type localnum = pc.getLocalNum();
     110              : 
     111              :         /* The indices correspond to the indices of the local particles,
     112              :          * the values correspond to the ranks to which the particles need to be sent
     113              :          */        
     114            0 :         locate_type particleRanks("particles' MPI ranks", localnum);
     115              : 
     116              :         /* The indices are the indices of the particles,
     117              :          * the boolean values describe whether the particle has left the current rank 
     118              :          * 0 --> particle valid (inside current rank)
     119              :          * 1 --> particle invalid (left rank)
     120              :          */
     121            0 :         bool_type invalidParticles("validity of particles", localnum);
     122              : 
     123              :         /* The indices are the MPI ranks,
     124              :          * the values are the number of particles are sent to that rank from myrank 
     125              :          */
     126            0 :         locate_type rankSendCount_dview("rankSendCount Device", nRanks);
     127              : 
     128              :         /* The indices have no particluar meaning, 
     129              :          * the values are the MPI ranks to which we need to send
     130              :          */
     131            0 :         locate_type destinationRanks_dview("destinationRanks Device", nRanks);
     132              : 
     133              :         /* nInvalid is the number of invalid particles
     134              :          * nDestinationRanks is the number of MPI ranks we need to send to
     135              :          */ 
     136            0 :         auto [nInvalid, nDestinationRanks] = 
     137            0 :             locateParticles(pc, 
     138              :                 particleRanks, invalidParticles, 
     139              :                 rankSendCount_dview, destinationRanks_dview);
     140              : 
     141              :         /* Host space copy of rankSendCount_dview */
     142            0 :         auto rankSendCount_hview = 
     143            0 :             Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), rankSendCount_dview);
     144              :         
     145              :         /* Host Space copy of destinationRanks_dview */
     146            0 :         auto destinationRanks_hview = 
     147            0 :             Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), destinationRanks_dview);
     148              : 
     149            0 :         IpplTimings::stopTimer(locateTimer);
     150              : 
     151              :         // 2. fill send buffer and send particles =============================================== //
     152              : 
     153              :         // 2.1 Remote Memory Access window for one-sided communication
     154              :         
     155            0 :         static IpplTimings::TimerRef preprocTimer = IpplTimings::getTimer("sendPreprocess");
     156            0 :         IpplTimings::startTimer(preprocTimer);
     157              :        
     158            0 :         std::fill(nRecvs_m.begin(), nRecvs_m.end(), 0); 
     159              : 
     160            0 :         window_m.fence(0);
     161              :         
     162              :         // Prepare RMA window for the ranks we need to send to  
     163            0 :         for(size_t ridx=0; ridx < nDestinationRanks; ridx++){
     164            0 :             int rank = destinationRanks_hview[ridx];
     165            0 :             if (rank == Comm->rank()){
     166              :                 // we do not need to send to ourselves
     167            0 :                 continue;
     168              :             }
     169            0 :             window_m.put<size_type>(rankSendCount_hview(rank), rank, Comm->rank());
     170              :         }
     171            0 :         window_m.fence(0);
     172              : 
     173            0 :         IpplTimings::stopTimer(preprocTimer);
     174              : 
     175              :         // 2.2 Particle Sends 
     176              : 
     177            0 :         static IpplTimings::TimerRef sendTimer = IpplTimings::getTimer("particleSend");
     178            0 :         IpplTimings::startTimer(sendTimer);
     179              :         
     180            0 :         std::vector<MPI_Request> requests(0);
     181              : 
     182            0 :         int tag = Comm->next_tag(mpi::tag::P_SPATIAL_LAYOUT, mpi::tag::P_LAYOUT_CYCLE);
     183              : 
     184            0 :         for(size_t ridx=0; ridx < nDestinationRanks; ridx++){
     185            0 :             int rank = destinationRanks_hview[ridx];
     186            0 :             if(rank == Comm->rank()){
     187            0 :                continue;
     188              :             } 
     189            0 :             hash_type hash("hash", rankSendCount_hview(rank));
     190            0 :             fillHash(rank, particleRanks, hash);
     191            0 :             pc.sendToRank(rank, tag, requests, hash);
     192              :         }
     193              :        
     194            0 :         IpplTimings::stopTimer(sendTimer);
     195              : 
     196              : 
     197              :         // 3. Internal destruction of invalid particles ======================================= //
     198              :         
     199            0 :         static IpplTimings::TimerRef destroyTimer = IpplTimings::getTimer("particleDestroy");
     200            0 :         IpplTimings::startTimer(destroyTimer);
     201              : 
     202            0 :         pc.internalDestroy(invalidParticles, nInvalid);
     203            0 :         Kokkos::fence();
     204              : 
     205            0 :         IpplTimings::stopTimer(destroyTimer);
     206              :         
     207              :         // 4. Receive Particles ================================================================ // 
     208              :         
     209            0 :         static IpplTimings::TimerRef recvTimer = IpplTimings::getTimer("particleRecv");
     210            0 :         IpplTimings::startTimer(recvTimer);
     211              : 
     212            0 :         for (int rank = 0; rank < nRanks; ++rank) {
     213            0 :             if (nRecvs_m[rank] > 0) {
     214            0 :                 pc.recvFromRank(rank, tag, nRecvs_m[rank]);
     215              :             }
     216              :         }
     217            0 :         IpplTimings::stopTimer(recvTimer);
     218              : 
     219              : 
     220            0 :         IpplTimings::startTimer(sendTimer);
     221              : 
     222            0 :         if (requests.size() > 0) {
     223            0 :             MPI_Waitall(requests.size(), requests.data(), MPI_STATUSES_IGNORE);
     224              :         }
     225            0 :         IpplTimings::stopTimer(sendTimer);
     226              : 
     227            0 :         IpplTimings::stopTimer(ParticleUpdateTimer);
     228            0 :     }
     229              : 
     230              :     template <typename T, unsigned Dim, class Mesh, typename... Properties>
     231              :     template <size_t... Idx>
     232              :     KOKKOS_INLINE_FUNCTION constexpr bool
     233            0 :     ParticleSpatialLayout<T, Dim, Mesh, Properties...>::positionInRegion(
     234              :         const std::index_sequence<Idx...>&, const vector_type& pos, const region_type& region) {
     235            0 :         return ((pos[Idx] > region[Idx].min()) && ...) && ((pos[Idx] <= region[Idx].max()) && ...);
     236              :     };
     237              : 
     238              :     
     239              :     /* Helper function that evaluates the total number of neighbors for the current rank in Dim dimensions.
     240              :     */
     241              :     template <typename T, unsigned Dim, class Mesh, typename... Properties>
     242            0 :     detail::size_type ParticleSpatialLayout<T, Dim, Mesh, Properties...>::getNeighborSize(
     243              :         const neighbor_list& neighbors) const {
     244            0 :         size_type totalSize = 0;
     245              : 
     246            0 :         for (const auto& componentNeighbors : neighbors) {
     247            0 :             totalSize += componentNeighbors.size();
     248              :         }
     249              : 
     250            0 :         return totalSize;
     251              :     }
     252              : 
     253              : 
     254              :     /**
     255              :      * @brief This function determines to which rank particles need to be sent after the iteration step.
     256              :      *        It starts by first scanning direct rank neighbors, and only does a global scan if there are still 
     257              :      *        unfound particles. It then calculates how many particles need to be sent to each rank and how many 
     258              :      *        ranks are sent to in total.
     259              :      *
     260              :      * @param pc           Particle Container
     261              :      * @param ranks        A vector the length of the number of particles on the current rank, where each value refers
     262              :      *                      to the new rank of the particle
     263              :      * @param invalid      A vector marking the particles that need to be sent away, and thus locally deleted
     264              :      * @param nSends_dview Device view the length of number of ranks, where each value determines the number 
     265              :      *                      of particles sent to that rank from the current rank
     266              :      * @param sends_dview  Device view for the number of ranks that are sent to from current rank
     267              :      *
     268              :      * @return tuple with the number of particles sent away and the number of ranks sent to
     269              :      */
     270              :     template <typename T, unsigned Dim, class Mesh, typename... Properties>
     271              :     template <typename ParticleContainer>
     272            0 :     std::pair<detail::size_type, detail::size_type> ParticleSpatialLayout<T, Dim, Mesh, Properties...>::locateParticles(
     273              :         const ParticleContainer& pc, locate_type& ranks, bool_type& invalid, 
     274              :         locate_type& nSends_dview, locate_type& sends_dview) const {
     275              : 
     276            0 :         auto positions           = pc.R.getView();
     277            0 :         region_view_type Regions = rlayout_m.getdLocalRegions();
     278              : 
     279              :         using mdrange_type = Kokkos::MDRangePolicy<Kokkos::Rank<2>, position_execution_space>;
     280              : 
     281            0 :         size_type myRank = Comm->rank();
     282              : 
     283              :         const auto is = std::make_index_sequence<Dim>{};
     284              : 
     285            0 :         const neighbor_list& neighbors = flayout_m.getNeighbors();
     286              : 
     287              :         /// outsideIds: Container of particle IDs that travelled outside of the neighborhood.
     288            0 :         locate_type outsideIds("Particles outside of neighborhood", size_type(pc.getLocalNum()));
     289              : 
     290              :         /// outsideCount: Tracks the number of particles that travelled outside of the neighborhood.
     291            0 :         size_type outsideCount       = 0;
     292              :         /// invalidCount: Tracks the number of particles that need to be sent to other ranks.
     293            0 :         size_type invalidCount       = 0;
     294              : 
     295              :         /// neighborSize: Size of a neighborhood in D dimentions.
     296            0 :         const size_type neighborSize = getNeighborSize(neighbors);
     297              : 
     298              :         /// neighbors_view: Kokkos view with the IDs of the neighboring MPI ranks.
     299            0 :         locate_type neighbors_view("Nearest neighbors IDs", neighborSize);
     300              : 
     301              :         /* red_val: Used to reduce both the number of invalid particles and the number of particles 
     302              :          * outside of the neighborhood (Kokkos::parallel_scan doesn't allow multiple reduction values, so we
     303              :          * use the helper class increment_type). First element updates InvalidCount, second
     304              :          * one updates outsideCount.
     305              :         */
     306              :         increment_type red_val;
     307            0 :         red_val.init();
     308              : 
     309            0 :         auto neighbors_mirror =
     310            0 :             Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), neighbors_view);
     311              : 
     312            0 :         size_t k = 0;
     313              : 
     314            0 :         for (const auto& componentNeighbors : neighbors) {
     315            0 :             for (size_t j = 0; j < componentNeighbors.size(); ++j) {
     316            0 :                 neighbors_mirror(k) = componentNeighbors[j];
     317              :                 //std::cout << "Neighbor: " << neighbors_mirror(k) << std::endl;
     318            0 :                 k++;
     319              :             }
     320              :         }
     321              : 
     322            0 :         Kokkos::deep_copy(neighbors_view, neighbors_mirror);
     323              : 
     324              :         /*! Begin Kokkos loop:
     325              :          * Step 1: search in current rank
     326              :          * Step 2: search in neighbors
     327              :          * Step 3: save information on whether the particle was located
     328              :          * Step 4: run additional loop on non-located particles
     329              :          */
     330            0 :         static IpplTimings::TimerRef neighborSearch = IpplTimings::getTimer("neighborSearch");
     331            0 :         IpplTimings::startTimer(neighborSearch);
     332              : 
     333            0 :         Kokkos::parallel_scan(
     334              :             "ParticleSpatialLayout::locateParticles()",
     335            0 :             Kokkos::RangePolicy<size_t>(0, ranks.extent(0)),
     336            0 :             KOKKOS_LAMBDA(const size_type i, increment_type& val, const bool final) {
     337              :                 /* Step 1
     338              :                 * inCurr: True if the particle hasn't left the current MPI rank.
     339              :                 * inNeighbor: True if the particle is found in a neighboring rank.
     340              :                 * found: True either if inCurr = True or inNeighbor = True.
     341              :                 * increment: Helper variable to update red_val.
     342              :                 */
     343            0 :                 bool inCurr = false;
     344            0 :                 bool inNeighbor = false;
     345            0 :                 bool found = false;
     346              :                 bool increment[2];
     347              : 
     348            0 :                 inCurr = positionInRegion(is, positions(i), Regions(myRank)); 
     349              : 
     350            0 :                 ranks(i)     = inCurr * myRank;
     351            0 :                 invalid(i)   = !inCurr;
     352            0 :                 found =  inCurr || found;
     353              : 
     354              :                 /// Step 2
     355            0 :                 for (size_t j = 0; j < neighbors_view.extent(0); ++j) {
     356            0 :                     size_type rank = neighbors_view(j);
     357              : 
     358            0 :                     inNeighbor = positionInRegion(is, positions(i), Regions(rank));
     359              : 
     360            0 :                     ranks(i) = !(inNeighbor) * ranks(i) + inNeighbor * rank;
     361            0 :                     found =  inNeighbor || found;
     362              :                 }
     363              :                 /// Step 3
     364              :                 /* isOut: When the last thread has finished the search, checks whether the particle has been found 
     365              :                  * either in the current rank or in a neighboring one.
     366              :                  * Used to avoid race conditions when updating outsideIds.
     367              :                  */
     368            0 :                 if(final && !found) {
     369            0 :                     outsideIds(val.count[1]) = i;
     370              :                 }
     371              :                 //outsideIds(val.count[1]) = i * isOut;
     372            0 :                 increment[0] = invalid(i);
     373            0 :                 increment[1] = !found;
     374            0 :                 val += increment;
     375              : 
     376              :             },
     377              :             red_val);
     378              : 
     379            0 :         Kokkos::fence();
     380              : 
     381            0 :         invalidCount  = red_val.count[0];
     382            0 :         outsideCount  = red_val.count[1];
     383              : 
     384            0 :         IpplTimings::stopTimer(neighborSearch);
     385              : 
     386              :         /// Step 4 
     387            0 :         static IpplTimings::TimerRef nonNeighboringParticles = IpplTimings::getTimer("nonNeighboringParticles");
     388            0 :         IpplTimings::startTimer(nonNeighboringParticles);
     389            0 :         if (outsideCount > 0) {
     390            0 :             Kokkos::parallel_for(
     391              :                 "ParticleSpatialLayout::leftParticles()",
     392            0 :                 mdrange_type({0, 0}, {outsideCount, Regions.extent(0)}),
     393            0 :                 KOKKOS_LAMBDA(const size_t i, const size_type j) {
     394              :                     /// pID: (local) ID of the particle that is currently being searched.
     395            0 :                     size_type pId = outsideIds(i);
     396              :                     
     397              :                     /// inRegion: Checks whether particle pID is inside region j.
     398            0 :                     bool inRegion = positionInRegion(is, positions(pId), Regions(j));
     399            0 :                     if(inRegion){
     400            0 :                         ranks(pId) = j;
     401              :                     } 
     402              :                 });
     403            0 :             Kokkos::fence();
     404              :         }
     405            0 :         IpplTimings::stopTimer(nonNeighboringParticles);
     406              :         
     407            0 :         Kokkos::parallel_for("Calculate nSends",
     408            0 :             Kokkos::RangePolicy<size_t>(0, ranks.extent(0)),
     409            0 :             KOKKOS_LAMBDA(const size_t i){
     410            0 :                 size_type rank = ranks(i); 
     411            0 :                 Kokkos::atomic_fetch_add(&nSends_dview(rank),1);
     412              :             }
     413              :         ); 
     414              :     
     415              :         // Number of Ranks we need to send to 
     416            0 :         Kokkos::View<size_type> rankSends("Number of Ranks we need to send to");
     417              :         
     418            0 :         Kokkos::parallel_for("Calculate sends",
     419            0 :                Kokkos::RangePolicy<size_t>(0, nSends_dview.extent(0)),
     420            0 :                KOKKOS_LAMBDA(const size_t rank){
     421            0 :                    if(nSends_dview(rank) != 0){
     422            0 :                        size_type index = Kokkos::atomic_fetch_add(&rankSends(), 1);
     423            0 :                        sends_dview(index) = rank;
     424              :                    }
     425              :                }
     426              :         );
     427              :         size_type temp;
     428            0 :         Kokkos::deep_copy(temp, rankSends);
     429              : 
     430            0 :         return {invalidCount, temp};
     431            0 :     }
     432              : 
     433              :     template <typename T, unsigned Dim, class Mesh, typename... Properties>
     434            0 :     void ParticleSpatialLayout<T, Dim, Mesh, Properties...>::fillHash(int rank,
     435              :                                                                       const locate_type& ranks,
     436              :                                                                       hash_type& hash) {
     437              :         /* Compute the prefix sum and fill the hash
     438              :          */
     439              :         using policy_type = Kokkos::RangePolicy<position_execution_space>;
     440            0 :         Kokkos::parallel_scan(
     441            0 :             "ParticleSpatialLayout::fillHash()", policy_type(0, ranks.extent(0)),
     442            0 :             KOKKOS_LAMBDA(const size_t i, int& idx, const bool final) {
     443            0 :                 if (final) {
     444            0 :                     if (rank == ranks(i)) {
     445            0 :                         hash(idx) = i;
     446              :                     }
     447              :                 }
     448              : 
     449            0 :                 if (rank == ranks(i)) {
     450            0 :                     idx += 1;
     451              :                 }
     452              :             });
     453            0 :         Kokkos::fence();
     454              : 
     455            0 :     }
     456              : 
     457              :     template <typename T, unsigned Dim, class Mesh, typename... Properties>
     458              :     size_t ParticleSpatialLayout<T, Dim, Mesh, Properties...>::numberOfSends(
     459              :         int rank, const locate_type& ranks) {
     460              :         size_t nSends     = 0;
     461              :         using policy_type = Kokkos::RangePolicy<position_execution_space>;
     462              :         Kokkos::parallel_reduce(
     463              :             "ParticleSpatialLayout::numberOfSends()", policy_type(0, ranks.extent(0)),
     464              :             KOKKOS_LAMBDA(const size_t i, size_t& num) { num += size_t(rank == ranks(i)); },
     465              :             nSends);
     466              :         Kokkos::fence();
     467              :         return nSends;
     468              :     }
     469              : 
     470              : }  // namespace ippl
        

Generated by: LCOV version 2.0-1