//
// Class ParticleSpatialLayout
// Particle layout based on spatial decomposition.
//
// This is a specialized version of ParticleLayout, which places particles
// on processors based on their spatial location relative to a fixed grid.
// In particular, this can maintain particles on processors based on a
// specified FieldLayout or RegionLayout, so that particles are always on
// the same node as the node containing the Field region to which they are
// local. This may also be used if there is no associated Field at all,
// in which case a grid is selected based on an even distribution of
// particles among processors.
//
// After each 'time step' in a calculation, defined as a period in which the
// particle positions may change enough to affect the global layout, the user
// must call the 'update' routine, which will move particles between
// processors, etc. After the Nth call to update, a load balancing routine
// is called instead. The user may set the frequency of load balancing (N),
// or may supply a function that determines whether load balancing should
// be done.
//
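// A minimal usage sketch (the particle-container type 'bunch_type' and its
// construction are illustrative assumptions; the concrete types depend on
// the application):
//
//     ippl::ParticleSpatialLayout<double, 3> layout(fieldLayout, mesh);
//     bunch_type bunch(layout);   // hypothetical ParticleBase-derived container
//     // ... push particle positions bunch.R during a time step ...
//     layout.update(bunch);       // redistribute particles across MPI ranks
//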
#include <memory>
#include <numeric>
#include <vector>

#include "Utility/IpplTimings.h"

#include "Communicate/Window.h"


namespace ippl {

    /*!
     * Helper struct for the locate step: Kokkos::parallel_scan accepts only a
     * single reduction variable, so both counters are bundled into one type.
     * For more details, see
     * https://kokkos.github.io/kokkos-core-wiki/API/core/parallel-dispatch/parallel_scan.html.
     */
    struct increment_type {
        size_t count[2];

        KOKKOS_FUNCTION void init() {
            count[0] = 0;
            count[1] = 0;
        }

        KOKKOS_INLINE_FUNCTION increment_type& operator+=(bool* values) {
            count[0] += values[0];
            count[1] += values[1];
            return *this;
        }

        KOKKOS_INLINE_FUNCTION increment_type& operator+=(increment_type values) {
            count[0] += values.count[0];
            count[1] += values.count[1];
            return *this;
        }
    };
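
    /* Illustration of the scan semantics (used in locateParticles() below):
     * each iteration contributes a pair of flags via operator+=(bool*),
     *     increment = { particle is invalid, particle not found nearby },
     * so e.g. invalid flags {1, 0, 1} and not-found flags {0, 0, 1} reduce to
     * count = {2, 1}. The running value of count[1] also serves as a unique,
     * race-free slot index for recording particles that left the neighborhood.
     */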

    template <typename T, unsigned Dim, class Mesh, typename... Properties>
    ParticleSpatialLayout<T, Dim, Mesh, Properties...>::ParticleSpatialLayout(FieldLayout<Dim>& fl,
                                                                              Mesh& mesh)
        : rlayout_m(fl, mesh)
        , flayout_m(fl) {
        nRecvs_m.resize(Comm->size());
        window_m.create(*Comm, nRecvs_m.begin(), nRecvs_m.end());
    }

    template <typename T, unsigned Dim, class Mesh, typename... Properties>
    void ParticleSpatialLayout<T, Dim, Mesh, Properties...>::updateLayout(FieldLayout<Dim>& fl,
                                                                          Mesh& mesh) {
        // flayout_m = fl;
        rlayout_m.changeDomain(fl, mesh);
    }

    template <typename T, unsigned Dim, class Mesh, typename... Properties>
    template <class ParticleContainer>
    void ParticleSpatialLayout<T, Dim, Mesh, Properties...>::update(ParticleContainer& pc) {
        /* Apply boundary conditions */
        static IpplTimings::TimerRef ParticleBCTimer = IpplTimings::getTimer("particleBC");
        IpplTimings::startTimer(ParticleBCTimer);
        this->applyBC(pc.R, rlayout_m.getDomain());
        IpplTimings::stopTimer(ParticleBCTimer);

        /* Update timer for the rest of the function */
        static IpplTimings::TimerRef ParticleUpdateTimer = IpplTimings::getTimer("updateParticle");
        IpplTimings::startTimer(ParticleUpdateTimer);

        int nRanks = Comm->size();
        if (nRanks < 2) {
            // nothing to exchange on a single rank, but close the timer before returning
            IpplTimings::stopTimer(ParticleUpdateTimer);
            return;
        }

        /* particle MPI exchange:
         * 1. figure out which particles need to go where -> locateParticles(...)
         * 2. fill send buffer and send particles
         * 3. delete invalidated particles
         * 4. receive particles
         */
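
        /* The send counts are exchanged through an MPI RMA window (one-sided
         * puts in step 2.1), so no rank needs to know in advance which ranks
         * will send to it; the particle payloads themselves then travel via
         * ordinary point-to-point messages (step 2.2).
         */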

        // 1. figure out which particles need to go where -> locateParticles(...) ============ //

        static IpplTimings::TimerRef locateTimer = IpplTimings::getTimer("locateParticles");
        IpplTimings::startTimer(locateTimer);

        /* Rank-local number of particles */
        size_type localnum = pc.getLocalNum();

        /* The indices correspond to the indices of the local particles,
         * the values correspond to the ranks to which the particles need to be sent
         */
        locate_type particleRanks("particles' MPI ranks", localnum);

        /* The indices are the indices of the particles,
         * the boolean values describe whether the particle has left the current rank
         * 0 --> particle valid (inside current rank)
         * 1 --> particle invalid (left rank)
         */
        bool_type invalidParticles("validity of particles", localnum);

        /* The indices are the MPI ranks,
         * the values are the numbers of particles sent to that rank from the current rank
         */
        locate_type rankSendCount_dview("rankSendCount Device", nRanks);

        /* The indices have no particular meaning,
         * the values are the MPI ranks to which we need to send
         */
        locate_type destinationRanks_dview("destinationRanks Device", nRanks);

        /* nInvalid is the number of invalid particles,
         * nDestinationRanks is the number of MPI ranks we need to send to
         */
        auto [nInvalid, nDestinationRanks] =
            locateParticles(pc,
                            particleRanks, invalidParticles,
                            rankSendCount_dview, destinationRanks_dview);

        /* Host space copy of rankSendCount_dview */
        auto rankSendCount_hview =
            Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), rankSendCount_dview);

        /* Host space copy of destinationRanks_dview */
        auto destinationRanks_hview =
            Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), destinationRanks_dview);

        IpplTimings::stopTimer(locateTimer);

        // 2. fill send buffer and send particles ============================================ //

        // 2.1 Remote Memory Access window for one-sided communication

        static IpplTimings::TimerRef preprocTimer = IpplTimings::getTimer("sendPreprocess");
        IpplTimings::startTimer(preprocTimer);

        std::fill(nRecvs_m.begin(), nRecvs_m.end(), 0);

        window_m.fence(0);

        // Prepare the RMA window for the ranks we need to send to
        for (size_t ridx = 0; ridx < nDestinationRanks; ridx++) {
            int rank = destinationRanks_hview[ridx];
            if (rank == Comm->rank()) {
                // we do not need to send to ourselves
                continue;
            }
            window_m.put<size_type>(rankSendCount_hview(rank), rank, Comm->rank());
        }
        window_m.fence(0);
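
        /* After this closing fence, nRecvs_m[r] holds the number of particles
         * that rank r will send to this rank: each sender wrote its count into
         * the window slot indexed by its own rank.
         */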

        IpplTimings::stopTimer(preprocTimer);

        // 2.2 Particle sends

        static IpplTimings::TimerRef sendTimer = IpplTimings::getTimer("particleSend");
        IpplTimings::startTimer(sendTimer);

        std::vector<MPI_Request> requests;

        int tag = Comm->next_tag(mpi::tag::P_SPATIAL_LAYOUT, mpi::tag::P_LAYOUT_CYCLE);

        for (size_t ridx = 0; ridx < nDestinationRanks; ridx++) {
            int rank = destinationRanks_hview[ridx];
            if (rank == Comm->rank()) {
                continue;
            }
            hash_type hash("hash", rankSendCount_hview(rank));
            fillHash(rank, particleRanks, hash);
            pc.sendToRank(rank, tag, requests, hash);
        }

        IpplTimings::stopTimer(sendTimer);

        // 3. Internal destruction of invalid particles ====================================== //

        static IpplTimings::TimerRef destroyTimer = IpplTimings::getTimer("particleDestroy");
        IpplTimings::startTimer(destroyTimer);

        pc.internalDestroy(invalidParticles, nInvalid);
        Kokkos::fence();

        IpplTimings::stopTimer(destroyTimer);

        // 4. Receive particles ============================================================== //

        static IpplTimings::TimerRef recvTimer = IpplTimings::getTimer("particleRecv");
        IpplTimings::startTimer(recvTimer);

        for (int rank = 0; rank < nRanks; ++rank) {
            if (nRecvs_m[rank] > 0) {
                pc.recvFromRank(rank, tag, nRecvs_m[rank]);
            }
        }
        IpplTimings::stopTimer(recvTimer);

        IpplTimings::startTimer(sendTimer);

        if (requests.size() > 0) {
            MPI_Waitall(requests.size(), requests.data(), MPI_STATUSES_IGNORE);
        }
        IpplTimings::stopTimer(sendTimer);

        IpplTimings::stopTimer(ParticleUpdateTimer);
    }

    template <typename T, unsigned Dim, class Mesh, typename... Properties>
    template <size_t... Idx>
    KOKKOS_INLINE_FUNCTION constexpr bool
    ParticleSpatialLayout<T, Dim, Mesh, Properties...>::positionInRegion(
        const std::index_sequence<Idx...>&, const vector_type& pos, const region_type& region) {
        return ((pos[Idx] > region[Idx].min()) && ...) && ((pos[Idx] <= region[Idx].max()) && ...);
    }
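
    /* For Dim == 2 the fold expressions above expand to
     *     pos[0] > region[0].min() && pos[1] > region[1].min()
     *     && pos[0] <= region[0].max() && pos[1] <= region[1].max(),
     * i.e. each region is treated as a box that is open at the lower bound
     * and closed at the upper bound.
     */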

    /* Helper function that evaluates the total number of neighbors for the
     * current rank in Dim dimensions.
     */
    template <typename T, unsigned Dim, class Mesh, typename... Properties>
    detail::size_type ParticleSpatialLayout<T, Dim, Mesh, Properties...>::getNeighborSize(
        const neighbor_list& neighbors) const {
        size_type totalSize = 0;

        for (const auto& componentNeighbors : neighbors) {
            totalSize += componentNeighbors.size();
        }

        return totalSize;
    }

    /**
     * @brief Determines the rank to which each particle must be sent after the
     * time step. Direct rank neighbors are scanned first; a global scan over
     * all ranks is performed only for particles that remain unfound. The
     * function also counts how many particles go to each rank and how many
     * ranks are sent to in total.
     *
     * @param pc Particle container
     * @param ranks Vector of length equal to the number of local particles;
     * each value is the new rank of the corresponding particle
     * @param invalid Vector marking the particles that must be sent away, and
     * thus locally deleted
     * @param nSends_dview Device view of length equal to the number of ranks;
     * each value is the number of particles sent from the current rank to that rank
     * @param sends_dview Device view holding the ranks that the current rank sends to
     *
     * @return Pair of the number of particles sent away and the number of ranks sent to
     */
    template <typename T, unsigned Dim, class Mesh, typename... Properties>
    template <typename ParticleContainer>
    std::pair<detail::size_type, detail::size_type>
    ParticleSpatialLayout<T, Dim, Mesh, Properties...>::locateParticles(
        const ParticleContainer& pc, locate_type& ranks, bool_type& invalid,
        locate_type& nSends_dview, locate_type& sends_dview) const {
        auto positions           = pc.R.getView();
        region_view_type Regions = rlayout_m.getdLocalRegions();

        using mdrange_type = Kokkos::MDRangePolicy<Kokkos::Rank<2>, position_execution_space>;

        size_type myRank = Comm->rank();

        const auto is = std::make_index_sequence<Dim>{};

        const neighbor_list& neighbors = flayout_m.getNeighbors();

        /// outsideIds: Container of the IDs of particles that travelled outside of the neighborhood.
        locate_type outsideIds("Particles outside of neighborhood", size_type(pc.getLocalNum()));

        /// outsideCount: Number of particles that travelled outside of the neighborhood.
        size_type outsideCount = 0;
        /// invalidCount: Number of particles that need to be sent to other ranks.
        size_type invalidCount = 0;

        /// neighborSize: Size of the neighborhood in Dim dimensions.
        const size_type neighborSize = getNeighborSize(neighbors);

        /// neighbors_view: Kokkos view with the IDs of the neighboring MPI ranks.
        locate_type neighbors_view("Nearest neighbors IDs", neighborSize);

        /* red_val: Reduces both the number of invalid particles and the number of
         * particles outside of the neighborhood (Kokkos::parallel_scan does not allow
         * multiple reduction values, hence the helper struct increment_type). The
         * first element updates invalidCount, the second one updates outsideCount.
         */
        increment_type red_val;
        red_val.init();

        auto neighbors_mirror =
            Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), neighbors_view);

        // Flatten the neighbor list into the mirror view, then copy it to the device
        size_t k = 0;
        for (const auto& componentNeighbors : neighbors) {
            for (size_t j = 0; j < componentNeighbors.size(); ++j) {
                neighbors_mirror(k) = componentNeighbors[j];
                k++;
            }
        }

        Kokkos::deep_copy(neighbors_view, neighbors_mirror);

        /*! Begin Kokkos loop:
         * Step 1: search in current rank
         * Step 2: search in neighbors
         * Step 3: save information on whether the particle was located
         * Step 4: run additional loop on non-located particles
         */
        static IpplTimings::TimerRef neighborSearch = IpplTimings::getTimer("neighborSearch");
        IpplTimings::startTimer(neighborSearch);

        Kokkos::parallel_scan(
            "ParticleSpatialLayout::locateParticles()",
            Kokkos::RangePolicy<size_t>(0, ranks.extent(0)),
            KOKKOS_LAMBDA(const size_type i, increment_type& val, const bool final) {
                /* Step 1
                 * inCurr: True if the particle hasn't left the current MPI rank.
                 * inNeighbor: True if the particle is found in a neighboring rank.
                 * found: True if inCurr or inNeighbor is true.
                 * increment: Helper variable to update red_val.
                 */
                bool inCurr     = false;
                bool inNeighbor = false;
                bool found      = false;
                bool increment[2];

                inCurr = positionInRegion(is, positions(i), Regions(myRank));

                ranks(i)   = inCurr * myRank;
                invalid(i) = !inCurr;
                found      = inCurr || found;

                /// Step 2
                for (size_t j = 0; j < neighbors_view.extent(0); ++j) {
                    size_type rank = neighbors_view(j);

                    inNeighbor = positionInRegion(is, positions(i), Regions(rank));

                    ranks(i) = !(inNeighbor) * ranks(i) + inNeighbor * rank;
                    found    = inNeighbor || found;
                }
                /* Step 3
                 * In the final pass of the scan, particles found neither in the
                 * current rank nor in a neighboring one are recorded in outsideIds.
                 * The running count val.count[1] provides a unique slot per particle,
                 * which avoids race conditions when updating outsideIds.
                 */
                if (final && !found) {
                    outsideIds(val.count[1]) = i;
                }
                increment[0] = invalid(i);
                increment[1] = !found;
                val += increment;
            },
            red_val);

        Kokkos::fence();

        invalidCount = red_val.count[0];
        outsideCount = red_val.count[1];

        IpplTimings::stopTimer(neighborSearch);

        /// Step 4
        static IpplTimings::TimerRef nonNeighboringParticles =
            IpplTimings::getTimer("nonNeighboringParticles");
        IpplTimings::startTimer(nonNeighboringParticles);
        if (outsideCount > 0) {
            Kokkos::parallel_for(
                "ParticleSpatialLayout::leftParticles()",
                mdrange_type({0, 0}, {outsideCount, Regions.extent(0)}),
                KOKKOS_LAMBDA(const size_t i, const size_type j) {
                    /// pId: (local) ID of the particle that is currently being searched.
                    size_type pId = outsideIds(i);

                    /// inRegion: Checks whether particle pId is inside region j.
                    bool inRegion = positionInRegion(is, positions(pId), Regions(j));
                    if (inRegion) {
                        ranks(pId) = j;
                    }
                });
            Kokkos::fence();
        }
        IpplTimings::stopTimer(nonNeighboringParticles);

        Kokkos::parallel_for(
            "Calculate nSends", Kokkos::RangePolicy<size_t>(0, ranks.extent(0)),
            KOKKOS_LAMBDA(const size_t i) {
                size_type rank = ranks(i);
                Kokkos::atomic_fetch_add(&nSends_dview(rank), 1);
            });

        // Number of ranks that this rank sends to
        Kokkos::View<size_type> rankSends("Number of Ranks we need to send to");

        Kokkos::parallel_for(
            "Calculate sends", Kokkos::RangePolicy<size_t>(0, nSends_dview.extent(0)),
            KOKKOS_LAMBDA(const size_t rank) {
                if (nSends_dview(rank) != 0) {
                    size_type index    = Kokkos::atomic_fetch_add(&rankSends(), 1);
                    sends_dview(index) = rank;
                }
            });
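
        /* sends_dview is compacted with an atomic counter, so the order of the
         * destination ranks is nondeterministic; this is harmless because each
         * destination rank appears exactly once and update() handles them
         * independently.
         */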
        size_type nSendRanks;
        Kokkos::deep_copy(nSendRanks, rankSends);

        return {invalidCount, nSendRanks};
    }

    template <typename T, unsigned Dim, class Mesh, typename... Properties>
    void ParticleSpatialLayout<T, Dim, Mesh, Properties...>::fillHash(int rank,
                                                                      const locate_type& ranks,
                                                                      hash_type& hash) {
        /* Compute the prefix sum and fill the hash with the local indices of
         * the particles that move to the given rank.
         */
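        /* Worked example: with ranks = {1, 0, 1, 1, 0} and rank == 1, the scan
         * visits local indices {0, 2, 3} in order and produces hash = {0, 2, 3},
         * i.e. hash(k) is the local index of the k-th particle headed to 'rank'.
         */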
        using policy_type = Kokkos::RangePolicy<position_execution_space>;
        Kokkos::parallel_scan(
            "ParticleSpatialLayout::fillHash()", policy_type(0, ranks.extent(0)),
            KOKKOS_LAMBDA(const size_t i, int& idx, const bool final) {
                if (final) {
                    if (rank == ranks(i)) {
                        hash(idx) = i;
                    }
                }

                if (rank == ranks(i)) {
                    idx += 1;
                }
            });
        Kokkos::fence();
    }

    template <typename T, unsigned Dim, class Mesh, typename... Properties>
    size_t ParticleSpatialLayout<T, Dim, Mesh, Properties...>::numberOfSends(
        int rank, const locate_type& ranks) {
        size_t nSends     = 0;
        using policy_type = Kokkos::RangePolicy<position_execution_space>;
        Kokkos::parallel_reduce(
            "ParticleSpatialLayout::numberOfSends()", policy_type(0, ranks.extent(0)),
            KOKKOS_LAMBDA(const size_t i, size_t& num) { num += size_t(rank == ranks(i)); },
            nSends);
        Kokkos::fence();
        return nSends;
    }

}  // namespace ippl