Line data Source code
1 : //
2 : // Class ParticleSpatialLayout
3 : // Particle layout based on spatial decomposition.
4 : //
5 : // This is a specialized version of ParticleLayout, which places particles
6 : // on processors based on their spatial location relative to a fixed grid.
7 : // In particular, this can maintain particles on processors based on a
8 : // specified FieldLayout or RegionLayout, so that particles are always on
9 : // the same node as the node containing the Field region to which they are
10 : // local. This may also be used if there is no associated Field at all,
11 : // in which case a grid is selected based on an even distribution of
12 : // particles among processors.
13 : //
14 : // After each 'time step' in a calculation, which is defined as a period
15 : // in which the particle positions may change enough to affect the global
16 : // layout, the user must call the 'update' routine, which will move
17 : // particles between processors, etc. After the Nth call to update, a
18 : // load balancing routine will be called instead. The user may set the
19 : // frequency of load balancing (N), or may supply a function to
20 : // determine if load balancing should be done or not.
21 : //
22 : #include <memory>
23 : #include <numeric>
24 : #include <vector>
25 :
26 : #include "Utility/IpplTimings.h"
27 :
28 : #include "Communicate/Window.h"
29 :
30 :
31 : namespace ippl {
32 :
33 : /*!
34 : * We need this struct since Kokkos parallel_scan only accepts
35 : * one variable of type ReturnType where to perform the reduction operation.
36 : * For more details, see
37 : * https://kokkos.github.io/kokkos-core-wiki/API/core/parallel-dispatch/parallel_scan.html.
38 : */
39 : struct increment_type {
40 : size_t count[2];
41 :
42 168 : KOKKOS_FUNCTION void init() {
43 168 : count[0] = 0;
44 168 : count[1] = 0;
45 168 : }
46 :
47 8448 : KOKKOS_INLINE_FUNCTION increment_type& operator+=(bool* values) {
48 8448 : count[0] += values[0];
49 8448 : count[1] += values[1];
50 8448 : return *this;
51 : }
52 :
53 : KOKKOS_INLINE_FUNCTION increment_type& operator+=(increment_type values) {
54 : count[0] += values.count[0];
55 : count[1] += values.count[1];
56 : return *this;
57 : }
58 : };
59 :
60 : template <typename T, unsigned Dim, class Mesh, typename... Properties>
61 120 : ParticleSpatialLayout<T, Dim, Mesh, Properties...>::ParticleSpatialLayout(FieldLayout<Dim>& fl,
62 : Mesh& mesh)
63 120 : : rlayout_m(fl, mesh)
64 240 : , flayout_m(fl)
65 : {
66 120 : nRecvs_m.resize(Comm->size());
67 120 : if (Comm->size() > 1) {
68 120 : window_m.create(*Comm, nRecvs_m.begin(), nRecvs_m.end());
69 : }
70 120 : }
71 :
72 : template <typename T, unsigned Dim, class Mesh, typename... Properties>
73 48 : void ParticleSpatialLayout<T, Dim, Mesh, Properties...>::updateLayout(FieldLayout<Dim>& fl,
74 : Mesh& mesh) {
75 : //flayout_m = fl;
76 48 : rlayout_m.changeDomain(fl, mesh);
77 48 : }
78 :
79 : template <typename T, unsigned Dim, class Mesh, typename... Properties>
80 : template <class ParticleContainer>
81 168 : void ParticleSpatialLayout<T, Dim, Mesh, Properties...>::update(ParticleContainer& pc) {
82 :
83 : /* Apply Boundary Conditions */
84 168 : static IpplTimings::TimerRef ParticleBCTimer = IpplTimings::getTimer("particleBC");
85 168 : IpplTimings::startTimer(ParticleBCTimer);
86 168 : this->applyBC(pc.R, rlayout_m.getDomain());
87 168 : IpplTimings::stopTimer(ParticleBCTimer);
88 :
89 : /* Update Timer for the rest of the function */
90 168 : static IpplTimings::TimerRef ParticleUpdateTimer = IpplTimings::getTimer("updateParticle");
91 168 : IpplTimings::startTimer(ParticleUpdateTimer);
92 :
93 168 : int nRanks = Comm->size();
94 168 : if (nRanks < 2) {
95 0 : return;
96 : }
97 :
98 : /* particle MPI exchange:
99 : * 1. figure out which particles need to go where -> locateParticles(...)
100 : * 2. fill send buffer and send particles
101 : * 3. delete invalidated particles
102 : * 4. receive particles
103 : */
104 :
105 : // 1. figure out which particles need to go where -> locateParticles(...) ============= //
106 :
107 168 : static IpplTimings::TimerRef locateTimer = IpplTimings::getTimer("locateParticles");
108 168 : IpplTimings::startTimer(locateTimer);
109 :
110 : /* Rank-local number of particles */
111 168 : size_type localnum = pc.getLocalNum();
112 :
113 : /* The indices correspond to the indices of the local particles,
114 : * the values correspond to the ranks to which the particles need to be sent
115 : */
116 168 : locate_type particleRanks("particles' MPI ranks", localnum);
117 :
118 : /* The indices are the indices of the particles,
119 : * the boolean values describe whether the particle has left the current rank
120 : * 0 --> particle valid (inside current rank)
121 : * 1 --> particle invalid (left rank)
122 : */
123 168 : bool_type invalidParticles("validity of particles", localnum);
124 :
125 : /* The indices are the MPI ranks,
126 : * the values are the number of particles are sent to that rank from myrank
127 : */
128 168 : locate_type rankSendCount_dview("rankSendCount Device", nRanks);
129 :
130 : /* The indices have no particluar meaning,
131 : * the values are the MPI ranks to which we need to send
132 : */
133 168 : locate_type destinationRanks_dview("destinationRanks Device", nRanks);
134 :
135 : /* nInvalid is the number of invalid particles
136 : * nDestinationRanks is the number of MPI ranks we need to send to
137 : */
138 168 : auto [nInvalid, nDestinationRanks] =
139 168 : locateParticles(pc,
140 : particleRanks, invalidParticles,
141 : rankSendCount_dview, destinationRanks_dview);
142 :
143 : /* Host space copy of rankSendCount_dview */
144 168 : auto rankSendCount_hview =
145 504 : Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), rankSendCount_dview);
146 :
147 : /* Host Space copy of destinationRanks_dview */
148 168 : auto destinationRanks_hview =
149 336 : Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), destinationRanks_dview);
150 :
151 168 : IpplTimings::stopTimer(locateTimer);
152 :
153 : // 2. fill send buffer and send particles =============================================== //
154 :
155 : // 2.1 Remote Memory Access window for one-sided communication
156 :
157 168 : static IpplTimings::TimerRef preprocTimer = IpplTimings::getTimer("sendPreprocess");
158 168 : IpplTimings::startTimer(preprocTimer);
159 :
160 168 : std::fill(nRecvs_m.begin(), nRecvs_m.end(), 0);
161 :
162 168 : window_m.fence(0);
163 :
164 : // Prepare RMA window for the ranks we need to send to
165 468 : for(size_t ridx=0; ridx < nDestinationRanks; ridx++){
166 300 : int rank = destinationRanks_hview[ridx];
167 300 : if (rank == Comm->rank()){
168 : // we do not need to send to ourselves
169 168 : continue;
170 : }
171 264 : window_m.put<size_type>(rankSendCount_hview(rank), rank, Comm->rank());
172 : }
173 168 : window_m.fence(0);
174 :
175 168 : IpplTimings::stopTimer(preprocTimer);
176 :
177 : // 2.2 Particle Sends
178 :
179 168 : static IpplTimings::TimerRef sendTimer = IpplTimings::getTimer("particleSend");
180 168 : IpplTimings::startTimer(sendTimer);
181 :
182 168 : std::vector<MPI_Request> requests(0);
183 :
184 168 : int tag = Comm->next_tag(mpi::tag::P_SPATIAL_LAYOUT, mpi::tag::P_LAYOUT_CYCLE);
185 :
186 600 : for(size_t ridx=0; ridx < nDestinationRanks; ridx++){
187 300 : int rank = destinationRanks_hview[ridx];
188 300 : if(rank == Comm->rank()){
189 168 : continue;
190 : }
191 132 : hash_type hash("hash", rankSendCount_hview(rank));
192 132 : fillHash(rank, particleRanks, hash);
193 132 : pc.sendToRank(rank, tag, requests, hash);
194 : }
195 :
196 168 : IpplTimings::stopTimer(sendTimer);
197 :
198 :
199 : // 3. Internal destruction of invalid particles ======================================= //
200 :
201 168 : static IpplTimings::TimerRef destroyTimer = IpplTimings::getTimer("particleDestroy");
202 168 : IpplTimings::startTimer(destroyTimer);
203 :
204 168 : pc.internalDestroy(invalidParticles, nInvalid);
205 168 : Kokkos::fence();
206 :
207 168 : IpplTimings::stopTimer(destroyTimer);
208 :
209 : // 4. Receive Particles ================================================================ //
210 :
211 168 : static IpplTimings::TimerRef recvTimer = IpplTimings::getTimer("particleRecv");
212 168 : IpplTimings::startTimer(recvTimer);
213 :
214 504 : for (int rank = 0; rank < nRanks; ++rank) {
215 336 : if (nRecvs_m[rank] > 0) {
216 132 : pc.recvFromRank(rank, tag, nRecvs_m[rank]);
217 : }
218 : }
219 168 : IpplTimings::stopTimer(recvTimer);
220 :
221 :
222 168 : IpplTimings::startTimer(sendTimer);
223 :
224 168 : if (requests.size() > 0) {
225 132 : MPI_Waitall(requests.size(), requests.data(), MPI_STATUSES_IGNORE);
226 : }
227 168 : IpplTimings::stopTimer(sendTimer);
228 :
229 168 : IpplTimings::stopTimer(ParticleUpdateTimer);
230 168 : }
231 :
232 : template <typename T, unsigned Dim, class Mesh, typename... Properties>
233 : template <size_t... Idx>
234 : KOKKOS_INLINE_FUNCTION constexpr bool
235 756224 : ParticleSpatialLayout<T, Dim, Mesh, Properties...>::positionInRegion(
236 : const std::index_sequence<Idx...>&, const vector_type& pos, const region_type& region) {
237 756224 : return ((pos[Idx] > region[Idx].min()) && ...) && ((pos[Idx] <= region[Idx].max()) && ...);
238 : };
239 :
240 :
241 : /* Helper function that evaluates the total number of neighbors for the current rank in Dim dimensions.
242 : */
243 : template <typename T, unsigned Dim, class Mesh, typename... Properties>
244 168 : detail::size_type ParticleSpatialLayout<T, Dim, Mesh, Properties...>::getNeighborSize(
245 : const neighbor_list& neighbors) const {
246 168 : size_type totalSize = 0;
247 :
248 30576 : for (const auto& componentNeighbors : neighbors) {
249 30408 : totalSize += componentNeighbors.size();
250 : }
251 :
252 168 : return totalSize;
253 : }
254 :
255 :
256 : /**
257 : * @brief This function determines to which rank particles need to be sent after the iteration step.
258 : * It starts by first scanning direct rank neighbors, and only does a global scan if there are still
259 : * unfound particles. It then calculates how many particles need to be sent to each rank and how many
260 : * ranks are sent to in total.
261 : *
262 : * @param pc Particle Container
263 : * @param ranks A vector the length of the number of particles on the current rank, where each value refers
264 : * to the new rank of the particle
265 : * @param invalid A vector marking the particles that need to be sent away, and thus locally deleted
266 : * @param nSends_dview Device view the length of number of ranks, where each value determines the number
267 : * of particles sent to that rank from the current rank
268 : * @param sends_dview Device view for the number of ranks that are sent to from current rank
269 : *
270 : * @return tuple with the number of particles sent away and the number of ranks sent to
271 : */
272 : template <typename T, unsigned Dim, class Mesh, typename... Properties>
273 : template <typename ParticleContainer>
274 168 : std::pair<detail::size_type, detail::size_type> ParticleSpatialLayout<T, Dim, Mesh, Properties...>::locateParticles(
275 : const ParticleContainer& pc, locate_type& ranks, bool_type& invalid,
276 : locate_type& nSends_dview, locate_type& sends_dview) const {
277 :
278 168 : auto positions = pc.R.getView();
279 168 : region_view_type Regions = rlayout_m.getdLocalRegions();
280 :
281 : using mdrange_type = Kokkos::MDRangePolicy<Kokkos::Rank<2>, position_execution_space>;
282 :
283 168 : size_type myRank = Comm->rank();
284 :
285 : const auto is = std::make_index_sequence<Dim>{};
286 :
287 168 : const neighbor_list& neighbors = flayout_m.getNeighbors();
288 :
289 : /// outsideIds: Container of particle IDs that travelled outside of the neighborhood.
290 168 : locate_type outsideIds("Particles outside of neighborhood", size_type(pc.getLocalNum()));
291 :
292 : /// outsideCount: Tracks the number of particles that travelled outside of the neighborhood.
293 168 : size_type outsideCount = 0;
294 : /// invalidCount: Tracks the number of particles that need to be sent to other ranks.
295 168 : size_type invalidCount = 0;
296 :
297 : /// neighborSize: Size of a neighborhood in D dimentions.
298 168 : const size_type neighborSize = getNeighborSize(neighbors);
299 :
300 : /// neighbors_view: Kokkos view with the IDs of the neighboring MPI ranks.
301 168 : locate_type neighbors_view("Nearest neighbors IDs", neighborSize);
302 :
303 : /* red_val: Used to reduce both the number of invalid particles and the number of particles
304 : * outside of the neighborhood (Kokkos::parallel_scan doesn't allow multiple reduction values, so we
305 : * use the helper class increment_type). First element updates InvalidCount, second
306 : * one updates outsideCount.
307 : */
308 : increment_type red_val;
309 168 : red_val.init();
310 :
311 168 : auto neighbors_mirror =
312 336 : Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), neighbors_view);
313 :
314 168 : size_t k = 0;
315 :
316 30576 : for (const auto& componentNeighbors : neighbors) {
317 42128 : for (size_t j = 0; j < componentNeighbors.size(); ++j) {
318 11720 : neighbors_mirror(k) = componentNeighbors[j];
319 : //std::cout << "Neighbor: " << neighbors_mirror(k) << std::endl;
320 11720 : k++;
321 : }
322 : }
323 :
324 168 : Kokkos::deep_copy(neighbors_view, neighbors_mirror);
325 :
326 : /*! Begin Kokkos loop:
327 : * Step 1: search in current rank
328 : * Step 2: search in neighbors
329 : * Step 3: save information on whether the particle was located
330 : * Step 4: run additional loop on non-located particles
331 : */
332 168 : static IpplTimings::TimerRef neighborSearch = IpplTimings::getTimer("neighborSearch");
333 168 : IpplTimings::startTimer(neighborSearch);
334 :
335 168 : Kokkos::parallel_scan(
336 : "ParticleSpatialLayout::locateParticles()",
337 336 : Kokkos::RangePolicy<size_t>(0, ranks.extent(0)),
338 8784 : KOKKOS_LAMBDA(const size_type i, increment_type& val, const bool final) {
339 : /* Step 1
340 : * inCurr: True if the particle hasn't left the current MPI rank.
341 : * inNeighbor: True if the particle is found in a neighboring rank.
342 : * found: True either if inCurr = True or inNeighbor = True.
343 : * increment: Helper variable to update red_val.
344 : */
345 8448 : bool inCurr = false;
346 8448 : bool inNeighbor = false;
347 8448 : bool found = false;
348 : bool increment[2];
349 :
350 25344 : inCurr = positionInRegion(is, positions(i), Regions(myRank));
351 :
352 8448 : ranks(i) = inCurr * myRank;
353 8448 : invalid(i) = !inCurr;
354 8448 : found = inCurr || found;
355 :
356 : /// Step 2
357 756224 : for (size_t j = 0; j < neighbors_view.extent(0); ++j) {
358 747776 : size_type rank = neighbors_view(j);
359 :
360 2243328 : inNeighbor = positionInRegion(is, positions(i), Regions(rank));
361 :
362 1495552 : ranks(i) = !(inNeighbor) * ranks(i) + inNeighbor * rank;
363 747776 : found = inNeighbor || found;
364 : }
365 : /// Step 3
366 : /* isOut: When the last thread has finished the search, checks whether the particle has been found
367 : * either in the current rank or in a neighboring one.
368 : * Used to avoid race conditions when updating outsideIds.
369 : */
370 8448 : if(final && !found) {
371 0 : outsideIds(val.count[1]) = i;
372 : }
373 : //outsideIds(val.count[1]) = i * isOut;
374 8448 : increment[0] = invalid(i);
375 8448 : increment[1] = !found;
376 8448 : val += increment;
377 :
378 : },
379 : red_val);
380 :
381 168 : Kokkos::fence();
382 :
383 168 : invalidCount = red_val.count[0];
384 168 : outsideCount = red_val.count[1];
385 :
386 168 : IpplTimings::stopTimer(neighborSearch);
387 :
388 : /// Step 4
389 168 : static IpplTimings::TimerRef nonNeighboringParticles = IpplTimings::getTimer("nonNeighboringParticles");
390 168 : IpplTimings::startTimer(nonNeighboringParticles);
391 168 : if (outsideCount > 0) {
392 0 : Kokkos::parallel_for(
393 : "ParticleSpatialLayout::leftParticles()",
394 0 : mdrange_type({0, 0}, {outsideCount, Regions.extent(0)}),
395 0 : KOKKOS_LAMBDA(const size_t i, const size_type j) {
396 : /// pID: (local) ID of the particle that is currently being searched.
397 0 : size_type pId = outsideIds(i);
398 :
399 : /// inRegion: Checks whether particle pID is inside region j.
400 0 : bool inRegion = positionInRegion(is, positions(pId), Regions(j));
401 0 : if(inRegion){
402 0 : ranks(pId) = j;
403 : }
404 : });
405 0 : Kokkos::fence();
406 : }
407 168 : IpplTimings::stopTimer(nonNeighboringParticles);
408 :
409 168 : Kokkos::parallel_for("Calculate nSends",
410 336 : Kokkos::RangePolicy<size_t>(0, ranks.extent(0)),
411 17232 : KOKKOS_LAMBDA(const size_t i){
412 8448 : size_type rank = ranks(i);
413 16896 : Kokkos::atomic_fetch_add(&nSends_dview(rank),1);
414 : }
415 : );
416 :
417 : // Number of Ranks we need to send to
418 168 : Kokkos::View<size_type> rankSends("Number of Ranks we need to send to");
419 :
420 168 : Kokkos::parallel_for("Calculate sends",
421 336 : Kokkos::RangePolicy<size_t>(0, nSends_dview.extent(0)),
422 1008 : KOKKOS_LAMBDA(const size_t rank){
423 672 : if(nSends_dview(rank) != 0){
424 600 : size_type index = Kokkos::atomic_fetch_add(&rankSends(), 1);
425 600 : sends_dview(index) = rank;
426 : }
427 : }
428 : );
429 : size_type temp;
430 168 : Kokkos::deep_copy(temp, rankSends);
431 :
432 336 : return {invalidCount, temp};
433 168 : }
434 :
435 : template <typename T, unsigned Dim, class Mesh, typename... Properties>
436 132 : void ParticleSpatialLayout<T, Dim, Mesh, Properties...>::fillHash(int rank,
437 : const locate_type& ranks,
438 : hash_type& hash) {
439 : /* Compute the prefix sum and fill the hash
440 : */
441 : using policy_type = Kokkos::RangePolicy<position_execution_space>;
442 132 : Kokkos::parallel_scan(
443 264 : "ParticleSpatialLayout::fillHash()", policy_type(0, ranks.extent(0)),
444 12784 : KOKKOS_LAMBDA(const size_t i, int& idx, const bool final) {
445 6260 : if (final) {
446 12520 : if (rank == ranks(i)) {
447 5764 : hash(idx) = i;
448 : }
449 : }
450 :
451 12520 : if (rank == ranks(i)) {
452 2882 : idx += 1;
453 : }
454 : });
455 132 : Kokkos::fence();
456 :
457 132 : }
458 :
459 : template <typename T, unsigned Dim, class Mesh, typename... Properties>
460 : size_t ParticleSpatialLayout<T, Dim, Mesh, Properties...>::numberOfSends(
461 : int rank, const locate_type& ranks) {
462 : size_t nSends = 0;
463 : using policy_type = Kokkos::RangePolicy<position_execution_space>;
464 : Kokkos::parallel_reduce(
465 : "ParticleSpatialLayout::numberOfSends()", policy_type(0, ranks.extent(0)),
466 : KOKKOS_LAMBDA(const size_t i, size_t& num) { num += size_t(rank == ranks(i)); },
467 : nSends);
468 : Kokkos::fence();
469 : return nSends;
470 : }
471 :
472 : } // namespace ippl
|