Line data Source code
1 : #include "Utility/IpplTimings.h"
2 :
3 : namespace ippl {
4 :
5 : template <class Field, class Tp>
6 24 : void OrthogonalRecursiveBisection<Field, Tp>::initialize(FieldLayout<Dim>& fl, mesh_type& mesh,
7 : const Field& rho) {
8 24 : bf_m.initialize(mesh, fl);
9 24 : bf_m = rho;
10 24 : }
11 :
12 : template <class Field, class Tp>
13 : template <typename Attrib>
14 24 : bool OrthogonalRecursiveBisection<Field, Tp>::binaryRepartition(
15 : const Attrib& R, FieldLayout<Dim>& fl, const bool& isFirstRepartition) {
16 : // Timings
17 24 : static IpplTimings::TimerRef tbasicOp = IpplTimings::getTimer("basicOperations");
18 24 : static IpplTimings::TimerRef tperpReduction = IpplTimings::getTimer("perpReduction");
19 24 : static IpplTimings::TimerRef tallReduce = IpplTimings::getTimer("allReduce");
20 24 : static IpplTimings::TimerRef tscatter = IpplTimings::getTimer("scatterR");
21 :
22 : // Scattering of particle positions in field
23 : // In case of first repartition we know the density from the
24 : // analytical expression and we use that for load balancing
25 : // and create particles. Note the particles are created only
26 : // after the first repartition and hence we cannot call scatter
27 : // before it.
28 24 : IpplTimings::startTimer(tscatter);
29 24 : if (!isFirstRepartition) {
30 24 : scatterR(R);
31 : }
32 :
33 24 : IpplTimings::stopTimer(tscatter);
34 :
35 24 : IpplTimings::startTimer(tbasicOp);
36 :
37 : // Get number of ranks
38 24 : auto& comm = fl.comm;
39 24 : int nprocs = comm.size();
40 :
41 : // Start with whole domain and total number of nodes
42 48 : std::vector<NDIndex<Dim>> domains = {fl.getDomain()};
43 48 : std::vector<int> procs = {nprocs};
44 :
45 : // Arrays for reduction
46 24 : std::vector<Tf> reduced, reducedRank;
47 :
48 : // Start recursive repartition loop
49 24 : unsigned int it = 0;
50 24 : int maxprocs = nprocs;
51 24 : IpplTimings::stopTimer(tbasicOp);
52 :
53 24 : while (maxprocs > 1) {
54 : // Find cut axis
55 0 : IpplTimings::startTimer(tbasicOp);
56 0 : int cutAxis = findCutAxis(domains[it]);
57 0 : IpplTimings::stopTimer(tbasicOp);
58 :
59 : // Reserve space
60 0 : IpplTimings::startTimer(tperpReduction);
61 0 : reduced.resize(domains[it][cutAxis].length());
62 0 : reducedRank.resize(domains[it][cutAxis].length());
63 :
64 0 : std::fill(reducedRank.begin(), reducedRank.end(), 0.0);
65 0 : std::fill(reduced.begin(), reduced.end(), 0.0);
66 :
67 : // Peform reduction with field of weights and communicate to the other ranks
68 0 : perpendicularReduction(reducedRank, cutAxis, domains[it]);
69 0 : IpplTimings::stopTimer(tperpReduction);
70 :
71 : // Communicate to all the reduced weights
72 0 : IpplTimings::startTimer(tallReduce);
73 0 : comm.allreduce(reducedRank.data(), reduced.data(), reducedRank.size(), std::plus<Tf>());
74 0 : IpplTimings::stopTimer(tallReduce);
75 :
76 : // Find median of reduced weights
77 0 : IpplTimings::startTimer(tbasicOp);
78 : // Initialize median to some value (1 is lower bound value)
79 0 : int median = 1;
80 0 : median = findMedian(reduced);
81 0 : IpplTimings::stopTimer(tbasicOp);
82 :
83 : // Cut domains and procs
84 0 : IpplTimings::startTimer(tbasicOp);
85 0 : cutDomain(domains, procs, it, cutAxis, median);
86 :
87 : // Update max procs
88 0 : maxprocs = 0;
89 0 : for (unsigned int i = 0; i < procs.size(); i++) {
90 0 : if (procs[i] > maxprocs) {
91 0 : maxprocs = procs[i];
92 0 : it = i;
93 : }
94 : }
95 0 : IpplTimings::stopTimer(tbasicOp);
96 :
97 : // Clear all arrays
98 0 : IpplTimings::startTimer(tperpReduction);
99 0 : reduced.clear();
100 0 : reducedRank.clear();
101 0 : IpplTimings::stopTimer(tperpReduction);
102 : }
103 :
104 : // Check that no plane was obtained in the repartition
105 24 : IpplTimings::startTimer(tbasicOp);
106 48 : for (const auto& domain : domains) {
107 108 : for (const auto& axis : domain) {
108 84 : if (axis.length() == 1) {
109 0 : return false;
110 : }
111 : }
112 : }
113 :
114 : // Update FieldLayout with new indices
115 24 : fl.updateLayout(domains);
116 :
117 : // Update local field with new layout
118 24 : bf_m.updateLayout(fl);
119 24 : IpplTimings::stopTimer(tbasicOp);
120 :
121 24 : return true;
122 24 : }
123 :
124 : template <class Field, class Tp>
125 0 : int OrthogonalRecursiveBisection<Field, Tp>::findCutAxis(NDIndex<Dim>& dom) {
126 : // Find longest domain size
127 0 : return std::distance(dom.begin(), std::max_element(dom.begin(), dom.end(),
128 0 : [&](const Index& a, const Index& b) {
129 0 : return a.length() < b.length();
130 0 : }));
131 : }
132 :
133 : template <class Field, class Tp>
134 0 : void OrthogonalRecursiveBisection<Field, Tp>::perpendicularReduction(
135 : std::vector<Tf>& rankWeights, unsigned int cutAxis, NDIndex<Dim>& dom) {
136 : // Check if domains overlap, if not no need for reduction
137 0 : NDIndex<Dim> lDom = bf_m.getOwned();
138 0 : if (lDom[cutAxis].first() > dom[cutAxis].last()
139 0 : || lDom[cutAxis].last() < dom[cutAxis].first()) {
140 0 : return;
141 : }
142 :
143 : // Get field's local weights
144 0 : int nghost = bf_m.getNghost();
145 0 : const auto data = bf_m.getView();
146 :
147 : // Determine the iteration bounds of the reduction
148 0 : int cutAxisFirst =
149 0 : std::max(lDom[cutAxis].first(), dom[cutAxis].first()) - lDom[cutAxis].first() + nghost;
150 0 : int cutAxisLast =
151 0 : std::min(lDom[cutAxis].last(), dom[cutAxis].last()) - lDom[cutAxis].first() + nghost;
152 :
153 : // Set iterator for where to write in the reduced array
154 0 : unsigned int arrayStart = 0;
155 0 : if (dom[cutAxis].first() < lDom[cutAxis].first()) {
156 0 : arrayStart = lDom[cutAxis].first() - dom[cutAxis].first();
157 : }
158 :
159 : // Find all the perpendicular axes
160 : using exec_space = typename Field::execution_space;
161 : using index_type = typename RangePolicy<Dim, exec_space>::index_type;
162 : Kokkos::Array<index_type, Dim> begin, end;
163 0 : for (unsigned d = 0; d < Dim; d++) {
164 0 : if (d == cutAxis) {
165 0 : continue;
166 : }
167 :
168 0 : int inf = std::max(lDom[d].first(), dom[d].first()) - lDom[d].first() + nghost;
169 0 : int sup = std::min(lDom[d].last(), dom[d].last()) - lDom[d].first() + nghost;
170 : // inf and sup bounds must be within the domain to reduce, if not no need to reduce
171 0 : if (sup < inf) {
172 0 : return;
173 : }
174 :
175 0 : begin[d] = inf;
176 : // The +1 is for Kokkos loop
177 0 : end[d] = sup + 1;
178 : }
179 :
180 : // Iterate along cutAxis
181 0 : for (int i = cutAxisFirst; i <= cutAxisLast; i++) {
182 0 : begin[cutAxis] = i;
183 0 : end[cutAxis] = i + 1;
184 :
185 : // Reducing over perpendicular plane defined by cutAxis
186 0 : Tf tempRes = Tf(0);
187 :
188 : using index_array_type = typename RangePolicy<Dim, exec_space>::index_array_type;
189 0 : ippl::parallel_reduce(
190 0 : "ORB weight reduction", createRangePolicy<Dim, exec_space>(begin, end),
191 0 : KOKKOS_LAMBDA(const index_array_type& args, Tf& weight) {
192 0 : weight += apply(data, args);
193 : },
194 0 : Kokkos::Sum<Tf>(tempRes));
195 :
196 0 : Kokkos::fence();
197 :
198 0 : rankWeights[arrayStart++] = tempRes;
199 : }
200 0 : }
201 :
202 : template <class Field, class Tp>
203 0 : int OrthogonalRecursiveBisection<Field, Tp>::findMedian(std::vector<Tf>& w) {
204 : // Special case when array must be cut in half in order to not have planes
205 0 : if (w.size() == 4) {
206 0 : return 1;
207 : }
208 : // Get total sum of array
209 0 : Tf tot = std::accumulate(w.begin(), w.end(), Tf(0));
210 :
211 : // Find position of median as half of total in array
212 0 : Tf half = 0.5 * tot;
213 0 : Tf curr = Tf(0);
214 : // Do not need to iterate to full extent since it must not give planes
215 0 : for (unsigned int i = 0; i < w.size() - 1; i++) {
216 0 : curr += w[i];
217 0 : if (curr >= half) {
218 : // If all particles are in the first plane, cut at 1 so to have size 2
219 0 : if (i == 0) {
220 0 : return 1;
221 : }
222 0 : Tf previous = curr - w[i];
223 : // curr - half < half - previous
224 0 : if ((curr + previous) <= tot
225 0 : && curr != half) { // if true then take current i, otherwise i-1
226 0 : if (i == w.size() - 2) {
227 0 : return (i - 1);
228 : } else {
229 0 : return i;
230 : }
231 : } else {
232 0 : return (i > 1) ? (i - 1) : 1;
233 : }
234 : }
235 : }
236 : // If all particles are in the last plane, cut two indices before the end so to have size 2
237 0 : return w.size() - 3;
238 : }
239 :
240 : template <class Field, class Tp>
241 0 : void OrthogonalRecursiveBisection<Field, Tp>::cutDomain(std::vector<NDIndex<Dim>>& domains,
242 : std::vector<int>& procs, int it,
243 : int cutAxis, int median) {
244 : // Cut domains[it] in half at median along cutAxis
245 0 : NDIndex<Dim> leftDom, rightDom;
246 0 : domains[it].split(leftDom, rightDom, cutAxis, median + domains[it][cutAxis].first());
247 0 : domains[it] = leftDom;
248 0 : domains.insert(domains.begin() + it + 1, rightDom);
249 :
250 : // Cut procs in half
251 0 : int temp = procs[it];
252 0 : procs[it] = procs[it] / 2;
253 0 : procs.insert(procs.begin() + it + 1, temp - procs[it]);
254 0 : }
255 :
256 : template <class Field, class Tp>
257 : template <typename Attrib>
258 24 : void OrthogonalRecursiveBisection<Field, Tp>::scatterR(const Attrib& r) {
259 : using vector_type = typename mesh_type::vector_type;
260 : static_assert(
261 : Kokkos::SpaceAccessibility<typename Attrib::memory_space,
262 : typename Field::memory_space>::accessible,
263 : "Particle attribute memory space must be accessible from ORB field memory space");
264 :
265 : // Reset local field
266 24 : bf_m = 0.0;
267 : // Get local data
268 24 : auto view = bf_m.getView();
269 24 : const mesh_type& mesh = bf_m.get_mesh();
270 24 : const FieldLayout<Dim>& layout = bf_m.getLayout();
271 24 : const NDIndex<Dim>& lDom = layout.getLocalNDIndex();
272 24 : const int nghost = bf_m.getNghost();
273 :
274 : // Get spacings
275 24 : const vector_type& dx = mesh.getMeshSpacing();
276 24 : const vector_type& origin = mesh.getOrigin();
277 24 : const vector_type invdx = 1.0 / dx;
278 :
279 : using policy_type = Kokkos::RangePolicy<size_t, typename Field::execution_space>;
280 :
281 24 : Kokkos::parallel_for(
282 48 : "ParticleAttrib::scatterR", policy_type(0, r.getParticleCount()),
283 3120 : KOKKOS_LAMBDA(const size_t idx) {
284 : // Find nearest grid point
285 3072 : Vector<Tp, Dim> l = (r(idx) - origin) * invdx + 0.5;
286 3072 : Vector<int, Dim> index = l;
287 3072 : Vector<Tf, Dim> whi = l - index;
288 3072 : Vector<Tf, Dim> wlo = 1.0 - whi;
289 :
290 3072 : Vector<size_t, Dim> args = index - lDom.first() + nghost;
291 :
292 : // Scatter
293 3072 : scatterToField(std::make_index_sequence<1 << Dim>{}, view, wlo, whi, args);
294 3072 : });
295 :
296 24 : bf_m.accumulateHalo();
297 24 : }
298 :
299 : } // namespace ippl
|