Line data Source code
1 : //
2 : // Class HaloCells
3 : // The guard / ghost cells of BareField.
4 : //
5 :
6 : #include <memory>
7 : #include <vector>
8 :
9 : #include "Utility/IpplException.h"
10 :
11 : #include "Communicate/Communicator.h"
12 :
13 : namespace ippl {
14 : namespace detail {
15 : template <typename T, unsigned Dim, class... ViewArgs>
16 1556 : HaloCells<T, Dim, ViewArgs...>::HaloCells() {}
17 :
18 : template <typename T, unsigned Dim, class... ViewArgs>
19 140 : void HaloCells<T, Dim, ViewArgs...>::accumulateHalo(view_type& view, Layout_t* layout) {
20 140 : exchangeBoundaries<lhs_plus_assign>(view, layout, HALO_TO_INTERNAL);
21 140 : }
22 :
23 : template <typename T, unsigned Dim, class... ViewArgs>
24 8 : void HaloCells<T, Dim, ViewArgs...>::accumulateHalo_noghost(view_type& view, Layout_t* layout, int nghost) {
25 8 : exchangeBoundaries<lhs_plus_assign>(view, layout, HALO_TO_INTERNAL_NOGHOST, nghost);
26 8 : }
27 : template <typename T, unsigned Dim, class... ViewArgs>
28 292 : void HaloCells<T, Dim, ViewArgs...>::fillHalo(view_type& view, Layout_t* layout) {
29 292 : exchangeBoundaries<assign>(view, layout, INTERNAL_TO_HALO);
30 292 : }
31 :
32 : template <typename T, unsigned Dim, class... ViewArgs>
33 : template <class Op>
34 440 : void HaloCells<T, Dim, ViewArgs...>::exchangeBoundaries(view_type& view, Layout_t* layout,
35 : SendOrder order, int nghost) {
36 : using neighbor_list = typename Layout_t::neighbor_list;
37 : using range_list = typename Layout_t::neighbor_range_list;
38 :
39 440 : auto& comm = layout->comm;
40 :
41 440 : const neighbor_list& neighbors = layout->getNeighbors();
42 440 : const range_list &sendRanges = layout->getNeighborsSendRange(),
43 440 : &recvRanges = layout->getNeighborsRecvRange();
44 :
45 440 : auto ldom = layout->getLocalNDIndex();
46 1864 : for (const auto& axis : ldom) {
47 1424 : if ((axis.length() == 1) && (Dim != 1)) {
48 0 : throw std::runtime_error(
49 : "HaloCells: Cannot do neighbour exchange when domain decomposition "
50 : "contains planes!");
51 : }
52 : }
53 :
54 : // needed for the NOGHOST approach - we want to remove the ghost
55 : // cells on the boundaries of the global domain from the halo
56 : // exchange when we set HALO_TO_INTERNAL_NOGHOST
57 440 : const auto domain = layout->getDomain();
58 440 : const auto& ldomains = layout->getHostLocalDomains();
59 :
60 440 : size_t totalRequests = 0;
61 66696 : for (const auto& componentNeighbors : neighbors) {
62 66256 : totalRequests += componentNeighbors.size();
63 : }
64 :
65 440 : int me=Comm->rank();
66 :
67 : using memory_space = typename view_type::memory_space;
68 : using buffer_type = mpi::Communicator::buffer_type<memory_space>;
69 440 : std::vector<MPI_Request> requests(totalRequests);
70 : // sending loop
71 440 : constexpr size_t cubeCount = detail::countHypercubes(Dim) - 1;
72 440 : size_t requestIndex = 0;
73 66696 : for (size_t index = 0; index < cubeCount; index++) {
74 66256 : int tag = mpi::tag::HALO + index;
75 66256 : const auto& componentNeighbors = neighbors[index];
76 75360 : for (size_t i = 0; i < componentNeighbors.size(); i++) {
77 9104 : int targetRank = componentNeighbors[i];
78 :
79 : bound_type range;
80 9104 : if (order == INTERNAL_TO_HALO) {
81 : /*We store only the sending and receiving ranges
82 : * of INTERNAL_TO_HALO and use the fact that the
83 : * sending range of HALO_TO_INTERNAL is the receiving
84 : * range of INTERNAL_TO_HALO and vice versa
85 : */
86 292 : range = sendRanges[index][i];
87 8812 : } else if (order == HALO_TO_INTERNAL_NOGHOST) {
88 8 : range = recvRanges[index][i];
89 :
90 24 : for (size_t j = 0; j < Dim; ++j) {
91 32 : bool isLower = ((range.lo[j] + ldomains[me][j].first()
92 16 : - nghost) == domain[j].min());
93 16 : bool isUpper = ((range.hi[j] - 1 +
94 16 : ldomains[me][j].first() - nghost)
95 16 : == domain[j].max());
96 16 : range.lo[j] += isLower * (nghost);
97 16 : range.hi[j] -= isUpper * (nghost);
98 : }
99 : } else {
100 8804 : range = recvRanges[index][i];
101 : }
102 :
103 : size_type nsends;
104 9104 : pack(range, view, haloData_m, nsends);
105 :
106 9104 : buffer_type buf = comm.template getBuffer<memory_space, T>(nsends);
107 :
108 9104 : comm.isend(targetRank, tag, haloData_m, *buf, requests[requestIndex++], nsends);
109 9104 : buf->resetWritePos();
110 : }
111 : }
112 :
113 : // receiving loop
114 66696 : for (size_t index = 0; index < cubeCount; index++) {
115 66256 : int tag = mpi::tag::HALO + Layout_t::getMatchingIndex(index);
116 66256 : const auto& componentNeighbors = neighbors[index];
117 75360 : for (size_t i = 0; i < componentNeighbors.size(); i++) {
118 9104 : int sourceRank = componentNeighbors[i];
119 :
120 : bound_type range;
121 9104 : if (order == INTERNAL_TO_HALO) {
122 292 : range = recvRanges[index][i];
123 8812 : } else if (order == HALO_TO_INTERNAL_NOGHOST) {
124 8 : range = sendRanges[index][i];
125 :
126 24 : for (size_t j = 0; j < Dim; ++j) {
127 32 : bool isLower = ((range.lo[j] + ldomains[me][j].first()
128 16 : - nghost) == domain[j].min());
129 16 : bool isUpper = ((range.hi[j] - 1 +
130 16 : ldomains[me][j].first() - nghost)
131 16 : == domain[j].max());
132 16 : range.lo[j] += isLower * (nghost);
133 16 : range.hi[j] -= isUpper * (nghost);
134 : }
135 : } else {
136 8804 : range = sendRanges[index][i];
137 : }
138 :
139 9104 : size_type nrecvs = range.size();
140 :
141 9104 : buffer_type buf = comm.template getBuffer<memory_space, T>(nrecvs);
142 :
143 9104 : comm.recv(sourceRank, tag, haloData_m, *buf, nrecvs * sizeof(T), nrecvs);
144 9104 : buf->resetReadPos();
145 :
146 9104 : unpack<Op>(range, view, haloData_m);
147 : }
148 : }
149 :
150 440 : if (totalRequests > 0) {
151 440 : MPI_Waitall(totalRequests, requests.data(), MPI_STATUSES_IGNORE);
152 : }
153 :
154 440 : comm.freeAllBuffers();
155 440 : }
156 :
157 : template <typename T, unsigned Dim, class... ViewArgs>
158 9200 : void HaloCells<T, Dim, ViewArgs...>::pack(const bound_type& range, const view_type& view,
159 : databuffer_type& fd, size_type& nsends) {
160 9200 : auto subview = makeSubview(view, range);
161 :
162 9200 : auto& buffer = fd.buffer;
163 :
164 9200 : size_t size = subview.size();
165 9200 : nsends = size;
166 9200 : if (buffer.size() < size) {
167 620 : int overalloc = Comm->getDefaultOverallocation();
168 620 : Kokkos::realloc(buffer, size * overalloc);
169 : }
170 :
171 : using index_array_type =
172 : typename RangePolicy<Dim, typename view_type::execution_space>::index_array_type;
173 9200 : ippl::parallel_for(
174 9200 : "HaloCells::pack()", getRangePolicy(subview),
175 1596584 : KOKKOS_LAMBDA(const index_array_type& args) {
176 789092 : int l = 0;
177 :
178 5189108 : for (unsigned d1 = 0; d1 < Dim; d1++) {
179 4400016 : int next = args[d1];
180 14673980 : for (unsigned d2 = 0; d2 < d1; d2++) {
181 0 : next *= subview.extent(d2);
182 : }
183 4400016 : l += next;
184 : }
185 :
186 789092 : buffer(l) = apply(subview, args);
187 : });
188 9200 : Kokkos::fence();
189 9200 : }
190 :
191 : template <typename T, unsigned Dim, class... ViewArgs>
192 : template <typename Op>
193 9200 : void HaloCells<T, Dim, ViewArgs...>::unpack(const bound_type& range, const view_type& view,
194 : databuffer_type& fd) {
195 9200 : auto subview = makeSubview(view, range);
196 9200 : auto buffer = fd.buffer;
197 :
198 : // 29. November 2020
199 : // https://stackoverflow.com/questions/3735398/operator-as-template-parameter
200 : Op op;
201 :
202 : using index_array_type =
203 : typename RangePolicy<Dim, typename view_type::execution_space>::index_array_type;
204 9200 : ippl::parallel_for(
205 9200 : "HaloCells::unpack()", getRangePolicy(subview),
206 1596584 : KOKKOS_LAMBDA(const index_array_type& args) {
207 789092 : int l = 0;
208 :
209 5189108 : for (unsigned d1 = 0; d1 < Dim; d1++) {
210 4400016 : int next = args[d1];
211 14673980 : for (unsigned d2 = 0; d2 < d1; d2++) {
212 0 : next *= subview.extent(d2);
213 : }
214 4400016 : l += next;
215 : }
216 :
217 1578184 : op(apply(subview, args), buffer(l));
218 : });
219 9200 : Kokkos::fence();
220 9200 : }
221 :
222 : template <typename T, unsigned Dim, class... ViewArgs>
223 18400 : auto HaloCells<T, Dim, ViewArgs...>::makeSubview(const view_type& view,
224 : const bound_type& intersect) {
225 36800 : auto makeSub = [&]<size_t... Idx>(const std::index_sequence<Idx...>&) {
226 : return Kokkos::subview(view,
227 117656 : Kokkos::make_pair(intersect.lo[Idx], intersect.hi[Idx])...);
228 : };
229 18400 : return makeSub(std::make_index_sequence<Dim>{});
230 : }
231 :
232 : template <typename T, unsigned Dim, class... ViewArgs>
233 : template <typename Op>
234 72 : void HaloCells<T, Dim, ViewArgs...>::applyPeriodicSerialDim(view_type& view,
235 : const Layout_t* layout,
236 : const int nghost) {
237 72 : int myRank = layout->comm.rank();
238 72 : const auto& lDomains = layout->getHostLocalDomains();
239 72 : const auto& domain = layout->getDomain();
240 :
241 : using exec_space = typename view_type::execution_space;
242 : using index_type = typename RangePolicy<Dim, exec_space>::index_type;
243 :
244 : Kokkos::Array<index_type, Dim> ext, begin, end;
245 :
246 324 : for (size_t i = 0; i < Dim; ++i) {
247 252 : ext[i] = view.extent(i);
248 252 : begin[i] = 0;
249 : }
250 :
251 : Op op;
252 :
253 324 : for (unsigned d = 0; d < Dim; ++d) {
254 252 : end = ext;
255 252 : end[d] = nghost;
256 :
257 252 : if (lDomains[myRank][d].length() == domain[d].length()) {
258 180 : int N = view.extent(d) - 1;
259 :
260 : using index_array_type =
261 : typename RangePolicy<Dim,
262 : typename view_type::execution_space>::index_array_type;
263 180 : ippl::parallel_for(
264 180 : "applyPeriodicSerialDim", createRangePolicy<Dim, exec_space>(begin, end),
265 7368984 : KOKKOS_LAMBDA(index_array_type & coords) {
266 : // The ghosts are filled starting from the inside
267 : // of the domain proceeding outwards for both lower
268 : // and upper faces. The extra brackets and explicit
269 : // mention
270 :
271 : // nghost + i
272 0 : coords[d] += nghost;
273 0 : auto&& left = apply(view, coords);
274 :
275 : // N - nghost - i
276 0 : coords[d] = N - coords[d];
277 0 : auto&& right = apply(view, coords);
278 :
279 : // nghost - 1 - i
280 0 : coords[d] += 2 * nghost - 1 - N;
281 0 : op(apply(view, coords), right);
282 :
283 : // N - (nghost - 1 - i) = N - (nghost - 1) + i
284 0 : coords[d] = N - coords[d];
285 0 : op(apply(view, coords), left);
286 : });
287 :
288 360 : Kokkos::fence();
289 : }
290 : }
291 72 : }
292 : } // namespace detail
293 : } // namespace ippl
|