Line data Source code
1 : //
2 : // Class FFTOpenPoissonSolver
3 : // FFT-based Poisson Solver for open boundaries.
4 : // Solves laplace(phi) = -rho, and E = -grad(phi).
5 : //
6 : //
7 :
8 : // Communication specific functions (pack and unpack).
9 : template <typename Tb, typename Tf>
10 0 : void pack(const ippl::NDIndex<3> intersect, Kokkos::View<Tf***>& view,
11 : ippl::detail::FieldBufferData<Tb>& fd, int nghost, const ippl::NDIndex<3> ldom,
12 : ippl::mpi::Communicator::size_type& nsends) {
13 0 : Kokkos::View<Tb*>& buffer = fd.buffer;
14 :
15 0 : size_t size = intersect.size();
16 0 : nsends = size;
17 0 : if (buffer.size() < size) {
18 0 : const int overalloc = ippl::Comm->getDefaultOverallocation();
19 0 : Kokkos::realloc(buffer, size * overalloc);
20 : }
21 :
22 0 : const int first0 = intersect[0].first() + nghost - ldom[0].first();
23 0 : const int first1 = intersect[1].first() + nghost - ldom[1].first();
24 0 : const int first2 = intersect[2].first() + nghost - ldom[2].first();
25 :
26 0 : const int last0 = intersect[0].last() + nghost - ldom[0].first() + 1;
27 0 : const int last1 = intersect[1].last() + nghost - ldom[1].first() + 1;
28 0 : const int last2 = intersect[2].last() + nghost - ldom[2].first() + 1;
29 :
30 : using mdrange_type = Kokkos::MDRangePolicy<Kokkos::Rank<3>>;
31 : // This type casting to long int is necessary as otherwise Kokkos complains for
32 : // intel compilers
33 0 : Kokkos::parallel_for(
34 : "pack()",
35 0 : mdrange_type({first0, first1, first2}, {(long int)last0, (long int)last1, (long int)last2}),
36 0 : KOKKOS_LAMBDA(const size_t i, const size_t j, const size_t k) {
37 0 : const int ig = i - first0;
38 0 : const int jg = j - first1;
39 0 : const int kg = k - first2;
40 :
41 0 : int l = ig + jg * intersect[0].length()
42 0 : + kg * intersect[1].length() * intersect[0].length();
43 :
44 0 : Kokkos::complex<Tb> val = view(i, j, k);
45 :
46 0 : buffer(l) = Kokkos::real(val);
47 : });
48 0 : Kokkos::fence();
49 0 : }
50 :
51 : template <int tensorRank, typename Tb, typename Tf>
52 0 : void unpack_impl(const ippl::NDIndex<3> intersect, const Kokkos::View<Tf***>& view,
53 : ippl::detail::FieldBufferData<Tb>& fd, int nghost, const ippl::NDIndex<3> ldom,
54 : size_t dim1 = 0, size_t dim2 = 0, bool x = false, bool y = false, bool z = false) {
55 0 : Kokkos::View<Tb*>& buffer = fd.buffer;
56 :
57 0 : const int first0 = intersect[0].first() + nghost - ldom[0].first();
58 0 : const int first1 = intersect[1].first() + nghost - ldom[1].first();
59 0 : const int first2 = intersect[2].first() + nghost - ldom[2].first();
60 :
61 0 : const int last0 = intersect[0].last() + nghost - ldom[0].first() + 1;
62 0 : const int last1 = intersect[1].last() + nghost - ldom[1].first() + 1;
63 0 : const int last2 = intersect[2].last() + nghost - ldom[2].first() + 1;
64 :
65 : using mdrange_type = Kokkos::MDRangePolicy<Kokkos::Rank<3>>;
66 0 : Kokkos::parallel_for(
67 0 : "pack()", mdrange_type({first0, first1, first2}, {last0, last1, last2}),
68 0 : KOKKOS_LAMBDA(const size_t i, const size_t j, const size_t k) {
69 0 : int ig = i - first0;
70 0 : int jg = j - first1;
71 0 : int kg = k - first2;
72 :
73 0 : ig = x * (intersect[0].length() - 2 * ig - 1) + ig;
74 0 : jg = y * (intersect[1].length() - 2 * jg - 1) + jg;
75 0 : kg = z * (intersect[2].length() - 2 * kg - 1) + kg;
76 :
77 0 : int l = ig + jg * intersect[0].length()
78 0 : + kg * intersect[1].length() * intersect[0].length();
79 :
80 0 : ippl::detail::ViewAccess<tensorRank, decltype(view)>::get(view, dim1, dim2, i, j, k) =
81 0 : buffer(l);
82 : });
83 0 : Kokkos::fence();
84 0 : }
85 :
86 : template <typename Tb, typename Tf>
87 0 : void unpack(const ippl::NDIndex<3> intersect, const Kokkos::View<Tf***>& view,
88 : ippl::detail::FieldBufferData<Tb>& fd, int nghost, const ippl::NDIndex<3> ldom,
89 : bool x = false, bool y = false, bool z = false) {
90 0 : unpack_impl<0, Tb, Tf>(intersect, view, fd, nghost, ldom, 0, 0, x, y, z);
91 0 : }
92 :
93 : template <typename Tb, typename Tf>
94 0 : void unpack(const ippl::NDIndex<3> intersect, const Kokkos::View<ippl::Vector<Tf, 3>***>& view,
95 : size_t dim1, ippl::detail::FieldBufferData<Tb>& fd, int nghost,
96 : const ippl::NDIndex<3> ldom) {
97 0 : unpack_impl<1, Tb, ippl::Vector<Tf, 3>>(intersect, view, fd, nghost, ldom, dim1);
98 0 : }
99 :
100 : template <typename Tb, typename Tf>
101 0 : void unpack(const ippl::NDIndex<3> intersect,
102 : const Kokkos::View<ippl::Vector<ippl::Vector<Tf, 3>, 3>***>& view,
103 : ippl::detail::FieldBufferData<Tb>& fd, int nghost, const ippl::NDIndex<3> ldom,
104 : size_t dim1, size_t dim2) {
105 0 : unpack_impl<2, Tb, ippl::Vector<ippl::Vector<Tf, 3>, 3>>(intersect, view, fd, nghost, ldom,
106 : dim1, dim2);
107 0 : }
108 :
109 : template <typename Tb, typename Tf, unsigned Dim>
110 0 : void solver_send(int TAG, int id, int i, const ippl::NDIndex<Dim> intersection,
111 : const ippl::NDIndex<Dim> ldom, int nghost, Kokkos::View<Tf***>& view,
112 : ippl::detail::FieldBufferData<Tb>& fd, std::vector<MPI_Request>& requests) {
113 : using memory_space = typename Kokkos::View<Tf***>::memory_space;
114 :
115 0 : requests.resize(requests.size() + 1);
116 :
117 : ippl::mpi::Communicator::size_type nsends;
118 0 : pack(intersection, view, fd, nghost, ldom, nsends);
119 :
120 : // Buffer message indicates the domain intersection (x, y, z, xy, yz, xz, xyz).
121 0 : ippl::mpi::Communicator::buffer_type<memory_space> buf =
122 : ippl::Comm->getBuffer<memory_space, Tf>(nsends);
123 :
124 0 : int tag = TAG + id;
125 :
126 0 : ippl::Comm->isend(i, tag, fd, *buf, requests.back(), nsends);
127 0 : buf->resetWritePos();
128 0 : }
129 :
130 : template <typename Tb, typename Tf, unsigned Dim>
131 0 : void solver_recv(int TAG, int id, int i, const ippl::NDIndex<Dim> intersection,
132 : const ippl::NDIndex<Dim> ldom, int nghost, Kokkos::View<Tf***>& view,
133 : ippl::detail::FieldBufferData<Tb>& fd, bool x = false, bool y = false,
134 : bool z = false) {
135 : using memory_space = typename Kokkos::View<Tf***>::memory_space;
136 :
137 : ippl::mpi::Communicator::size_type nrecvs;
138 0 : nrecvs = intersection.size();
139 :
140 : // Buffer message indicates the domain intersection (x, y, z, xy, yz, xz, xyz).
141 0 : ippl::mpi::Communicator::buffer_type<memory_space> buf =
142 : ippl::Comm->getBuffer<memory_space, Tf>(nrecvs);
143 :
144 0 : int tag = TAG + id;
145 :
146 0 : ippl::Comm->recv(i, tag, fd, *buf, nrecvs * sizeof(Tf), nrecvs);
147 0 : buf->resetReadPos();
148 :
149 0 : unpack(intersection, view, fd, nghost, ldom, x, y, z);
150 0 : }
151 :
152 : namespace ippl {
153 :
154 : /////////////////////////////////////////////////////////////////////////
155 : // constructor and destructor
156 : template <typename FieldLHS, typename FieldRHS>
157 : FFTOpenPoissonSolver<FieldLHS, FieldRHS>::FFTOpenPoissonSolver()
158 : : Base()
159 : , mesh_mp(nullptr)
160 : , layout_mp(nullptr)
161 : , mesh2_m(nullptr)
162 : , layout2_m(nullptr)
163 : , meshComplex_m(nullptr)
164 : , layoutComplex_m(nullptr)
165 : , mesh4_m(nullptr)
166 : , layout4_m(nullptr)
167 : , mesh2n1_m(nullptr)
168 : , layout2n1_m(nullptr)
169 : , isGradFD_m(false) {
170 : setDefaultParameters();
171 : }
172 :
173 : template <typename FieldLHS, typename FieldRHS>
174 0 : FFTOpenPoissonSolver<FieldLHS, FieldRHS>::FFTOpenPoissonSolver(rhs_type& rhs,
175 : ParameterList& params)
176 0 : : mesh_mp(nullptr)
177 0 : , layout_mp(nullptr)
178 0 : , mesh2_m(nullptr)
179 0 : , layout2_m(nullptr)
180 0 : , meshComplex_m(nullptr)
181 0 : , layoutComplex_m(nullptr)
182 0 : , mesh4_m(nullptr)
183 0 : , layout4_m(nullptr)
184 0 : , mesh2n1_m(nullptr)
185 0 : , layout2n1_m(nullptr)
186 0 : , isGradFD_m(false) {
187 : using T = typename FieldLHS::value_type::value_type;
188 : static_assert(std::is_floating_point<T>::value, "Not a floating point type");
189 :
190 0 : setDefaultParameters();
191 0 : this->params_m.merge(params);
192 0 : this->params_m.update("output_type", Base::SOL);
193 :
194 0 : this->setRhs(rhs);
195 0 : }
196 :
197 : template <typename FieldLHS, typename FieldRHS>
198 0 : FFTOpenPoissonSolver<FieldLHS, FieldRHS>::FFTOpenPoissonSolver(lhs_type& lhs, rhs_type& rhs,
199 : ParameterList& params)
200 0 : : mesh_mp(nullptr)
201 0 : , layout_mp(nullptr)
202 0 : , mesh2_m(nullptr)
203 0 : , layout2_m(nullptr)
204 0 : , meshComplex_m(nullptr)
205 0 : , layoutComplex_m(nullptr)
206 0 : , mesh4_m(nullptr)
207 0 : , layout4_m(nullptr)
208 0 : , mesh2n1_m(nullptr)
209 0 : , layout2n1_m(nullptr)
210 0 : , isGradFD_m(false) {
211 : using T = typename FieldLHS::value_type::value_type;
212 : static_assert(std::is_floating_point<T>::value, "Not a floating point type");
213 :
214 0 : setDefaultParameters();
215 0 : this->params_m.merge(params);
216 :
217 0 : this->setLhs(lhs);
218 0 : this->setRhs(rhs);
219 0 : }
220 :
221 : /////////////////////////////////////////////////////////////////////////
222 : // override setRhs to call class-specific initialization
223 : template <typename FieldLHS, typename FieldRHS>
224 0 : void FFTOpenPoissonSolver<FieldLHS, FieldRHS>::setRhs(rhs_type& rhs) {
225 0 : Base::setRhs(rhs);
226 :
227 : // start a timer
228 0 : static IpplTimings::TimerRef initialize = IpplTimings::getTimer("Initialize");
229 0 : IpplTimings::startTimer(initialize);
230 :
231 0 : initializeFields();
232 :
233 0 : IpplTimings::stopTimer(initialize);
234 0 : }
235 :
236 : /////////////////////////////////////////////////////////////////////////
237 : // allows user to set gradient of phi = Efield instead of spectral
238 : // calculation of Efield (which uses FFTs)
239 :
240 : template <typename FieldLHS, typename FieldRHS>
241 : void FFTOpenPoissonSolver<FieldLHS, FieldRHS>::setGradFD() {
242 : // get the output type (sol, grad, or sol & grad)
243 : const int out = this->params_m.template get<int>("output_type");
244 :
245 : if (out != Base::SOL_AND_GRAD) {
246 : throw IpplException(
247 : "FFTOpenPoissonSolver::setGradFD()",
248 : "Cannot use gradient for Efield computation unless output type is SOL_AND_GRAD");
249 : } else {
250 : isGradFD_m = true;
251 : }
252 : }
253 :
254 : /////////////////////////////////////////////////////////////////////////
255 : // initializeFields method, called in constructor
256 :
257 : template <typename FieldLHS, typename FieldRHS>
258 0 : void FFTOpenPoissonSolver<FieldLHS, FieldRHS>::initializeFields() {
259 : // get algorithm and hessian flag from parameter list
260 0 : const int alg = this->params_m.template get<int>("algorithm");
261 0 : const bool hessian = this->params_m.template get<bool>("hessian");
262 :
263 : // first check if valid algorithm choice
264 0 : if ((alg != Algorithm::VICO) && (alg != Algorithm::HOCKNEY)
265 0 : && (alg != Algorithm::BIHARMONIC) && (alg != Algorithm::DCT_VICO)) {
266 0 : throw IpplException("FFTOpenPoissonSolver::initializeFields()",
267 : "Currently only HOCKNEY, VICO, DCT_VICO, and BIHARMONIC are "
268 : "supported for open BCs");
269 : }
270 :
271 : // get layout and mesh
272 0 : layout_mp = &(this->rhs_mp->getLayout());
273 0 : mesh_mp = &(this->rhs_mp->get_mesh());
274 0 : mpi::Communicator comm = layout_mp->comm;
275 :
276 : // get mesh spacing and origin
277 0 : hr_m = mesh_mp->getMeshSpacing();
278 0 : vector_type origin = mesh_mp->getOrigin();
279 :
280 : // create domain for the real fields
281 0 : domain_m = layout_mp->getDomain();
282 :
283 : // get the mesh spacings and sizes for each dimension
284 0 : for (unsigned int i = 0; i < Dim; ++i) {
285 0 : nr_m[i] = domain_m[i].length();
286 :
287 : // create the doubled domain for the FFT procedure
288 0 : domain2_m[i] = Index(2 * nr_m[i]);
289 : }
290 :
291 : // define decomposition (parallel / serial)
292 0 : std::array<bool, Dim> isParallel = layout_mp->isParallel();
293 :
294 : // create double sized mesh and layout objects using the previously defined domain2_m
295 : using mesh_type = typename lhs_type::Mesh_t;
296 0 : mesh2_m = std::unique_ptr<mesh_type>(new mesh_type(domain2_m, hr_m, origin));
297 0 : layout2_m = std::unique_ptr<FieldLayout_t>(new FieldLayout_t(comm, domain2_m, isParallel));
298 :
299 : // create the domain for the transformed (complex) fields
300 : // since we use HeFFTe for the transforms it doesn't require permuting to the right
301 : // one of the dimensions has only (n/2 +1) as our original fields are fully real
302 : // the dimension is given by the user via r2c_direction
303 0 : unsigned int RCDirection = this->params_m.template get<int>("r2c_direction");
304 0 : for (unsigned int i = 0; i < Dim; ++i) {
305 0 : if (i == RCDirection) {
306 0 : domainComplex_m[RCDirection] = Index(nr_m[RCDirection] + 1);
307 : } else {
308 0 : domainComplex_m[i] = Index(2 * nr_m[i]);
309 : }
310 : }
311 :
312 : // create mesh and layout for the real to complex FFT transformed fields
313 0 : meshComplex_m = std::unique_ptr<mesh_type>(new mesh_type(domainComplex_m, hr_m, origin));
314 0 : layoutComplex_m =
315 0 : std::unique_ptr<FieldLayout_t>(new FieldLayout_t(comm, domainComplex_m, isParallel));
316 :
317 : // initialize fields
318 0 : storage_field.initialize(*mesh2_m, *layout2_m);
319 0 : rho2tr_m.initialize(*meshComplex_m, *layoutComplex_m);
320 0 : grntr_m.initialize(*meshComplex_m, *layoutComplex_m);
321 :
322 0 : int out = this->params_m.template get<int>("output_type");
323 0 : if (((out == Base::GRAD || out == Base::SOL_AND_GRAD) && !isGradFD_m) || hessian) {
324 0 : temp_m.initialize(*meshComplex_m, *layoutComplex_m);
325 : }
326 :
327 0 : if (hessian) {
328 0 : hess_m.initialize(*mesh_mp, *layout_mp);
329 : }
330 :
331 : // create the FFT object
332 0 : fft_m = std::make_unique<FFT_t>(*layout2_m, *layoutComplex_m, this->params_m);
333 :
334 : // if Vico, also need to create mesh and layout for 4N Fourier domain
335 : // on this domain, the truncated Green's function is defined
336 : // also need to create the 4N complex grid, on which precomputation step done
337 0 : if (alg == Algorithm::VICO || alg == Algorithm::BIHARMONIC) {
338 : // start a timer
339 0 : static IpplTimings::TimerRef initialize_vico =
340 0 : IpplTimings::getTimer("Initialize: extra Vico");
341 0 : IpplTimings::startTimer(initialize_vico);
342 :
343 0 : for (unsigned int i = 0; i < Dim; ++i) {
344 0 : domain4_m[i] = Index(4 * nr_m[i]);
345 : }
346 :
347 : // 4N grid
348 : using mesh_type = typename lhs_type::Mesh_t;
349 0 : mesh4_m = std::unique_ptr<mesh_type>(new mesh_type(domain4_m, hr_m, origin));
350 0 : layout4_m =
351 0 : std::unique_ptr<FieldLayout_t>(new FieldLayout_t(comm, domain4_m, isParallel));
352 :
353 : // initialize fields
354 0 : grnL_m.initialize(*mesh4_m, *layout4_m);
355 :
356 : // create a Complex-to-Complex FFT object to transform for layout4
357 0 : fft4n_m = std::make_unique<FFT<CCTransform, CxField_gt>>(*layout4_m, this->params_m);
358 :
359 0 : IpplTimings::stopTimer(initialize_vico);
360 : }
361 :
362 : // if DCT_VICO, need 2N+1 mesh, layout, domain, and green's function field for
363 : // precomputation
364 0 : if (alg == Algorithm::DCT_VICO) {
365 : // start a timer
366 0 : static IpplTimings::TimerRef initialize_vico =
367 0 : IpplTimings::getTimer("Initialize: extra Vico");
368 0 : IpplTimings::startTimer(initialize_vico);
369 :
370 : // 2N+1 domain for DCT
371 0 : for (unsigned int i = 0; i < Dim; ++i) {
372 0 : domain2n1_m[i] = Index(2 * nr_m[i] + 1);
373 : }
374 :
375 : // 2N+1 grid
376 0 : mesh2n1_m = std::unique_ptr<mesh_type>(new mesh_type(domain2n1_m, hr_m, origin));
377 0 : layout2n1_m =
378 0 : std::unique_ptr<FieldLayout_t>(new FieldLayout_t(comm, domain2n1_m, isParallel));
379 :
380 : // initialize fields
381 0 : grn2n1_m.initialize(*mesh2n1_m, *layout2n1_m); // 2N+1 grnL
382 :
383 : // create real to real FFT object for 2N+1 grid
384 0 : fft2n1_m = std::make_unique<FFT<Cos1Transform, Field_t>>(*layout2n1_m, this->params_m);
385 :
386 0 : IpplTimings::stopTimer(initialize_vico);
387 : }
388 :
389 : // these are fields that are used for calculating the Green's function for Hockney
390 0 : if (alg == Algorithm::HOCKNEY) {
391 : // start a timer
392 0 : static IpplTimings::TimerRef initialize_hockney =
393 0 : IpplTimings::getTimer("Initialize: extra Hockney");
394 0 : IpplTimings::startTimer(initialize_hockney);
395 :
396 0 : for (unsigned int d = 0; d < Dim; ++d) {
397 0 : grnIField_m[d].initialize(*mesh2_m, *layout2_m);
398 :
399 : // get number of ghost points and the Kokkos view to iterate over field
400 0 : auto view = grnIField_m[d].getView();
401 0 : const int nghost = grnIField_m[d].getNghost();
402 0 : const auto& ldom = layout2_m->getLocalNDIndex();
403 :
404 : // the length of the physical domain
405 0 : const int size = nr_m[d];
406 :
407 : // Kokkos parallel for loop to initialize grnIField[d]
408 0 : switch (d) {
409 0 : case 0:
410 0 : Kokkos::parallel_for(
411 : "Helper index Green field initialization",
412 0 : grnIField_m[d].getFieldRangePolicy(),
413 0 : KOKKOS_LAMBDA(const int i, const int j, const int k) {
414 : // go from local indices to global
415 0 : const int ig = i + ldom[0].first() - nghost;
416 0 : const int jg = j + ldom[1].first() - nghost;
417 0 : const int kg = k + ldom[2].first() - nghost;
418 :
419 : // assign (index)^2 if 0 <= index < N, and (2N-index)^2 elsewhere
420 0 : const bool outsideN = (ig >= size);
421 0 : view(i, j, k) =
422 0 : (2 * size * outsideN - ig) * (2 * size * outsideN - ig);
423 :
424 : // add 1.0 if at (0,0,0) to avoid singularity
425 0 : const bool isOrig = ((ig == 0) && (jg == 0) && (kg == 0));
426 0 : view(i, j, k) += isOrig * 1.0;
427 : });
428 0 : break;
429 0 : case 1:
430 0 : Kokkos::parallel_for(
431 : "Helper index Green field initialization",
432 0 : grnIField_m[d].getFieldRangePolicy(),
433 0 : KOKKOS_LAMBDA(const int i, const int j, const int k) {
434 : // go from local indices to global
435 0 : const int jg = j + ldom[1].first() - nghost;
436 :
437 : // assign (index)^2 if 0 <= index < N, and (2N-index)^2 elsewhere
438 0 : const bool outsideN = (jg >= size);
439 0 : view(i, j, k) =
440 0 : (2 * size * outsideN - jg) * (2 * size * outsideN - jg);
441 : });
442 0 : break;
443 0 : case 2:
444 0 : Kokkos::parallel_for(
445 : "Helper index Green field initialization",
446 0 : grnIField_m[d].getFieldRangePolicy(),
447 0 : KOKKOS_LAMBDA(const int i, const int j, const int k) {
448 : // go from local indices to global
449 0 : const int kg = k + ldom[2].first() - nghost;
450 :
451 : // assign (index)^2 if 0 <= index < N, and (2N-index)^2 elsewhere
452 0 : const bool outsideN = (kg >= size);
453 0 : view(i, j, k) =
454 0 : (2 * size * outsideN - kg) * (2 * size * outsideN - kg);
455 : });
456 0 : break;
457 : }
458 : }
459 0 : IpplTimings::stopTimer(initialize_hockney);
460 : }
461 :
462 0 : static IpplTimings::TimerRef warmup = IpplTimings::getTimer("Warmup");
463 0 : IpplTimings::startTimer(warmup);
464 :
465 : // "empty" transforms to warmup all the FFTs
466 0 : fft_m->warmup(rho2_mr, rho2tr_m);
467 0 : if (alg == Algorithm::VICO || alg == Algorithm::BIHARMONIC) {
468 0 : fft4n_m->warmup(grnL_m);
469 : }
470 0 : if (alg == Algorithm::DCT_VICO) {
471 0 : fft2n1_m->warmup(grn2n1_m);
472 : }
473 :
474 0 : IpplTimings::stopTimer(warmup);
475 :
476 0 : rho2_mr = 0.0;
477 0 : rho2tr_m = 0.0;
478 0 : grnL_m = 0.0;
479 0 : grn2n1_m = 0.0;
480 :
481 : // call greensFunction and we will get the transformed G in the class attribute
482 : // this is done in initialization so that we already have the precomputed fct
483 : // for all timesteps (green's fct will only change if mesh size changes)
484 0 : static IpplTimings::TimerRef ginit = IpplTimings::getTimer("Green Init");
485 0 : IpplTimings::startTimer(ginit);
486 0 : greensFunction();
487 0 : IpplTimings::stopTimer(ginit);
488 0 : };
489 :
490 : /////////////////////////////////////////////////////////////////////////
491 : // compute electric potential by solving Poisson's eq given a field rho and mesh spacings hr
492 : template <typename FieldLHS, typename FieldRHS>
493 0 : void FFTOpenPoissonSolver<FieldLHS, FieldRHS>::solve() {
494 : // start a timer
495 0 : static IpplTimings::TimerRef solve = IpplTimings::getTimer("Solve");
496 0 : IpplTimings::startTimer(solve);
497 :
498 : // get the output type (sol, grad, or sol & grad)
499 0 : const int out = this->params_m.template get<int>("output_type");
500 :
501 : // get the algorithm (hockney, vico, or biharmonic)
502 0 : const int alg = this->params_m.template get<int>("algorithm");
503 :
504 : // get hessian flag (if true, we compute the Hessian)
505 0 : const bool hessian = this->params_m.template get<bool>("hessian");
506 :
507 : // set the mesh & spacing, which may change each timestep
508 0 : mesh_mp = &(this->rhs_mp->get_mesh());
509 :
510 : // check whether the mesh spacing has changed with respect to the old one
511 : // if yes, update and set green flag to true
512 0 : bool green = false;
513 0 : for (unsigned int i = 0; i < Dim; ++i) {
514 0 : if (hr_m[i] != mesh_mp->getMeshSpacing(i)) {
515 0 : hr_m[i] = mesh_mp->getMeshSpacing(i);
516 0 : green = true;
517 : }
518 : }
519 :
520 : // set mesh spacing on the other grids again
521 0 : mesh2_m->setMeshSpacing(hr_m);
522 0 : meshComplex_m->setMeshSpacing(hr_m);
523 :
524 : // field object on the doubled grid; zero-padded
525 0 : rho2_mr = 0.0;
526 :
527 : // start a timer
528 0 : static IpplTimings::TimerRef stod = IpplTimings::getTimer("Solve: Physical to double");
529 0 : IpplTimings::startTimer(stod);
530 :
531 : // store rho (RHS) in the lower left quadrant of the doubled grid
532 : // with or without communication (if only 1 rank)
533 :
534 0 : const int ranks = Comm->size();
535 :
536 0 : auto view2 = rho2_mr.getView();
537 0 : auto view1 = this->rhs_mp->getView();
538 :
539 0 : const int nghost2 = rho2_mr.getNghost();
540 0 : const int nghost1 = this->rhs_mp->getNghost();
541 :
542 0 : const auto& ldom2 = layout2_m->getLocalNDIndex();
543 0 : const auto& ldom1 = layout_mp->getLocalNDIndex();
544 :
545 0 : if (ranks > 1) {
546 : // COMMUNICATION
547 0 : const auto& lDomains2 = layout2_m->getHostLocalDomains();
548 :
549 : // send
550 0 : std::vector<MPI_Request> requests(0);
551 :
552 0 : for (int i = 0; i < ranks; ++i) {
553 0 : if (lDomains2[i].touches(ldom1)) {
554 0 : auto intersection = lDomains2[i].intersect(ldom1);
555 :
556 0 : solver_send(mpi::tag::OPEN_SOLVER, 0, i, intersection, ldom1, nghost1, view1,
557 0 : fd_m, requests);
558 : }
559 : }
560 :
561 : // receive
562 0 : const auto& lDomains1 = layout_mp->getHostLocalDomains();
563 :
564 0 : for (int i = 0; i < ranks; ++i) {
565 0 : if (lDomains1[i].touches(ldom2)) {
566 0 : auto intersection = lDomains1[i].intersect(ldom2);
567 :
568 : mpi::Communicator::size_type nrecvs;
569 0 : nrecvs = intersection.size();
570 :
571 0 : buffer_type buf = Comm->getBuffer<memory_space, Trhs>(nrecvs);
572 :
573 0 : Comm->recv(i, mpi::tag::OPEN_SOLVER, fd_m, *buf, nrecvs * sizeof(Trhs), nrecvs);
574 0 : buf->resetReadPos();
575 :
576 0 : unpack(intersection, view2, fd_m, nghost2, ldom2);
577 0 : }
578 : }
579 :
580 : // wait for all messages to be received
581 0 : if (requests.size() > 0) {
582 0 : MPI_Waitall(requests.size(), requests.data(), MPI_STATUSES_IGNORE);
583 : }
584 0 : ippl::Comm->freeAllBuffers();
585 :
586 0 : } else {
587 0 : Kokkos::parallel_for(
588 0 : "Write rho on the doubled grid", this->rhs_mp->getFieldRangePolicy(),
589 0 : KOKKOS_LAMBDA(const size_t i, const size_t j, const size_t k) {
590 0 : const size_t ig2 = i + ldom2[0].first() - nghost2;
591 0 : const size_t jg2 = j + ldom2[1].first() - nghost2;
592 0 : const size_t kg2 = k + ldom2[2].first() - nghost2;
593 :
594 0 : const size_t ig1 = i + ldom1[0].first() - nghost1;
595 0 : const size_t jg1 = j + ldom1[1].first() - nghost1;
596 0 : const size_t kg1 = k + ldom1[2].first() - nghost1;
597 :
598 : // write physical rho on [0,N-1] of doubled field
599 0 : const bool isQuadrant1 = ((ig1 == ig2) && (jg1 == jg2) && (kg1 == kg2));
600 0 : view2(i, j, k) = view1(i, j, k) * isQuadrant1;
601 : });
602 : }
603 :
604 0 : IpplTimings::stopTimer(stod);
605 :
606 : // start a timer
607 0 : static IpplTimings::TimerRef fftrho = IpplTimings::getTimer("FFT: Rho");
608 0 : IpplTimings::startTimer(fftrho);
609 :
610 : // forward FFT of the charge density field on doubled grid
611 0 : fft_m->transform(FORWARD, rho2_mr, rho2tr_m);
612 :
613 0 : IpplTimings::stopTimer(fftrho);
614 :
615 : // call greensFunction to recompute if the mesh spacing has changed
616 0 : if (green) {
617 0 : greensFunction();
618 : }
619 :
620 : // multiply FFT(rho2)*FFT(green)
621 : // convolution becomes multiplication in FFT
622 : // minus sign since we are solving laplace(phi) = -rho
623 0 : rho2tr_m = -rho2tr_m * grntr_m;
624 :
625 : // if output_type is SOL or SOL_AND_GRAD, we caculate solution
626 0 : if ((out == Base::SOL) || (out == Base::SOL_AND_GRAD)) {
627 : // start a timer
628 0 : static IpplTimings::TimerRef fftc = IpplTimings::getTimer("FFT: Convolution");
629 0 : IpplTimings::startTimer(fftc);
630 :
631 : // inverse FFT of the product and store the electrostatic potential in rho2_mr
632 0 : fft_m->transform(BACKWARD, rho2_mr, rho2tr_m);
633 :
634 0 : IpplTimings::stopTimer(fftc);
635 : // Hockney: multiply the rho2_mr field by the total number of points to account for
636 : // double counting (rho and green) of normalization factor in forward transform
637 : // also multiply by the mesh spacing^3 (to account for discretization)
638 : // Vico: need to multiply by normalization factor of 1/4N^3,
639 : // since only backward transform was performed on the 4N grid
640 : // DCT_VICO: need to multiply by a factor of (2N)^3 to match the normalization factor in
641 : // the transform.
642 0 : for (unsigned int i = 0; i < Dim; ++i) {
643 0 : switch (alg) {
644 0 : case Algorithm::HOCKNEY:
645 0 : rho2_mr = rho2_mr * 2.0 * nr_m[i] * hr_m[i];
646 0 : break;
647 0 : case Algorithm::VICO:
648 : case Algorithm::BIHARMONIC:
649 0 : rho2_mr = rho2_mr * 2.0 * (1.0 / 4.0);
650 0 : break;
651 0 : case Algorithm::DCT_VICO:
652 0 : rho2_mr = rho2_mr * (1.0 / 4.0);
653 0 : break;
654 0 : default:
655 0 : throw IpplException(
656 : "FFTOpenPoissonSolver::initializeFields()",
657 : "Currently only HOCKNEY, VICO, DCT_VICO, and BIHARMONIC are "
658 : "supported for open BCs");
659 : }
660 : }
661 :
662 : // start a timer
663 0 : static IpplTimings::TimerRef dtos = IpplTimings::getTimer("Solve: Double to physical");
664 0 : IpplTimings::startTimer(dtos);
665 :
666 : // get the physical part only --> physical electrostatic potential is now given in RHS
667 : // need communication if more than one rank
668 :
669 0 : if (ranks > 1) {
670 : // COMMUNICATION
671 :
672 : // send
673 0 : const auto& lDomains1 = layout_mp->getHostLocalDomains();
674 :
675 0 : std::vector<MPI_Request> requests(0);
676 :
677 0 : for (int i = 0; i < ranks; ++i) {
678 0 : if (lDomains1[i].touches(ldom2)) {
679 0 : auto intersection = lDomains1[i].intersect(ldom2);
680 :
681 0 : solver_send(mpi::tag::OPEN_SOLVER, 0, i, intersection, ldom2, nghost2,
682 0 : view2, fd_m, requests);
683 : }
684 : }
685 :
686 : // receive
687 0 : const auto& lDomains2 = layout2_m->getHostLocalDomains();
688 :
689 0 : for (int i = 0; i < ranks; ++i) {
690 0 : if (ldom1.touches(lDomains2[i])) {
691 0 : auto intersection = ldom1.intersect(lDomains2[i]);
692 :
693 : mpi::Communicator::size_type nrecvs;
694 0 : nrecvs = intersection.size();
695 :
696 0 : buffer_type buf = Comm->getBuffer<memory_space, Trhs>(nrecvs);
697 :
698 0 : Comm->recv(i, mpi::tag::OPEN_SOLVER, fd_m, *buf, nrecvs * sizeof(Trhs),
699 : nrecvs);
700 0 : buf->resetReadPos();
701 :
702 0 : unpack(intersection, view1, fd_m, nghost1, ldom1);
703 0 : }
704 : }
705 :
706 : // wait for all messages to be received
707 0 : if (requests.size() > 0) {
708 0 : MPI_Waitall(requests.size(), requests.data(), MPI_STATUSES_IGNORE);
709 : }
710 0 : ippl::Comm->freeAllBuffers();
711 :
712 0 : } else {
713 0 : Kokkos::parallel_for(
714 : "Write the solution into the LHS on physical grid",
715 0 : this->rhs_mp->getFieldRangePolicy(),
716 0 : KOKKOS_LAMBDA(const int i, const int j, const int k) {
717 0 : const int ig2 = i + ldom2[0].first() - nghost2;
718 0 : const int jg2 = j + ldom2[1].first() - nghost2;
719 0 : const int kg2 = k + ldom2[2].first() - nghost2;
720 :
721 0 : const int ig = i + ldom1[0].first() - nghost1;
722 0 : const int jg = j + ldom1[1].first() - nghost1;
723 0 : const int kg = k + ldom1[2].first() - nghost1;
724 :
725 : // take [0,N-1] as physical solution
726 0 : const bool isQuadrant1 = ((ig == ig2) && (jg == jg2) && (kg == kg2));
727 0 : view1(i, j, k) = view2(i, j, k) * isQuadrant1;
728 : });
729 : }
730 0 : IpplTimings::stopTimer(dtos);
731 : }
732 :
733 : // if we want finite differences Efield = -grad(phi)
734 : // instead of computing it in Fourier domain
735 : // this is only possible if SOL_AND_GRAD is the output type
736 0 : if (isGradFD_m && (out == Base::SOL_AND_GRAD)) {
737 0 : *(this->lhs_mp) = -grad(*this->rhs_mp);
738 : }
739 :
740 : // if output_type is GRAD or SOL_AND_GRAD, we calculate E-field (gradient in Fourier domain)
741 0 : if (((out == Base::GRAD) || (out == Base::SOL_AND_GRAD)) && (!isGradFD_m)) {
742 : // start a timer
743 0 : static IpplTimings::TimerRef efield = IpplTimings::getTimer("Solve: Electric field");
744 0 : IpplTimings::startTimer(efield);
745 :
746 : // get E field view (LHS)
747 0 : auto viewL = this->lhs_mp->getView();
748 0 : const int nghostL = this->lhs_mp->getNghost();
749 :
750 : // get rho2tr_m view (as we want to multiply by ik then transform)
751 0 : auto viewR = rho2tr_m.getView();
752 0 : const int nghostR = rho2tr_m.getNghost();
753 0 : const auto& ldomR = layoutComplex_m->getLocalNDIndex();
754 :
755 : // use temp_m as a temporary complex field
756 0 : auto view_g = temp_m.getView();
757 :
758 : // define some constants
759 0 : const scalar_type pi = Kokkos::numbers::pi_v<scalar_type>;
760 0 : const Kokkos::complex<Trhs> I = {0.0, 1.0};
761 :
762 : // define some member variables in local scope for the parallel_for
763 0 : vector_type hsize = hr_m;
764 0 : Vector<int, Dim> N = nr_m;
765 :
766 : // loop over each component (E = vector field)
767 0 : for (size_t gd = 0; gd < Dim; ++gd) {
768 : // loop over rho2tr_m to multiply by -ik (gradient in Fourier space)
769 0 : Kokkos::parallel_for(
770 0 : "Gradient - E field", rho2tr_m.getFieldRangePolicy(),
771 0 : KOKKOS_LAMBDA(const int i, const int j, const int k) {
772 : // global indices for 2N rhotr_m
773 0 : const int ig = i + ldomR[0].first() - nghostR;
774 0 : const int jg = j + ldomR[1].first() - nghostR;
775 0 : const int kg = k + ldomR[2].first() - nghostR;
776 :
777 0 : Vector<int, 3> iVec = {ig, jg, kg};
778 :
779 : scalar_type k_gd;
780 0 : const scalar_type Len = N[gd] * hsize[gd];
781 0 : const bool shift = (iVec[gd] > N[gd]);
782 0 : const bool notMid = (iVec[gd] != N[gd]);
783 :
784 0 : k_gd = notMid * (pi / Len) * (iVec[gd] - shift * 2 * N[gd]);
785 :
786 0 : view_g(i, j, k) = -(I * k_gd) * viewR(i, j, k);
787 0 : });
788 :
789 : // start a timer
790 0 : static IpplTimings::TimerRef ffte = IpplTimings::getTimer("FFT: Efield");
791 0 : IpplTimings::startTimer(ffte);
792 :
793 : // transform to get E-field
794 0 : fft_m->transform(BACKWARD, rho2_mr, temp_m);
795 :
796 0 : IpplTimings::stopTimer(ffte);
797 :
798 : // apply proper normalization
799 0 : for (unsigned int i = 0; i < Dim; ++i) {
800 0 : switch (alg) {
801 0 : case Algorithm::HOCKNEY:
802 0 : rho2_mr = rho2_mr * 2.0 * nr_m[i] * hr_m[i];
803 0 : break;
804 0 : case Algorithm::VICO:
805 : case Algorithm::BIHARMONIC:
806 0 : rho2_mr = rho2_mr * 2.0 * (1.0 / 4.0);
807 0 : break;
808 0 : case Algorithm::DCT_VICO:
809 0 : rho2_mr = rho2_mr * (1.0 / 4.0);
810 0 : break;
811 0 : default:
812 0 : throw IpplException(
813 : "FFTOpenPoissonSolver::initializeFields()",
814 : "Currently only HOCKNEY, VICO, DCT_VICO, and BIHARMONIC are "
815 : "supported for open BCs");
816 : }
817 : }
818 :
819 : // start a timer
820 0 : static IpplTimings::TimerRef edtos =
821 0 : IpplTimings::getTimer("Efield: double to phys.");
822 0 : IpplTimings::startTimer(edtos);
823 :
824 : // restrict to physical grid (N^3) and assign to LHS (E-field)
825 : // communication needed if more than one rank
826 0 : if (ranks > 1) {
827 : // COMMUNICATION
828 :
829 : // send
830 0 : const auto& lDomains1 = layout_mp->getHostLocalDomains();
831 0 : std::vector<MPI_Request> requests(0);
832 :
833 0 : for (int i = 0; i < ranks; ++i) {
834 0 : if (lDomains1[i].touches(ldom2)) {
835 0 : auto intersection = lDomains1[i].intersect(ldom2);
836 :
837 0 : solver_send(mpi::tag::OPEN_SOLVER, 0, i, intersection, ldom2, nghost2,
838 0 : view2, fd_m, requests);
839 : }
840 : }
841 :
842 : // receive
843 0 : const auto& lDomains2 = layout2_m->getHostLocalDomains();
844 :
845 0 : for (int i = 0; i < ranks; ++i) {
846 0 : if (ldom1.touches(lDomains2[i])) {
847 0 : auto intersection = ldom1.intersect(lDomains2[i]);
848 :
849 : mpi::Communicator::size_type nrecvs;
850 0 : nrecvs = intersection.size();
851 :
852 0 : buffer_type buf = Comm->getBuffer<memory_space, Trhs>(nrecvs);
853 :
854 0 : Comm->recv(i, mpi::tag::OPEN_SOLVER, fd_m, *buf, nrecvs * sizeof(Trhs),
855 : nrecvs);
856 0 : buf->resetReadPos();
857 :
858 0 : unpack(intersection, viewL, gd, fd_m, nghostL, ldom1);
859 0 : }
860 : }
861 :
862 : // wait for all messages to be received
863 0 : if (requests.size() > 0) {
864 0 : MPI_Waitall(requests.size(), requests.data(), MPI_STATUSES_IGNORE);
865 : }
866 0 : ippl::Comm->freeAllBuffers();
867 :
868 0 : } else {
869 0 : Kokkos::parallel_for(
870 0 : "Write the E-field on physical grid", this->lhs_mp->getFieldRangePolicy(),
871 0 : KOKKOS_LAMBDA(const int i, const int j, const int k) {
872 0 : const int ig2 = i + ldom2[0].first() - nghost2;
873 0 : const int jg2 = j + ldom2[1].first() - nghost2;
874 0 : const int kg2 = k + ldom2[2].first() - nghost2;
875 :
876 0 : const int ig = i + ldom1[0].first() - nghostL;
877 0 : const int jg = j + ldom1[1].first() - nghostL;
878 0 : const int kg = k + ldom1[2].first() - nghostL;
879 :
880 : // take [0,N-1] as physical solution
881 0 : const bool isQuadrant1 = ((ig == ig2) && (jg == jg2) && (kg == kg2));
882 0 : viewL(i, j, k)[gd] = view2(i, j, k) * isQuadrant1;
883 : });
884 : }
885 0 : IpplTimings::stopTimer(edtos);
886 : }
887 0 : IpplTimings::stopTimer(efield);
888 0 : }
889 :
890 : // if user asked for Hessian, compute in Fourier domain (-k^2 multiplication)
891 0 : if (hessian) {
892 : // start a timer
893 0 : static IpplTimings::TimerRef hess = IpplTimings::getTimer("Solve: Hessian");
894 0 : IpplTimings::startTimer(hess);
895 :
896 : // get Hessian matrix view (LHS)
897 0 : auto viewH = hess_m.getView();
898 0 : const int nghostH = hess_m.getNghost();
899 :
900 : // get rho2tr_m view (as we want to multiply by -k^2 then transform)
901 0 : auto viewR = rho2tr_m.getView();
902 0 : const int nghostR = rho2tr_m.getNghost();
903 0 : const auto& ldomR = layoutComplex_m->getLocalNDIndex();
904 :
905 : // use temp_m as a temporary complex field
906 0 : auto view_g = temp_m.getView();
907 :
908 : // define some constants
909 0 : const scalar_type pi = Kokkos::numbers::pi_v<scalar_type>;
910 :
911 : // define some member variables in local scope for the parallel_for
912 0 : vector_type hsize = hr_m;
913 0 : Vector<int, Dim> N = nr_m;
914 :
915 : // loop over each component (Hessian = Matrix field)
916 0 : for (size_t row = 0; row < Dim; ++row) {
917 0 : for (size_t col = 0; col < Dim; ++col) {
918 : // loop over rho2tr_m to multiply by -k^2 (second derivative in Fourier space)
919 : // if diagonal element (row = col), do not need N/2 term = 0
920 : // else, if mixed derivative, need kVec = 0 at N/2
921 :
922 0 : Kokkos::parallel_for(
923 0 : "Hessian", rho2tr_m.getFieldRangePolicy(),
924 0 : KOKKOS_LAMBDA(const int i, const int j, const int k) {
925 : // global indices for 2N rhotr_m
926 0 : const int ig = i + ldomR[0].first() - nghostR;
927 0 : const int jg = j + ldomR[1].first() - nghostR;
928 0 : const int kg = k + ldomR[2].first() - nghostR;
929 :
930 0 : Vector<int, 3> iVec = {ig, jg, kg};
931 0 : Vector_t kVec;
932 :
933 0 : for (size_t d = 0; d < Dim; ++d) {
934 0 : const scalar_type Len = N[d] * hsize[d];
935 0 : const bool shift = (iVec[d] > N[d]);
936 0 : const bool isMid = (iVec[d] == N[d]);
937 0 : const bool notDiag = (row != col);
938 :
939 0 : kVec[d] = (1 - (notDiag * isMid)) * (pi / Len)
940 0 : * (iVec[d] - shift * 2 * N[d]);
941 : }
942 :
943 0 : view_g(i, j, k) = -(kVec[col] * kVec[row]) * viewR(i, j, k);
944 0 : });
945 :
946 : // start a timer
947 0 : static IpplTimings::TimerRef ffth = IpplTimings::getTimer("FFT: Hessian");
948 0 : IpplTimings::startTimer(ffth);
949 :
950 : // transform to get Hessian
951 0 : fft_m->transform(BACKWARD, rho2_mr, temp_m);
952 :
953 0 : IpplTimings::stopTimer(ffth);
954 :
955 : // apply proper normalization
956 0 : for (unsigned int i = 0; i < Dim; ++i) {
957 0 : switch (alg) {
958 0 : case Algorithm::HOCKNEY:
959 0 : rho2_mr = rho2_mr * 2.0 * nr_m[i] * hr_m[i];
960 0 : break;
961 0 : case Algorithm::VICO:
962 : case Algorithm::BIHARMONIC:
963 0 : rho2_mr = rho2_mr * 2.0 * (1.0 / 4.0);
964 0 : break;
965 0 : case Algorithm::DCT_VICO:
966 0 : rho2_mr = rho2_mr * (1.0 / 4.0);
967 0 : break;
968 0 : default:
969 0 : throw IpplException(
970 : "FFTOpenPoissonSolver::initializeFields()",
971 : "Currently only HOCKNEY, VICO, DCT_VICO, and BIHARMONIC are "
972 : "supported for open BCs");
973 : }
974 : }
975 :
976 : // start a timer
977 0 : static IpplTimings::TimerRef hdtos =
978 0 : IpplTimings::getTimer("Hessian: double to phys.");
979 0 : IpplTimings::startTimer(hdtos);
980 :
981 : // restrict to physical grid (N^3) and assign to Matrix field (Hessian)
982 : // communication needed if more than one rank
983 0 : if (ranks > 1) {
984 : // COMMUNICATION
985 :
986 : // send
987 0 : const auto& lDomains1 = layout_mp->getHostLocalDomains();
988 0 : std::vector<MPI_Request> requests(0);
989 :
990 0 : for (int i = 0; i < ranks; ++i) {
991 0 : if (lDomains1[i].touches(ldom2)) {
992 0 : auto intersection = lDomains1[i].intersect(ldom2);
993 :
994 0 : solver_send(mpi::tag::OPEN_SOLVER, 0, i, intersection, ldom2,
995 0 : nghost2, view2, fd_m, requests);
996 : }
997 : }
998 :
999 : // receive
1000 0 : const auto& lDomains2 = layout2_m->getHostLocalDomains();
1001 :
1002 0 : for (int i = 0; i < ranks; ++i) {
1003 0 : if (ldom1.touches(lDomains2[i])) {
1004 0 : auto intersection = ldom1.intersect(lDomains2[i]);
1005 :
1006 : mpi::Communicator::size_type nrecvs;
1007 0 : nrecvs = intersection.size();
1008 :
1009 0 : buffer_type buf = Comm->getBuffer<memory_space, Trhs>(nrecvs);
1010 :
1011 0 : Comm->recv(i, mpi::tag::OPEN_SOLVER, fd_m, *buf,
1012 : nrecvs * sizeof(Trhs), nrecvs);
1013 0 : buf->resetReadPos();
1014 :
1015 0 : unpack(intersection, viewH, fd_m, nghostH, ldom1, row, col);
1016 0 : }
1017 : }
1018 :
1019 : // wait for all messages to be received
1020 0 : if (requests.size() > 0) {
1021 0 : MPI_Waitall(requests.size(), requests.data(), MPI_STATUSES_IGNORE);
1022 : }
1023 0 : ippl::Comm->freeAllBuffers();
1024 :
1025 0 : } else {
1026 0 : Kokkos::parallel_for(
1027 0 : "Write Hessian on physical grid", hess_m.getFieldRangePolicy(),
1028 0 : KOKKOS_LAMBDA(const int i, const int j, const int k) {
1029 0 : const int ig2 = i + ldom2[0].first() - nghost2;
1030 0 : const int jg2 = j + ldom2[1].first() - nghost2;
1031 0 : const int kg2 = k + ldom2[2].first() - nghost2;
1032 :
1033 0 : const int ig = i + ldom1[0].first() - nghostH;
1034 0 : const int jg = j + ldom1[1].first() - nghostH;
1035 0 : const int kg = k + ldom1[2].first() - nghostH;
1036 :
1037 : // take [0,N-1] as physical solution
1038 0 : const bool isQuadrant1 =
1039 0 : ((ig == ig2) && (jg == jg2) && (kg == kg2));
1040 0 : viewH(i, j, k)[row][col] = view2(i, j, k) * isQuadrant1;
1041 : });
1042 : }
1043 0 : IpplTimings::stopTimer(hdtos);
1044 : }
1045 : }
1046 0 : IpplTimings::stopTimer(hess);
1047 0 : }
1048 0 : IpplTimings::stopTimer(solve);
1049 0 : };
1050 :
1051 : ////////////////////////////////////////////////////////////////////////
1052 : // calculate FFT of the Green's function
1053 :
1054 : template <typename FieldLHS, typename FieldRHS>
1055 0 : void FFTOpenPoissonSolver<FieldLHS, FieldRHS>::greensFunction() {
1056 0 : const scalar_type pi = Kokkos::numbers::pi_v<scalar_type>;
1057 0 : grn_mr = 0.0;
1058 :
1059 0 : const int alg = this->params_m.template get<int>("algorithm");
1060 :
1061 0 : if (alg == Algorithm::VICO || alg == Algorithm::BIHARMONIC) {
1062 0 : Vector_t l(hr_m * nr_m);
1063 0 : Vector_t hs_m;
1064 0 : double L_sum(0.0);
1065 :
1066 : // compute length of the physical domain
1067 : // compute Fourier domain spacing
1068 0 : for (unsigned int i = 0; i < Dim; ++i) {
1069 0 : hs_m[i] = pi * 0.5 / l[i];
1070 0 : L_sum = L_sum + l[i] * l[i];
1071 : }
1072 :
1073 : // define the origin of the 4N grid
1074 0 : vector_type origin;
1075 :
1076 0 : for (unsigned int i = 0; i < Dim; ++i) {
1077 0 : origin[i] = -2 * nr_m[i] * pi / l[i];
1078 : }
1079 :
1080 : // set mesh for the 4N mesh
1081 0 : mesh4_m->setMeshSpacing(hs_m);
1082 :
1083 : // size of truncation window
1084 0 : L_sum = std::sqrt(L_sum);
1085 : // we choose a window 10% larger than domain (arbitrary choice)
1086 0 : L_sum = 1.1 * L_sum;
1087 :
1088 : // initialize grnL_m
1089 0 : typename CxField_gt::view_type view_g = grnL_m.getView();
1090 0 : const int nghost_g = grnL_m.getNghost();
1091 0 : const auto& ldom_g = layout4_m->getLocalNDIndex();
1092 :
1093 0 : Vector<int, Dim> size = nr_m;
1094 :
1095 : // Kokkos parallel for loop to assign analytic grnL_m
1096 0 : if (alg == Algorithm::VICO) {
1097 0 : Kokkos::parallel_for(
1098 0 : "Initialize Green's function ", grnL_m.getFieldRangePolicy(),
1099 0 : KOKKOS_LAMBDA(const int i, const int j, const int k) {
1100 : // go from local indices to global
1101 0 : const int ig = i + ldom_g[0].first() - nghost_g;
1102 0 : const int jg = j + ldom_g[1].first() - nghost_g;
1103 0 : const int kg = k + ldom_g[2].first() - nghost_g;
1104 :
1105 0 : bool isOutside = (ig > 2 * size[0] - 1);
1106 0 : const Tg t = ig * hs_m[0] + isOutside * origin[0];
1107 :
1108 0 : isOutside = (jg > 2 * size[1] - 1);
1109 0 : const Tg u = jg * hs_m[1] + isOutside * origin[1];
1110 :
1111 0 : isOutside = (kg > 2 * size[2] - 1);
1112 0 : const Tg v = kg * hs_m[2] + isOutside * origin[2];
1113 :
1114 0 : Tg s = (t * t) + (u * u) + (v * v);
1115 0 : s = Kokkos::sqrt(s);
1116 :
1117 : // assign the green's function value
1118 : // if (0,0,0), assign L^2/2 (analytical limit of sinc)
1119 :
1120 0 : const bool isOrig = ((ig == 0 && jg == 0 && kg == 0));
1121 0 : const Tg analyticLim = -L_sum * L_sum * 0.5;
1122 0 : const Tg value = -2.0 * (Kokkos::sin(0.5 * L_sum * s) / (s + isOrig * 1.0))
1123 0 : * (Kokkos::sin(0.5 * L_sum * s) / (s + isOrig * 1.0));
1124 :
1125 0 : view_g(i, j, k) = (!isOrig) * value + isOrig * analyticLim;
1126 : });
1127 0 : } else if (alg == Algorithm::BIHARMONIC) {
1128 0 : Kokkos::parallel_for(
1129 0 : "Initialize Green's function ", grnL_m.getFieldRangePolicy(),
1130 0 : KOKKOS_LAMBDA(const int i, const int j, const int k) {
1131 : // go from local indices to global
1132 0 : const int ig = i + ldom_g[0].first() - nghost_g;
1133 0 : const int jg = j + ldom_g[1].first() - nghost_g;
1134 0 : const int kg = k + ldom_g[2].first() - nghost_g;
1135 :
1136 0 : bool isOutside = (ig > 2 * size[0] - 1);
1137 0 : const Tg t = ig * hs_m[0] + isOutside * origin[0];
1138 :
1139 0 : isOutside = (jg > 2 * size[1] - 1);
1140 0 : const Tg u = jg * hs_m[1] + isOutside * origin[1];
1141 :
1142 0 : isOutside = (kg > 2 * size[2] - 1);
1143 0 : const Tg v = kg * hs_m[2] + isOutside * origin[2];
1144 :
1145 0 : Tg s = (t * t) + (u * u) + (v * v);
1146 0 : s = Kokkos::sqrt(s);
1147 :
1148 : // assign value and replace with analytic limit at origin (0,0,0)
1149 0 : const bool isOrig = ((ig == 0 && jg == 0 && kg == 0));
1150 0 : const Tg analyticLim = -L_sum * L_sum * L_sum * L_sum / 8.0;
1151 0 : const Tg value = -((2 - (L_sum * L_sum * s * s)) * Kokkos::cos(L_sum * s)
1152 0 : + 2 * L_sum * s * Kokkos::sin(L_sum * s) - 2)
1153 0 : / (2 * s * s * s * s + isOrig * 1.0);
1154 :
1155 0 : view_g(i, j, k) = (!isOrig) * value + isOrig * analyticLim;
1156 : });
1157 : }
1158 :
1159 : // start a timer
1160 0 : static IpplTimings::TimerRef fft4 = IpplTimings::getTimer("FFT: Precomputation");
1161 0 : IpplTimings::startTimer(fft4);
1162 :
1163 : // inverse Fourier transform of the green's function for precomputation
1164 0 : fft4n_m->transform(BACKWARD, grnL_m);
1165 :
1166 0 : IpplTimings::stopTimer(fft4);
1167 :
1168 : // Restrict transformed grnL_m to 2N domain after precomputation step
1169 :
1170 : // get the field data first
1171 0 : typename Field_t::view_type view = grn_mr.getView();
1172 0 : const int nghost = grn_mr.getNghost();
1173 0 : const auto& ldom = layout2_m->getLocalNDIndex();
1174 :
1175 : // start a timer
1176 0 : static IpplTimings::TimerRef ifftshift = IpplTimings::getTimer("Vico shift loop");
1177 0 : IpplTimings::startTimer(ifftshift);
1178 :
1179 : // get number of ranks to see if need communication
1180 0 : const int ranks = Comm->size();
1181 :
1182 0 : if (ranks > 1) {
1183 0 : communicateVico(size, view_g, ldom_g, nghost_g, view, ldom, nghost);
1184 : } else {
1185 : // restrict the green's function to a (2N)^3 grid from the (4N)^3 grid
1186 : using mdrange_type = Kokkos::MDRangePolicy<Kokkos::Rank<3>>;
1187 0 : Kokkos::parallel_for(
1188 : "Restrict domain of Green's function from 4N to 2N",
1189 0 : mdrange_type({nghost, nghost, nghost}, {view.extent(0) - nghost - size[0],
1190 0 : view.extent(1) - nghost - size[1],
1191 0 : view.extent(2) - nghost - size[2]}),
1192 0 : KOKKOS_LAMBDA(const int i, const int j, const int k) {
1193 : // go from local indices to global
1194 0 : const int ig = i + ldom[0].first() - nghost;
1195 0 : const int jg = j + ldom[1].first() - nghost;
1196 0 : const int kg = k + ldom[2].first() - nghost;
1197 :
1198 0 : const int ig2 = i + ldom_g[0].first() - nghost_g;
1199 0 : const int jg2 = j + ldom_g[1].first() - nghost_g;
1200 0 : const int kg2 = k + ldom_g[2].first() - nghost_g;
1201 :
1202 0 : const bool isOrig = (ig == ig2) && (jg == jg2) && (kg == kg2);
1203 0 : view(i, j, k) = isOrig * real(view_g(i, j, k));
1204 :
1205 : // Now fill the rest of the field
1206 0 : const int s = 2 * size[0] - ig - 1 - ldom_g[0].first() + nghost_g;
1207 0 : const int p = 2 * size[1] - jg - 1 - ldom_g[1].first() + nghost_g;
1208 0 : const int q = 2 * size[2] - kg - 1 - ldom_g[2].first() + nghost_g;
1209 :
1210 0 : view(s, j, k) = real(view_g(i + 1, j, k));
1211 0 : view(i, p, k) = real(view_g(i, j + 1, k));
1212 0 : view(i, j, q) = real(view_g(i, j, k + 1));
1213 0 : view(s, j, q) = real(view_g(i + 1, j, k + 1));
1214 0 : view(s, p, k) = real(view_g(i + 1, j + 1, k));
1215 0 : view(i, p, q) = real(view_g(i, j + 1, k + 1));
1216 0 : view(s, p, q) = real(view_g(i + 1, j + 1, k + 1));
1217 : });
1218 : }
1219 0 : IpplTimings::stopTimer(ifftshift);
1220 :
1221 0 : } else if (alg == Algorithm::DCT_VICO) {
1222 0 : Vector_t l(hr_m * nr_m);
1223 0 : Vector_t hs_m;
1224 0 : double L_sum(0.0);
1225 :
1226 : // compute length of the physical domain
1227 : // compute Fourier domain spacing
1228 0 : for (unsigned int i = 0; i < Dim; ++i) {
1229 0 : hs_m[i] = Kokkos::numbers::pi_v<Trhs> * 0.5 / l[i];
1230 0 : L_sum = L_sum + l[i] * l[i];
1231 : }
1232 :
1233 : // define the origin of the 2N+1 grid
1234 0 : Vector_t origin = 0.0;
1235 :
1236 : // set mesh for the 2N+1 mesh
1237 0 : mesh2n1_m->setMeshSpacing(hs_m);
1238 :
1239 : // size of truncation window
1240 0 : L_sum = Kokkos::sqrt(L_sum);
1241 0 : L_sum = 1.1 * L_sum;
1242 :
1243 : // initialize grn2n1_m
1244 0 : Vector<int, Dim> size = nr_m;
1245 :
1246 : // initialize grn2n1_m
1247 0 : typename Field_t::view_type view_g2n1 = grn2n1_m.getView();
1248 0 : const int nghost_g2n1 = grn2n1_m.getNghost();
1249 0 : const auto& ldom_g2n1 = layout2n1_m->getLocalNDIndex();
1250 :
1251 0 : Kokkos::parallel_for(
1252 0 : "Initialize 2N+1 Green's function ", grn2n1_m.getFieldRangePolicy(),
1253 0 : KOKKOS_LAMBDA(const int i, const int j, const int k) {
1254 : // go from local indices to global
1255 0 : const int ig = i + ldom_g2n1[0].first() - nghost_g2n1;
1256 0 : const int jg = j + ldom_g2n1[1].first() - nghost_g2n1;
1257 0 : const int kg = k + ldom_g2n1[2].first() - nghost_g2n1;
1258 :
1259 0 : double t = ig * hs_m[0];
1260 0 : double u = jg * hs_m[1];
1261 0 : double v = kg * hs_m[2];
1262 :
1263 0 : double s = (t * t) + (u * u) + (v * v);
1264 0 : s = Kokkos::sqrt(s);
1265 :
1266 0 : const bool isOrig = ((ig == 0 && jg == 0 && kg == 0));
1267 0 : const double val = -2.0 * (Kokkos::sin(0.5 * L_sum * s) / (s + isOrig * 1.0))
1268 0 : * (Kokkos::sin(0.5 * L_sum * s) / (s + isOrig * 1.0));
1269 0 : const double analyticLim = -L_sum * L_sum * 0.5;
1270 :
1271 0 : view_g2n1(i, j, k) = ((!isOrig) * val) + (isOrig * analyticLim);
1272 : });
1273 :
1274 : // start a timer
1275 0 : static IpplTimings::TimerRef fft4 = IpplTimings::getTimer("FFT: Precomputation");
1276 0 : IpplTimings::startTimer(fft4);
1277 :
1278 : // inverse DCT transform of 2N+1 green's function for the precomputation
1279 0 : fft2n1_m->transform(BACKWARD, grn2n1_m);
1280 :
1281 0 : IpplTimings::stopTimer(fft4);
1282 :
1283 : // Restrict transformed grn2n1_m to 2N domain after precomputation step
1284 :
1285 : // get the field data first
1286 0 : typename Field_t::view_type view = grn_mr.getView();
1287 0 : const int nghost = grn_mr.getNghost();
1288 0 : const auto& ldom = layout2_m->getLocalNDIndex();
1289 :
1290 : // start a timer
1291 0 : static IpplTimings::TimerRef ifftshift = IpplTimings::getTimer("Vico shift loop");
1292 0 : IpplTimings::startTimer(ifftshift);
1293 :
1294 : // get number of ranks to see if need communication
1295 0 : int ranks = ippl::Comm->size();
1296 :
1297 : // range type for Kokkos loop
1298 : using mdrange_type = Kokkos::MDRangePolicy<Kokkos::Rank<3>>;
1299 :
1300 0 : if (ranks > 1) {
1301 0 : communicateVico(size, view_g2n1, ldom_g2n1, nghost_g2n1, view, ldom, nghost);
1302 : } else {
1303 : // restrict the green's function to a (2N)^3 grid from the (2N+1)^3 grid
1304 0 : Kokkos::parallel_for(
1305 : "Restrict domain of Green's function from 2N+1 to 2N",
1306 0 : mdrange_type({nghost, nghost, nghost}, {view.extent(0) - nghost - size[0],
1307 0 : view.extent(1) - nghost - size[1],
1308 0 : view.extent(2) - nghost - size[2]}),
1309 0 : KOKKOS_LAMBDA(const int i, const int j, const int k) {
1310 : // go from local indices to global
1311 0 : const int ig = i + ldom[0].first() - nghost;
1312 0 : const int jg = j + ldom[1].first() - nghost;
1313 0 : const int kg = k + ldom[2].first() - nghost;
1314 :
1315 0 : const int ig2 = i + ldom_g2n1[0].first() - nghost_g2n1;
1316 0 : const int jg2 = j + ldom_g2n1[1].first() - nghost_g2n1;
1317 0 : const int kg2 = k + ldom_g2n1[2].first() - nghost_g2n1;
1318 :
1319 0 : const bool isOrig = ((ig == ig2) && (jg == jg2) && (kg == kg2));
1320 0 : view(i, j, k) = isOrig * view_g2n1(i, j, k);
1321 :
1322 : // Now fill the rest of the field
1323 0 : const int s = 2 * size[0] - ig - 1 - ldom_g2n1[0].first() + nghost_g2n1;
1324 0 : const int p = 2 * size[1] - jg - 1 - ldom_g2n1[1].first() + nghost_g2n1;
1325 0 : const int q = 2 * size[2] - kg - 1 - ldom_g2n1[2].first() + nghost_g2n1;
1326 :
1327 0 : view(s, j, k) = view_g2n1(i + 1, j, k);
1328 0 : view(i, p, k) = view_g2n1(i, j + 1, k);
1329 0 : view(i, j, q) = view_g2n1(i, j, k + 1);
1330 0 : view(s, j, q) = view_g2n1(i + 1, j, k + 1);
1331 0 : view(s, p, k) = view_g2n1(i + 1, j + 1, k);
1332 0 : view(i, p, q) = view_g2n1(i, j + 1, k + 1);
1333 0 : view(s, p, q) = view_g2n1(i + 1, j + 1, k + 1);
1334 : });
1335 : }
1336 :
1337 0 : } else {
1338 : // Hockney case
1339 :
1340 : // calculate square of the mesh spacing for each dimension
1341 0 : Vector_t hrsq(hr_m * hr_m);
1342 :
1343 : // use the grnIField_m helper field to compute Green's function
1344 0 : for (unsigned int i = 0; i < Dim; ++i) {
1345 0 : grn_mr = grn_mr + grnIField_m[i] * hrsq[i];
1346 : }
1347 :
1348 0 : grn_mr = -1.0 / (4.0 * pi * sqrt(grn_mr));
1349 :
1350 0 : typename Field_t::view_type view = grn_mr.getView();
1351 0 : const int nghost = grn_mr.getNghost();
1352 0 : const auto& ldom = layout2_m->getLocalNDIndex();
1353 :
1354 : // Kokkos parallel for loop to find (0,0,0) point and regularize
1355 0 : Kokkos::parallel_for(
1356 0 : "Regularize Green's function ", grn_mr.getFieldRangePolicy(),
1357 0 : KOKKOS_LAMBDA(const int i, const int j, const int k) {
1358 : // go from local indices to global
1359 0 : const int ig = i + ldom[0].first() - nghost;
1360 0 : const int jg = j + ldom[1].first() - nghost;
1361 0 : const int kg = k + ldom[2].first() - nghost;
1362 :
1363 : // if (0,0,0), assign to it 1/(4*pi)
1364 0 : const bool isOrig = (ig == 0 && jg == 0 && kg == 0);
1365 0 : view(i, j, k) = isOrig * (-1.0 / (4.0 * pi)) + (!isOrig) * view(i, j, k);
1366 : });
1367 0 : }
1368 :
1369 : // start a timer
1370 0 : static IpplTimings::TimerRef fftg = IpplTimings::getTimer("FFT: Green");
1371 0 : IpplTimings::startTimer(fftg);
1372 :
1373 : // perform the FFT of the Green's function for the convolution
1374 0 : fft_m->transform(FORWARD, grn_mr, grntr_m);
1375 :
1376 0 : IpplTimings::stopTimer(fftg);
1377 0 : };
1378 :
1379 : template <typename FieldLHS, typename FieldRHS>
1380 0 : void FFTOpenPoissonSolver<FieldLHS, FieldRHS>::communicateVico(
1381 : Vector<int, Dim> size, typename CxField_gt::view_type view_g,
1382 : const ippl::NDIndex<Dim> ldom_g, const int nghost_g, typename Field_t::view_type view,
1383 : const ippl::NDIndex<Dim> ldom, const int nghost) {
1384 0 : const auto& lDomains2 = layout2_m->getHostLocalDomains();
1385 0 : const auto& lDomains4 = layout4_m->getHostLocalDomains();
1386 :
1387 0 : std::vector<MPI_Request> requests(0);
1388 0 : const int ranks = Comm->size();
1389 :
1390 : // 1st step: Define 8 domains corresponding to the different quadrants
1391 0 : ippl::NDIndex<Dim> none;
1392 0 : for (unsigned i = 0; i < Dim; i++) {
1393 0 : none[i] = ippl::Index(size[i]);
1394 : }
1395 :
1396 0 : ippl::NDIndex<Dim> x;
1397 0 : x[0] = ippl::Index(size[0], 2 * size[0] - 1);
1398 0 : x[1] = ippl::Index(size[1]);
1399 0 : x[2] = ippl::Index(size[2]);
1400 :
1401 0 : ippl::NDIndex<Dim> y;
1402 0 : y[0] = ippl::Index(size[0]);
1403 0 : y[1] = ippl::Index(size[1], 2 * size[1] - 1);
1404 0 : y[2] = ippl::Index(size[2]);
1405 :
1406 0 : ippl::NDIndex<Dim> z;
1407 0 : z[0] = ippl::Index(size[0]);
1408 0 : z[1] = ippl::Index(size[1]);
1409 0 : z[2] = ippl::Index(size[2], 2 * size[2] - 1);
1410 :
1411 0 : ippl::NDIndex<Dim> xy;
1412 0 : xy[0] = ippl::Index(size[0], 2 * size[0] - 1);
1413 0 : xy[1] = ippl::Index(size[1], 2 * size[1] - 1);
1414 0 : xy[2] = ippl::Index(size[2]);
1415 :
1416 0 : ippl::NDIndex<Dim> xz;
1417 0 : xz[0] = ippl::Index(size[0], 2 * size[0] - 1);
1418 0 : xz[1] = ippl::Index(size[1]);
1419 0 : xz[2] = ippl::Index(size[2], 2 * size[2] - 1);
1420 :
1421 0 : ippl::NDIndex<Dim> yz;
1422 0 : yz[0] = ippl::Index(size[0]);
1423 0 : yz[1] = ippl::Index(size[1], 2 * size[1] - 1);
1424 0 : yz[2] = ippl::Index(size[2], 2 * size[2] - 1);
1425 :
1426 0 : ippl::NDIndex<Dim> xyz;
1427 0 : for (unsigned i = 0; i < Dim; i++) {
1428 0 : xyz[i] = ippl::Index(size[i], 2 * size[i] - 1);
1429 : }
1430 :
1431 : // 2nd step: send
1432 0 : for (int i = 0; i < ranks; ++i) {
1433 0 : auto domain2 = lDomains2[i];
1434 :
1435 0 : if (domain2.touches(none)) {
1436 0 : auto intersection = domain2.intersect(none);
1437 :
1438 0 : if (ldom_g.touches(intersection)) {
1439 0 : intersection = intersection.intersect(ldom_g);
1440 :
1441 0 : solver_send(mpi::tag::VICO_SOLVER, 0, i, intersection, ldom_g, nghost_g, view_g,
1442 0 : fd_m, requests);
1443 : }
1444 : }
1445 :
1446 0 : if (domain2.touches(x)) {
1447 0 : auto intersection = domain2.intersect(x);
1448 0 : auto xdom = ippl::Index((2 * size[0] - intersection[0].first()),
1449 0 : (2 * size[0] - intersection[0].last()), -1);
1450 :
1451 0 : ippl::NDIndex<Dim> domain4;
1452 0 : domain4[0] = xdom;
1453 0 : domain4[1] = intersection[1];
1454 0 : domain4[2] = intersection[2];
1455 :
1456 0 : if (ldom_g.touches(domain4)) {
1457 0 : intersection = ldom_g.intersect(domain4);
1458 :
1459 0 : solver_send(mpi::tag::VICO_SOLVER, 1, i, intersection, ldom_g, nghost_g, view_g,
1460 0 : fd_m, requests);
1461 : }
1462 : }
1463 :
1464 0 : if (domain2.touches(y)) {
1465 0 : auto intersection = domain2.intersect(y);
1466 0 : auto ydom = ippl::Index((2 * size[1] - intersection[1].first()),
1467 0 : (2 * size[1] - intersection[1].last()), -1);
1468 :
1469 0 : ippl::NDIndex<Dim> domain4;
1470 0 : domain4[0] = intersection[0];
1471 0 : domain4[1] = ydom;
1472 0 : domain4[2] = intersection[2];
1473 :
1474 0 : if (ldom_g.touches(domain4)) {
1475 0 : intersection = ldom_g.intersect(domain4);
1476 :
1477 0 : solver_send(mpi::tag::VICO_SOLVER, 2, i, intersection, ldom_g, nghost_g, view_g,
1478 0 : fd_m, requests);
1479 : }
1480 : }
1481 :
1482 0 : if (domain2.touches(z)) {
1483 0 : auto intersection = domain2.intersect(z);
1484 0 : auto zdom = ippl::Index((2 * size[2] - intersection[2].first()),
1485 0 : (2 * size[2] - intersection[2].last()), -1);
1486 :
1487 0 : ippl::NDIndex<Dim> domain4;
1488 0 : domain4[0] = intersection[0];
1489 0 : domain4[1] = intersection[1];
1490 0 : domain4[2] = zdom;
1491 :
1492 0 : if (ldom_g.touches(domain4)) {
1493 0 : intersection = ldom_g.intersect(domain4);
1494 :
1495 0 : solver_send(mpi::tag::VICO_SOLVER, 3, i, intersection, ldom_g, nghost_g, view_g,
1496 0 : fd_m, requests);
1497 : }
1498 : }
1499 :
1500 0 : if (domain2.touches(xy)) {
1501 0 : auto intersection = domain2.intersect(xy);
1502 0 : auto xdom = ippl::Index((2 * size[0] - intersection[0].first()),
1503 0 : (2 * size[0] - intersection[0].last()), -1);
1504 0 : auto ydom = ippl::Index((2 * size[1] - intersection[1].first()),
1505 0 : (2 * size[1] - intersection[1].last()), -1);
1506 :
1507 0 : ippl::NDIndex<Dim> domain4;
1508 0 : domain4[0] = xdom;
1509 0 : domain4[1] = ydom;
1510 0 : domain4[2] = intersection[2];
1511 :
1512 0 : if (ldom_g.touches(domain4)) {
1513 0 : intersection = ldom_g.intersect(domain4);
1514 :
1515 0 : solver_send(mpi::tag::VICO_SOLVER, 4, i, intersection, ldom_g, nghost_g, view_g,
1516 0 : fd_m, requests);
1517 : }
1518 : }
1519 :
1520 0 : if (domain2.touches(yz)) {
1521 0 : auto intersection = domain2.intersect(yz);
1522 0 : auto ydom = ippl::Index((2 * size[1] - intersection[1].first()),
1523 0 : (2 * size[1] - intersection[1].last()), -1);
1524 0 : auto zdom = ippl::Index((2 * size[2] - intersection[2].first()),
1525 0 : (2 * size[2] - intersection[2].last()), -1);
1526 :
1527 0 : ippl::NDIndex<Dim> domain4;
1528 0 : domain4[0] = intersection[0];
1529 0 : domain4[1] = ydom;
1530 0 : domain4[2] = zdom;
1531 :
1532 0 : if (ldom_g.touches(domain4)) {
1533 0 : intersection = ldom_g.intersect(domain4);
1534 0 : solver_send(mpi::tag::VICO_SOLVER, 5, i, intersection, ldom_g, nghost_g, view_g,
1535 0 : fd_m, requests);
1536 : }
1537 : }
1538 :
1539 0 : if (domain2.touches(xz)) {
1540 0 : auto intersection = domain2.intersect(xz);
1541 0 : auto xdom = ippl::Index((2 * size[0] - intersection[0].first()),
1542 0 : (2 * size[0] - intersection[0].last()), -1);
1543 0 : auto zdom = ippl::Index((2 * size[2] - intersection[2].first()),
1544 0 : (2 * size[2] - intersection[2].last()), -1);
1545 :
1546 0 : ippl::NDIndex<Dim> domain4;
1547 0 : domain4[0] = xdom;
1548 0 : domain4[1] = intersection[1];
1549 0 : domain4[2] = zdom;
1550 :
1551 0 : if (ldom_g.touches(domain4)) {
1552 0 : intersection = ldom_g.intersect(domain4);
1553 0 : solver_send(mpi::tag::VICO_SOLVER, 6, i, intersection, ldom_g, nghost_g, view_g,
1554 0 : fd_m, requests);
1555 : }
1556 : }
1557 :
1558 0 : if (domain2.touches(xyz)) {
1559 0 : auto intersection = domain2.intersect(xyz);
1560 0 : auto xdom = ippl::Index((2 * size[0] - intersection[0].first()),
1561 0 : (2 * size[0] - intersection[0].last()), -1);
1562 0 : auto ydom = ippl::Index((2 * size[1] - intersection[1].first()),
1563 0 : (2 * size[1] - intersection[1].last()), -1);
1564 0 : auto zdom = ippl::Index((2 * size[2] - intersection[2].first()),
1565 0 : (2 * size[2] - intersection[2].last()), -1);
1566 :
1567 0 : ippl::NDIndex<Dim> domain4;
1568 0 : domain4[0] = xdom;
1569 0 : domain4[1] = ydom;
1570 0 : domain4[2] = zdom;
1571 :
1572 0 : if (ldom_g.touches(domain4)) {
1573 0 : intersection = ldom_g.intersect(domain4);
1574 0 : solver_send(mpi::tag::VICO_SOLVER, 7, i, intersection, ldom_g, nghost_g, view_g,
1575 0 : fd_m, requests);
1576 : }
1577 : }
1578 : }
1579 :
1580 : // 3rd step: receive
1581 0 : for (int i = 0; i < ranks; ++i) {
1582 0 : if (ldom.touches(none)) {
1583 0 : auto intersection = ldom.intersect(none);
1584 :
1585 0 : if (lDomains4[i].touches(intersection)) {
1586 0 : intersection = intersection.intersect(lDomains4[i]);
1587 :
1588 0 : solver_recv(mpi::tag::VICO_SOLVER, 0, i, intersection, ldom, nghost, view,
1589 0 : fd_m);
1590 : }
1591 : }
1592 :
1593 0 : if (ldom.touches(x)) {
1594 0 : auto intersection = ldom.intersect(x);
1595 :
1596 0 : auto xdom = ippl::Index((2 * size[0] - intersection[0].first()),
1597 0 : (2 * size[0] - intersection[0].last()), -1);
1598 :
1599 0 : ippl::NDIndex<Dim> domain4;
1600 0 : domain4[0] = xdom;
1601 0 : domain4[1] = intersection[1];
1602 0 : domain4[2] = intersection[2];
1603 :
1604 0 : if (lDomains4[i].touches(domain4)) {
1605 0 : domain4 = lDomains4[i].intersect(domain4);
1606 0 : domain4[0] = ippl::Index(2 * size[0] - domain4[0].first(),
1607 0 : 2 * size[0] - domain4[0].last(), -1);
1608 :
1609 0 : intersection = intersection.intersect(domain4);
1610 :
1611 0 : solver_recv(mpi::tag::VICO_SOLVER, 1, i, intersection, ldom, nghost, view, fd_m,
1612 : true, false, false);
1613 : }
1614 : }
1615 :
1616 0 : if (ldom.touches(y)) {
1617 0 : auto intersection = ldom.intersect(y);
1618 :
1619 0 : auto ydom = ippl::Index((2 * size[1] - intersection[1].first()),
1620 0 : (2 * size[1] - intersection[1].last()), -1);
1621 :
1622 0 : ippl::NDIndex<Dim> domain4;
1623 0 : domain4[0] = intersection[0];
1624 0 : domain4[1] = ydom;
1625 0 : domain4[2] = intersection[2];
1626 :
1627 0 : if (lDomains4[i].touches(domain4)) {
1628 0 : domain4 = lDomains4[i].intersect(domain4);
1629 0 : domain4[1] = ippl::Index(2 * size[1] - domain4[1].first(),
1630 0 : 2 * size[1] - domain4[1].last(), -1);
1631 :
1632 0 : intersection = intersection.intersect(domain4);
1633 :
1634 0 : solver_recv(mpi::tag::VICO_SOLVER, 2, i, intersection, ldom, nghost, view, fd_m,
1635 : false, true, false);
1636 : }
1637 : }
1638 :
1639 0 : if (ldom.touches(z)) {
1640 0 : auto intersection = ldom.intersect(z);
1641 :
1642 0 : auto zdom = ippl::Index((2 * size[2] - intersection[2].first()),
1643 0 : (2 * size[2] - intersection[2].last()), -1);
1644 :
1645 0 : ippl::NDIndex<Dim> domain4;
1646 0 : domain4[0] = intersection[0];
1647 0 : domain4[1] = intersection[1];
1648 0 : domain4[2] = zdom;
1649 :
1650 0 : if (lDomains4[i].touches(domain4)) {
1651 0 : domain4 = lDomains4[i].intersect(domain4);
1652 0 : domain4[2] = ippl::Index(2 * size[2] - domain4[2].first(),
1653 0 : 2 * size[2] - domain4[2].last(), -1);
1654 :
1655 0 : intersection = intersection.intersect(domain4);
1656 :
1657 0 : solver_recv(mpi::tag::VICO_SOLVER, 3, i, intersection, ldom, nghost, view, fd_m,
1658 : false, false, true);
1659 : }
1660 : }
1661 :
1662 0 : if (ldom.touches(xy)) {
1663 0 : auto intersection = ldom.intersect(xy);
1664 :
1665 0 : auto xdom = ippl::Index((2 * size[0] - intersection[0].first()),
1666 0 : (2 * size[0] - intersection[0].last()), -1);
1667 0 : auto ydom = ippl::Index((2 * size[1] - intersection[1].first()),
1668 0 : (2 * size[1] - intersection[1].last()), -1);
1669 :
1670 0 : ippl::NDIndex<Dim> domain4;
1671 0 : domain4[0] = xdom;
1672 0 : domain4[1] = ydom;
1673 0 : domain4[2] = intersection[2];
1674 :
1675 0 : if (lDomains4[i].touches(domain4)) {
1676 0 : domain4 = lDomains4[i].intersect(domain4);
1677 0 : domain4[0] = ippl::Index(2 * size[0] - domain4[0].first(),
1678 0 : 2 * size[0] - domain4[0].last(), -1);
1679 0 : domain4[1] = ippl::Index(2 * size[1] - domain4[1].first(),
1680 0 : 2 * size[1] - domain4[1].last(), -1);
1681 :
1682 0 : intersection = intersection.intersect(domain4);
1683 :
1684 0 : solver_recv(mpi::tag::VICO_SOLVER, 4, i, intersection, ldom, nghost, view, fd_m,
1685 : true, true, false);
1686 : }
1687 : }
1688 :
1689 0 : if (ldom.touches(yz)) {
1690 0 : auto intersection = ldom.intersect(yz);
1691 :
1692 0 : auto ydom = ippl::Index((2 * size[1] - intersection[1].first()),
1693 0 : (2 * size[1] - intersection[1].last()), -1);
1694 0 : auto zdom = ippl::Index((2 * size[2] - intersection[2].first()),
1695 0 : (2 * size[2] - intersection[2].last()), -1);
1696 :
1697 0 : ippl::NDIndex<Dim> domain4;
1698 0 : domain4[0] = intersection[0];
1699 0 : domain4[1] = ydom;
1700 0 : domain4[2] = zdom;
1701 :
1702 0 : if (lDomains4[i].touches(domain4)) {
1703 0 : domain4 = lDomains4[i].intersect(domain4);
1704 0 : domain4[1] = ippl::Index(2 * size[1] - domain4[1].first(),
1705 0 : 2 * size[1] - domain4[1].last(), -1);
1706 0 : domain4[2] = ippl::Index(2 * size[2] - domain4[2].first(),
1707 0 : 2 * size[2] - domain4[2].last(), -1);
1708 :
1709 0 : intersection = intersection.intersect(domain4);
1710 :
1711 0 : solver_recv(mpi::tag::VICO_SOLVER, 5, i, intersection, ldom, nghost, view, fd_m,
1712 : false, true, true);
1713 : }
1714 : }
1715 :
1716 0 : if (ldom.touches(xz)) {
1717 0 : auto intersection = ldom.intersect(xz);
1718 :
1719 0 : auto xdom = ippl::Index((2 * size[0] - intersection[0].first()),
1720 0 : (2 * size[0] - intersection[0].last()), -1);
1721 0 : auto zdom = ippl::Index((2 * size[2] - intersection[2].first()),
1722 0 : (2 * size[2] - intersection[2].last()), -1);
1723 :
1724 0 : ippl::NDIndex<Dim> domain4;
1725 0 : domain4[0] = xdom;
1726 0 : domain4[1] = intersection[1];
1727 0 : domain4[2] = zdom;
1728 :
1729 0 : if (lDomains4[i].touches(domain4)) {
1730 0 : domain4 = lDomains4[i].intersect(domain4);
1731 0 : domain4[0] = ippl::Index(2 * size[0] - domain4[0].first(),
1732 0 : 2 * size[0] - domain4[0].last(), -1);
1733 0 : domain4[2] = ippl::Index(2 * size[2] - domain4[2].first(),
1734 0 : 2 * size[2] - domain4[2].last(), -1);
1735 :
1736 0 : intersection = intersection.intersect(domain4);
1737 :
1738 0 : solver_recv(mpi::tag::VICO_SOLVER, 6, i, intersection, ldom, nghost, view, fd_m,
1739 : true, false, true);
1740 : }
1741 : }
1742 :
1743 0 : if (ldom.touches(xyz)) {
1744 0 : auto intersection = ldom.intersect(xyz);
1745 :
1746 0 : auto xdom = ippl::Index((2 * size[0] - intersection[0].first()),
1747 0 : (2 * size[0] - intersection[0].last()), -1);
1748 0 : auto ydom = ippl::Index((2 * size[1] - intersection[1].first()),
1749 0 : (2 * size[1] - intersection[1].last()), -1);
1750 0 : auto zdom = ippl::Index((2 * size[2] - intersection[2].first()),
1751 0 : (2 * size[2] - intersection[2].last()), -1);
1752 :
1753 0 : ippl::NDIndex<Dim> domain4;
1754 0 : domain4[0] = xdom;
1755 0 : domain4[1] = ydom;
1756 0 : domain4[2] = zdom;
1757 :
1758 0 : if (lDomains4[i].touches(domain4)) {
1759 0 : domain4 = lDomains4[i].intersect(domain4);
1760 0 : domain4[0] = ippl::Index(2 * size[0] - domain4[0].first(),
1761 0 : 2 * size[0] - domain4[0].last(), -1);
1762 0 : domain4[1] = ippl::Index(2 * size[1] - domain4[1].first(),
1763 0 : 2 * size[1] - domain4[1].last(), -1);
1764 0 : domain4[2] = ippl::Index(2 * size[2] - domain4[2].first(),
1765 0 : 2 * size[2] - domain4[2].last(), -1);
1766 :
1767 0 : intersection = intersection.intersect(domain4);
1768 :
1769 0 : solver_recv(mpi::tag::VICO_SOLVER, 7, i, intersection, ldom, nghost, view, fd_m,
1770 : true, true, true);
1771 : }
1772 : }
1773 : }
1774 :
1775 0 : if (requests.size() > 0) {
1776 0 : MPI_Waitall(requests.size(), requests.data(), MPI_STATUSES_IGNORE);
1777 : }
1778 0 : ippl::Comm->freeAllBuffers();
1779 0 : };
1780 :
1781 : // CommunicateVico for DCT_VICO (2N+1 to 2N)
1782 : template <typename FieldLHS, typename FieldRHS>
1783 0 : void FFTOpenPoissonSolver<FieldLHS, FieldRHS>::communicateVico(
1784 : Vector<int, Dim> size, typename Field_t::view_type view_g, const ippl::NDIndex<Dim> ldom_g,
1785 : const int nghost_g, typename Field_t::view_type view, const ippl::NDIndex<Dim> ldom,
1786 : const int nghost) {
1787 0 : const auto& lDomains2 = layout2_m->getHostLocalDomains();
1788 0 : const auto& lDomains2n1 = layout2n1_m->getHostLocalDomains();
1789 :
1790 0 : std::vector<MPI_Request> requests(0);
1791 0 : const int ranks = ippl::Comm->size();
1792 :
1793 : // 1st step: Define 8 domains corresponding to the different quadrants
1794 0 : ippl::NDIndex<Dim> none;
1795 0 : for (unsigned i = 0; i < Dim; i++) {
1796 0 : none[i] = ippl::Index(size[i]);
1797 : }
1798 :
1799 0 : ippl::NDIndex<Dim> x;
1800 0 : x[0] = ippl::Index(size[0], 2 * size[0] - 1);
1801 0 : x[1] = ippl::Index(size[1]);
1802 0 : x[2] = ippl::Index(size[2]);
1803 :
1804 0 : ippl::NDIndex<Dim> y;
1805 0 : y[0] = ippl::Index(size[0]);
1806 0 : y[1] = ippl::Index(size[1], 2 * size[1] - 1);
1807 0 : y[2] = ippl::Index(size[2]);
1808 :
1809 0 : ippl::NDIndex<Dim> z;
1810 0 : z[0] = ippl::Index(size[0]);
1811 0 : z[1] = ippl::Index(size[1]);
1812 0 : z[2] = ippl::Index(size[2], 2 * size[2] - 1);
1813 :
1814 0 : ippl::NDIndex<Dim> xy;
1815 0 : xy[0] = ippl::Index(size[0], 2 * size[0] - 1);
1816 0 : xy[1] = ippl::Index(size[1], 2 * size[1] - 1);
1817 0 : xy[2] = ippl::Index(size[2]);
1818 :
1819 0 : ippl::NDIndex<Dim> xz;
1820 0 : xz[0] = ippl::Index(size[0], 2 * size[0] - 1);
1821 0 : xz[1] = ippl::Index(size[1]);
1822 0 : xz[2] = ippl::Index(size[2], 2 * size[2] - 1);
1823 :
1824 0 : ippl::NDIndex<Dim> yz;
1825 0 : yz[0] = ippl::Index(size[0]);
1826 0 : yz[1] = ippl::Index(size[1], 2 * size[1] - 1);
1827 0 : yz[2] = ippl::Index(size[2], 2 * size[2] - 1);
1828 :
1829 0 : ippl::NDIndex<Dim> xyz;
1830 0 : for (unsigned i = 0; i < Dim; i++) {
1831 0 : xyz[i] = ippl::Index(size[i], 2 * size[i] - 1);
1832 : }
1833 :
1834 : // 2nd step: send
1835 0 : for (int i = 0; i < ranks; ++i) {
1836 0 : auto domain2 = lDomains2[i];
1837 :
1838 0 : if (domain2.touches(none)) {
1839 0 : auto intersection = domain2.intersect(none);
1840 :
1841 0 : if (ldom_g.touches(intersection)) {
1842 0 : intersection = intersection.intersect(ldom_g);
1843 :
1844 0 : solver_send(mpi::tag::VICO_SOLVER, 0, i, intersection, ldom_g, nghost_g, view_g,
1845 0 : fd_m, requests);
1846 : }
1847 : }
1848 :
1849 0 : if (domain2.touches(x)) {
1850 0 : auto intersection = domain2.intersect(x);
1851 0 : auto xdom = ippl::Index((2 * size[0] - intersection[0].first()),
1852 0 : (2 * size[0] - intersection[0].last()), -1);
1853 :
1854 0 : ippl::NDIndex<Dim> domain2n1;
1855 0 : domain2n1[0] = xdom;
1856 0 : domain2n1[1] = intersection[1];
1857 0 : domain2n1[2] = intersection[2];
1858 :
1859 0 : if (ldom_g.touches(domain2n1)) {
1860 0 : intersection = ldom_g.intersect(domain2n1);
1861 :
1862 0 : solver_send(mpi::tag::VICO_SOLVER, 1, i, intersection, ldom_g, nghost_g, view_g,
1863 0 : fd_m, requests);
1864 : }
1865 : }
1866 :
1867 0 : if (domain2.touches(y)) {
1868 0 : auto intersection = domain2.intersect(y);
1869 0 : auto ydom = ippl::Index((2 * size[1] - intersection[1].first()),
1870 0 : (2 * size[1] - intersection[1].last()), -1);
1871 :
1872 0 : ippl::NDIndex<Dim> domain2n1;
1873 0 : domain2n1[0] = intersection[0];
1874 0 : domain2n1[1] = ydom;
1875 0 : domain2n1[2] = intersection[2];
1876 :
1877 0 : if (ldom_g.touches(domain2n1)) {
1878 0 : intersection = ldom_g.intersect(domain2n1);
1879 :
1880 0 : solver_send(mpi::tag::VICO_SOLVER, 2, i, intersection, ldom_g, nghost_g, view_g,
1881 0 : fd_m, requests);
1882 : }
1883 : }
1884 :
1885 0 : if (domain2.touches(z)) {
1886 0 : auto intersection = domain2.intersect(z);
1887 0 : auto zdom = ippl::Index((2 * size[2] - intersection[2].first()),
1888 0 : (2 * size[2] - intersection[2].last()), -1);
1889 :
1890 0 : ippl::NDIndex<Dim> domain2n1;
1891 0 : domain2n1[0] = intersection[0];
1892 0 : domain2n1[1] = intersection[1];
1893 0 : domain2n1[2] = zdom;
1894 :
1895 0 : if (ldom_g.touches(domain2n1)) {
1896 0 : intersection = ldom_g.intersect(domain2n1);
1897 :
1898 0 : solver_send(mpi::tag::VICO_SOLVER, 3, i, intersection, ldom_g, nghost_g, view_g,
1899 0 : fd_m, requests);
1900 : }
1901 : }
1902 :
1903 0 : if (domain2.touches(xy)) {
1904 0 : auto intersection = domain2.intersect(xy);
1905 0 : auto xdom = ippl::Index((2 * size[0] - intersection[0].first()),
1906 0 : (2 * size[0] - intersection[0].last()), -1);
1907 0 : auto ydom = ippl::Index((2 * size[1] - intersection[1].first()),
1908 0 : (2 * size[1] - intersection[1].last()), -1);
1909 :
1910 0 : ippl::NDIndex<Dim> domain2n1;
1911 0 : domain2n1[0] = xdom;
1912 0 : domain2n1[1] = ydom;
1913 0 : domain2n1[2] = intersection[2];
1914 :
1915 0 : if (ldom_g.touches(domain2n1)) {
1916 0 : intersection = ldom_g.intersect(domain2n1);
1917 :
1918 0 : solver_send(mpi::tag::VICO_SOLVER, 4, i, intersection, ldom_g, nghost_g, view_g,
1919 0 : fd_m, requests);
1920 : }
1921 : }
1922 :
1923 0 : if (domain2.touches(yz)) {
1924 0 : auto intersection = domain2.intersect(yz);
1925 0 : auto ydom = ippl::Index((2 * size[1] - intersection[1].first()),
1926 0 : (2 * size[1] - intersection[1].last()), -1);
1927 0 : auto zdom = ippl::Index((2 * size[2] - intersection[2].first()),
1928 0 : (2 * size[2] - intersection[2].last()), -1);
1929 :
1930 0 : ippl::NDIndex<Dim> domain2n1;
1931 0 : domain2n1[0] = intersection[0];
1932 0 : domain2n1[1] = ydom;
1933 0 : domain2n1[2] = zdom;
1934 :
1935 0 : if (ldom_g.touches(domain2n1)) {
1936 0 : intersection = ldom_g.intersect(domain2n1);
1937 :
1938 0 : solver_send(mpi::tag::VICO_SOLVER, 5, i, intersection, ldom_g, nghost_g, view_g,
1939 0 : fd_m, requests);
1940 : }
1941 : }
1942 :
1943 0 : if (domain2.touches(xz)) {
1944 0 : auto intersection = domain2.intersect(xz);
1945 0 : auto xdom = ippl::Index((2 * size[0] - intersection[0].first()),
1946 0 : (2 * size[0] - intersection[0].last()), -1);
1947 0 : auto zdom = ippl::Index((2 * size[2] - intersection[2].first()),
1948 0 : (2 * size[2] - intersection[2].last()), -1);
1949 :
1950 0 : ippl::NDIndex<Dim> domain2n1;
1951 0 : domain2n1[0] = xdom;
1952 0 : domain2n1[1] = intersection[1];
1953 0 : domain2n1[2] = zdom;
1954 :
1955 0 : if (ldom_g.touches(domain2n1)) {
1956 0 : intersection = ldom_g.intersect(domain2n1);
1957 :
1958 0 : solver_send(mpi::tag::VICO_SOLVER, 6, i, intersection, ldom_g, nghost_g, view_g,
1959 0 : fd_m, requests);
1960 : }
1961 : }
1962 :
1963 0 : if (domain2.touches(xyz)) {
1964 0 : auto intersection = domain2.intersect(xyz);
1965 0 : auto xdom = ippl::Index((2 * size[0] - intersection[0].first()),
1966 0 : (2 * size[0] - intersection[0].last()), -1);
1967 0 : auto ydom = ippl::Index((2 * size[1] - intersection[1].first()),
1968 0 : (2 * size[1] - intersection[1].last()), -1);
1969 0 : auto zdom = ippl::Index((2 * size[2] - intersection[2].first()),
1970 0 : (2 * size[2] - intersection[2].last()), -1);
1971 :
1972 0 : ippl::NDIndex<Dim> domain2n1;
1973 0 : domain2n1[0] = xdom;
1974 0 : domain2n1[1] = ydom;
1975 0 : domain2n1[2] = zdom;
1976 :
1977 0 : if (ldom_g.touches(domain2n1)) {
1978 0 : intersection = ldom_g.intersect(domain2n1);
1979 :
1980 0 : solver_send(mpi::tag::VICO_SOLVER, 7, i, intersection, ldom_g, nghost_g, view_g,
1981 0 : fd_m, requests);
1982 : }
1983 : }
1984 : }
1985 :
1986 : // 3rd step: receive
1987 0 : for (int i = 0; i < ranks; ++i) {
1988 0 : if (ldom.touches(none)) {
1989 0 : auto intersection = ldom.intersect(none);
1990 :
1991 0 : if (lDomains2n1[i].touches(intersection)) {
1992 0 : intersection = intersection.intersect(lDomains2n1[i]);
1993 :
1994 0 : solver_recv(mpi::tag::VICO_SOLVER, 0, i, intersection, ldom, nghost, view,
1995 0 : fd_m);
1996 : }
1997 : }
1998 :
1999 0 : if (ldom.touches(x)) {
2000 0 : auto intersection = ldom.intersect(x);
2001 :
2002 0 : auto xdom = ippl::Index((2 * size[0] - intersection[0].first()),
2003 0 : (2 * size[0] - intersection[0].last()), -1);
2004 :
2005 0 : ippl::NDIndex<Dim> domain2n1;
2006 0 : domain2n1[0] = xdom;
2007 0 : domain2n1[1] = intersection[1];
2008 0 : domain2n1[2] = intersection[2];
2009 :
2010 0 : if (lDomains2n1[i].touches(domain2n1)) {
2011 0 : domain2n1 = lDomains2n1[i].intersect(domain2n1);
2012 0 : domain2n1[0] = ippl::Index(2 * size[0] - domain2n1[0].first(),
2013 0 : 2 * size[0] - domain2n1[0].last(), -1);
2014 :
2015 0 : intersection = intersection.intersect(domain2n1);
2016 :
2017 0 : solver_recv(mpi::tag::VICO_SOLVER, 1, i, intersection, ldom, nghost, view, fd_m,
2018 : true, false, false);
2019 : }
2020 : }
2021 :
2022 0 : if (ldom.touches(y)) {
2023 0 : auto intersection = ldom.intersect(y);
2024 :
2025 0 : auto ydom = ippl::Index((2 * size[1] - intersection[1].first()),
2026 0 : (2 * size[1] - intersection[1].last()), -1);
2027 :
2028 0 : ippl::NDIndex<Dim> domain2n1;
2029 0 : domain2n1[0] = intersection[0];
2030 0 : domain2n1[1] = ydom;
2031 0 : domain2n1[2] = intersection[2];
2032 :
2033 0 : if (lDomains2n1[i].touches(domain2n1)) {
2034 0 : domain2n1 = lDomains2n1[i].intersect(domain2n1);
2035 0 : domain2n1[1] = ippl::Index(2 * size[1] - domain2n1[1].first(),
2036 0 : 2 * size[1] - domain2n1[1].last(), -1);
2037 :
2038 0 : intersection = intersection.intersect(domain2n1);
2039 :
2040 0 : solver_recv(mpi::tag::VICO_SOLVER, 2, i, intersection, ldom, nghost, view, fd_m,
2041 : false, true, false);
2042 : }
2043 : }
2044 :
2045 0 : if (ldom.touches(z)) {
2046 0 : auto intersection = ldom.intersect(z);
2047 :
2048 0 : auto zdom = ippl::Index((2 * size[2] - intersection[2].first()),
2049 0 : (2 * size[2] - intersection[2].last()), -1);
2050 :
2051 0 : ippl::NDIndex<Dim> domain2n1;
2052 0 : domain2n1[0] = intersection[0];
2053 0 : domain2n1[1] = intersection[1];
2054 0 : domain2n1[2] = zdom;
2055 :
2056 0 : if (lDomains2n1[i].touches(domain2n1)) {
2057 0 : domain2n1 = lDomains2n1[i].intersect(domain2n1);
2058 0 : domain2n1[2] = ippl::Index(2 * size[2] - domain2n1[2].first(),
2059 0 : 2 * size[2] - domain2n1[2].last(), -1);
2060 :
2061 0 : intersection = intersection.intersect(domain2n1);
2062 :
2063 0 : solver_recv(mpi::tag::VICO_SOLVER, 3, i, intersection, ldom, nghost, view, fd_m,
2064 : false, false, true);
2065 : }
2066 : }
2067 :
2068 0 : if (ldom.touches(xy)) {
2069 0 : auto intersection = ldom.intersect(xy);
2070 :
2071 0 : auto xdom = ippl::Index((2 * size[0] - intersection[0].first()),
2072 0 : (2 * size[0] - intersection[0].last()), -1);
2073 0 : auto ydom = ippl::Index((2 * size[1] - intersection[1].first()),
2074 0 : (2 * size[1] - intersection[1].last()), -1);
2075 :
2076 0 : ippl::NDIndex<Dim> domain2n1;
2077 0 : domain2n1[0] = xdom;
2078 0 : domain2n1[1] = ydom;
2079 0 : domain2n1[2] = intersection[2];
2080 :
2081 0 : if (lDomains2n1[i].touches(domain2n1)) {
2082 0 : domain2n1 = lDomains2n1[i].intersect(domain2n1);
2083 0 : domain2n1[0] = ippl::Index(2 * size[0] - domain2n1[0].first(),
2084 0 : 2 * size[0] - domain2n1[0].last(), -1);
2085 0 : domain2n1[1] = ippl::Index(2 * size[1] - domain2n1[1].first(),
2086 0 : 2 * size[1] - domain2n1[1].last(), -1);
2087 :
2088 0 : intersection = intersection.intersect(domain2n1);
2089 :
2090 0 : solver_recv(mpi::tag::VICO_SOLVER, 4, i, intersection, ldom, nghost, view, fd_m,
2091 : true, true, false);
2092 : }
2093 : }
2094 :
2095 0 : if (ldom.touches(yz)) {
2096 0 : auto intersection = ldom.intersect(yz);
2097 :
2098 0 : auto ydom = ippl::Index((2 * size[1] - intersection[1].first()),
2099 0 : (2 * size[1] - intersection[1].last()), -1);
2100 0 : auto zdom = ippl::Index((2 * size[2] - intersection[2].first()),
2101 0 : (2 * size[2] - intersection[2].last()), -1);
2102 :
2103 0 : ippl::NDIndex<Dim> domain2n1;
2104 0 : domain2n1[0] = intersection[0];
2105 0 : domain2n1[1] = ydom;
2106 0 : domain2n1[2] = zdom;
2107 :
2108 0 : if (lDomains2n1[i].touches(domain2n1)) {
2109 0 : domain2n1 = lDomains2n1[i].intersect(domain2n1);
2110 0 : domain2n1[1] = ippl::Index(2 * size[1] - domain2n1[1].first(),
2111 0 : 2 * size[1] - domain2n1[1].last(), -1);
2112 0 : domain2n1[2] = ippl::Index(2 * size[2] - domain2n1[2].first(),
2113 0 : 2 * size[2] - domain2n1[2].last(), -1);
2114 :
2115 0 : intersection = intersection.intersect(domain2n1);
2116 :
2117 0 : solver_recv(mpi::tag::VICO_SOLVER, 5, i, intersection, ldom, nghost, view, fd_m,
2118 : false, true, true);
2119 : }
2120 : }
2121 :
2122 0 : if (ldom.touches(xz)) {
2123 0 : auto intersection = ldom.intersect(xz);
2124 :
2125 0 : auto xdom = ippl::Index((2 * size[0] - intersection[0].first()),
2126 0 : (2 * size[0] - intersection[0].last()), -1);
2127 0 : auto zdom = ippl::Index((2 * size[2] - intersection[2].first()),
2128 0 : (2 * size[2] - intersection[2].last()), -1);
2129 :
2130 0 : ippl::NDIndex<Dim> domain2n1;
2131 0 : domain2n1[0] = xdom;
2132 0 : domain2n1[1] = intersection[1];
2133 0 : domain2n1[2] = zdom;
2134 :
2135 0 : if (lDomains2n1[i].touches(domain2n1)) {
2136 0 : domain2n1 = lDomains2n1[i].intersect(domain2n1);
2137 0 : domain2n1[0] = ippl::Index(2 * size[0] - domain2n1[0].first(),
2138 0 : 2 * size[0] - domain2n1[0].last(), -1);
2139 0 : domain2n1[2] = ippl::Index(2 * size[2] - domain2n1[2].first(),
2140 0 : 2 * size[2] - domain2n1[2].last(), -1);
2141 :
2142 0 : intersection = intersection.intersect(domain2n1);
2143 :
2144 0 : solver_recv(mpi::tag::VICO_SOLVER, 6, i, intersection, ldom, nghost, view, fd_m,
2145 : true, false, true);
2146 : }
2147 : }
2148 :
2149 0 : if (ldom.touches(xyz)) {
2150 0 : auto intersection = ldom.intersect(xyz);
2151 :
2152 0 : auto xdom = ippl::Index((2 * size[0] - intersection[0].first()),
2153 0 : (2 * size[0] - intersection[0].last()), -1);
2154 0 : auto ydom = ippl::Index((2 * size[1] - intersection[1].first()),
2155 0 : (2 * size[1] - intersection[1].last()), -1);
2156 0 : auto zdom = ippl::Index((2 * size[2] - intersection[2].first()),
2157 0 : (2 * size[2] - intersection[2].last()), -1);
2158 :
2159 0 : ippl::NDIndex<Dim> domain2n1;
2160 0 : domain2n1[0] = xdom;
2161 0 : domain2n1[1] = ydom;
2162 0 : domain2n1[2] = zdom;
2163 :
2164 0 : if (lDomains2n1[i].touches(domain2n1)) {
2165 0 : domain2n1 = lDomains2n1[i].intersect(domain2n1);
2166 0 : domain2n1[0] = ippl::Index(2 * size[0] - domain2n1[0].first(),
2167 0 : 2 * size[0] - domain2n1[0].last(), -1);
2168 0 : domain2n1[1] = ippl::Index(2 * size[1] - domain2n1[1].first(),
2169 0 : 2 * size[1] - domain2n1[1].last(), -1);
2170 0 : domain2n1[2] = ippl::Index(2 * size[2] - domain2n1[2].first(),
2171 0 : 2 * size[2] - domain2n1[2].last(), -1);
2172 :
2173 0 : intersection = intersection.intersect(domain2n1);
2174 :
2175 0 : solver_recv(mpi::tag::VICO_SOLVER, 7, i, intersection, ldom, nghost, view, fd_m,
2176 : true, true, true);
2177 : }
2178 : }
2179 : }
2180 :
2181 0 : if (requests.size() > 0) {
2182 0 : MPI_Waitall(requests.size(), requests.data(), MPI_STATUSES_IGNORE);
2183 : }
2184 0 : ippl::Comm->freeAllBuffers();
2185 0 : };
2186 : } // namespace ippl
|