LCOV - code coverage report
Current view: top level - src/PoissonSolvers - FFTOpenPoissonSolver.hpp (source / functions) Coverage Total Hit
Test: final_report.info Lines: 0.0 % 1245 0
Test Date: 2025-07-17 17:44:22 Functions: 0.0 % 77 0

            Line data    Source code
       1              : //
       2              : // Class FFTOpenPoissonSolver
       3              : //   FFT-based Poisson Solver for open boundaries.
       4              : //   Solves laplace(phi) = -rho, and E = -grad(phi).
       5              : //
       6              : //
       7              : 
       8              : // Communication specific functions (pack and unpack).
       9              : template <typename Tb, typename Tf>
      10            0 : void pack(const ippl::NDIndex<3> intersect, Kokkos::View<Tf***>& view,
      11              :           ippl::detail::FieldBufferData<Tb>& fd, int nghost, const ippl::NDIndex<3> ldom,
      12              :           ippl::mpi::Communicator::size_type& nsends) {
      13            0 :     Kokkos::View<Tb*>& buffer = fd.buffer;
      14              : 
      15            0 :     size_t size = intersect.size();
      16            0 :     nsends      = size;
      17            0 :     if (buffer.size() < size) {
      18            0 :         const int overalloc = ippl::Comm->getDefaultOverallocation();
      19            0 :         Kokkos::realloc(buffer, size * overalloc);
      20              :     }
      21              : 
      22            0 :     const int first0 = intersect[0].first() + nghost - ldom[0].first();
      23            0 :     const int first1 = intersect[1].first() + nghost - ldom[1].first();
      24            0 :     const int first2 = intersect[2].first() + nghost - ldom[2].first();
      25              : 
      26            0 :     const int last0 = intersect[0].last() + nghost - ldom[0].first() + 1;
      27            0 :     const int last1 = intersect[1].last() + nghost - ldom[1].first() + 1;
      28            0 :     const int last2 = intersect[2].last() + nghost - ldom[2].first() + 1;
      29              : 
      30              :     using mdrange_type = Kokkos::MDRangePolicy<Kokkos::Rank<3>>;
      31              :     // This type casting to long int is necessary as otherwise Kokkos complains for
      32              :     // intel compilers
      33            0 :     Kokkos::parallel_for(
      34              :         "pack()",
      35            0 :         mdrange_type({first0, first1, first2}, {(long int)last0, (long int)last1, (long int)last2}),
      36            0 :         KOKKOS_LAMBDA(const size_t i, const size_t j, const size_t k) {
      37            0 :             const int ig = i - first0;
      38            0 :             const int jg = j - first1;
      39            0 :             const int kg = k - first2;
      40              : 
      41            0 :             int l = ig + jg * intersect[0].length()
      42            0 :                     + kg * intersect[1].length() * intersect[0].length();
      43              : 
      44            0 :             Kokkos::complex<Tb> val = view(i, j, k);
      45              : 
      46            0 :             buffer(l) = Kokkos::real(val);
      47              :         });
      48            0 :     Kokkos::fence();
      49            0 : }
      50              : 
      51              : template <int tensorRank, typename Tb, typename Tf>
      52            0 : void unpack_impl(const ippl::NDIndex<3> intersect, const Kokkos::View<Tf***>& view,
      53              :                  ippl::detail::FieldBufferData<Tb>& fd, int nghost, const ippl::NDIndex<3> ldom,
      54              :                  size_t dim1 = 0, size_t dim2 = 0, bool x = false, bool y = false, bool z = false) {
      55            0 :     Kokkos::View<Tb*>& buffer = fd.buffer;
      56              : 
      57            0 :     const int first0 = intersect[0].first() + nghost - ldom[0].first();
      58            0 :     const int first1 = intersect[1].first() + nghost - ldom[1].first();
      59            0 :     const int first2 = intersect[2].first() + nghost - ldom[2].first();
      60              : 
      61            0 :     const int last0 = intersect[0].last() + nghost - ldom[0].first() + 1;
      62            0 :     const int last1 = intersect[1].last() + nghost - ldom[1].first() + 1;
      63            0 :     const int last2 = intersect[2].last() + nghost - ldom[2].first() + 1;
      64              : 
      65              :     using mdrange_type = Kokkos::MDRangePolicy<Kokkos::Rank<3>>;
      66            0 :     Kokkos::parallel_for(
      67            0 :         "pack()", mdrange_type({first0, first1, first2}, {last0, last1, last2}),
      68            0 :         KOKKOS_LAMBDA(const size_t i, const size_t j, const size_t k) {
      69            0 :             int ig = i - first0;
      70            0 :             int jg = j - first1;
      71            0 :             int kg = k - first2;
      72              : 
      73            0 :             ig = x * (intersect[0].length() - 2 * ig - 1) + ig;
      74            0 :             jg = y * (intersect[1].length() - 2 * jg - 1) + jg;
      75            0 :             kg = z * (intersect[2].length() - 2 * kg - 1) + kg;
      76              : 
      77            0 :             int l = ig + jg * intersect[0].length()
      78            0 :                     + kg * intersect[1].length() * intersect[0].length();
      79              : 
      80            0 :             ippl::detail::ViewAccess<tensorRank, decltype(view)>::get(view, dim1, dim2, i, j, k) =
      81            0 :                 buffer(l);
      82              :         });
      83            0 :     Kokkos::fence();
      84            0 : }
      85              : 
      86              : template <typename Tb, typename Tf>
      87            0 : void unpack(const ippl::NDIndex<3> intersect, const Kokkos::View<Tf***>& view,
      88              :             ippl::detail::FieldBufferData<Tb>& fd, int nghost, const ippl::NDIndex<3> ldom,
      89              :             bool x = false, bool y = false, bool z = false) {
      90            0 :     unpack_impl<0, Tb, Tf>(intersect, view, fd, nghost, ldom, 0, 0, x, y, z);
      91            0 : }
      92              : 
      93              : template <typename Tb, typename Tf>
      94            0 : void unpack(const ippl::NDIndex<3> intersect, const Kokkos::View<ippl::Vector<Tf, 3>***>& view,
      95              :             size_t dim1, ippl::detail::FieldBufferData<Tb>& fd, int nghost,
      96              :             const ippl::NDIndex<3> ldom) {
      97            0 :     unpack_impl<1, Tb, ippl::Vector<Tf, 3>>(intersect, view, fd, nghost, ldom, dim1);
      98            0 : }
      99              : 
     100              : template <typename Tb, typename Tf>
     101            0 : void unpack(const ippl::NDIndex<3> intersect,
     102              :             const Kokkos::View<ippl::Vector<ippl::Vector<Tf, 3>, 3>***>& view,
     103              :             ippl::detail::FieldBufferData<Tb>& fd, int nghost, const ippl::NDIndex<3> ldom,
     104              :             size_t dim1, size_t dim2) {
     105            0 :     unpack_impl<2, Tb, ippl::Vector<ippl::Vector<Tf, 3>, 3>>(intersect, view, fd, nghost, ldom,
     106              :                                                              dim1, dim2);
     107            0 : }
     108              : 
     109              : template <typename Tb, typename Tf, unsigned Dim>
     110            0 : void solver_send(int TAG, int id, int i, const ippl::NDIndex<Dim> intersection,
     111              :                  const ippl::NDIndex<Dim> ldom, int nghost, Kokkos::View<Tf***>& view,
     112              :                  ippl::detail::FieldBufferData<Tb>& fd, std::vector<MPI_Request>& requests) {
     113              :     using memory_space = typename Kokkos::View<Tf***>::memory_space;
     114              : 
     115            0 :     requests.resize(requests.size() + 1);
     116              : 
     117              :     ippl::mpi::Communicator::size_type nsends;
     118            0 :     pack(intersection, view, fd, nghost, ldom, nsends);
     119              : 
     120              :     // Buffer message indicates the domain intersection (x, y, z, xy, yz, xz, xyz).
     121            0 :     ippl::mpi::Communicator::buffer_type<memory_space> buf =
     122              :         ippl::Comm->getBuffer<memory_space, Tf>(nsends);
     123              : 
     124            0 :     int tag = TAG + id;
     125              : 
     126            0 :     ippl::Comm->isend(i, tag, fd, *buf, requests.back(), nsends);
     127            0 :     buf->resetWritePos();
     128            0 : }
     129              : 
     130              : template <typename Tb, typename Tf, unsigned Dim>
     131            0 : void solver_recv(int TAG, int id, int i, const ippl::NDIndex<Dim> intersection,
     132              :                  const ippl::NDIndex<Dim> ldom, int nghost, Kokkos::View<Tf***>& view,
     133              :                  ippl::detail::FieldBufferData<Tb>& fd, bool x = false, bool y = false,
     134              :                  bool z = false) {
     135              :     using memory_space = typename Kokkos::View<Tf***>::memory_space;
     136              : 
     137              :     ippl::mpi::Communicator::size_type nrecvs;
     138            0 :     nrecvs = intersection.size();
     139              : 
     140              :     // Buffer message indicates the domain intersection (x, y, z, xy, yz, xz, xyz).
     141            0 :     ippl::mpi::Communicator::buffer_type<memory_space> buf =
     142              :         ippl::Comm->getBuffer<memory_space, Tf>(nrecvs);
     143              : 
     144            0 :     int tag = TAG + id;
     145              : 
     146            0 :     ippl::Comm->recv(i, tag, fd, *buf, nrecvs * sizeof(Tf), nrecvs);
     147            0 :     buf->resetReadPos();
     148              : 
     149            0 :     unpack(intersection, view, fd, nghost, ldom, x, y, z);
     150            0 : }
     151              : 
     152              : namespace ippl {
     153              : 
     154              :     /////////////////////////////////////////////////////////////////////////
     155              :     // constructor and destructor
     156              :     template <typename FieldLHS, typename FieldRHS>
     157              :     FFTOpenPoissonSolver<FieldLHS, FieldRHS>::FFTOpenPoissonSolver()
     158              :         : Base()
     159              :         , mesh_mp(nullptr)
     160              :         , layout_mp(nullptr)
     161              :         , mesh2_m(nullptr)
     162              :         , layout2_m(nullptr)
     163              :         , meshComplex_m(nullptr)
     164              :         , layoutComplex_m(nullptr)
     165              :         , mesh4_m(nullptr)
     166              :         , layout4_m(nullptr)
     167              :         , mesh2n1_m(nullptr)
     168              :         , layout2n1_m(nullptr)
     169              :         , isGradFD_m(false) {
     170              :         setDefaultParameters();
     171              :     }
     172              : 
     173              :     template <typename FieldLHS, typename FieldRHS>
     174            0 :     FFTOpenPoissonSolver<FieldLHS, FieldRHS>::FFTOpenPoissonSolver(rhs_type& rhs,
     175              :                                                                    ParameterList& params)
     176            0 :         : mesh_mp(nullptr)
     177            0 :         , layout_mp(nullptr)
     178            0 :         , mesh2_m(nullptr)
     179            0 :         , layout2_m(nullptr)
     180            0 :         , meshComplex_m(nullptr)
     181            0 :         , layoutComplex_m(nullptr)
     182            0 :         , mesh4_m(nullptr)
     183            0 :         , layout4_m(nullptr)
     184            0 :         , mesh2n1_m(nullptr)
     185            0 :         , layout2n1_m(nullptr)
     186            0 :         , isGradFD_m(false) {
     187              :         using T = typename FieldLHS::value_type::value_type;
     188              :         static_assert(std::is_floating_point<T>::value, "Not a floating point type");
     189              : 
     190            0 :         setDefaultParameters();
     191            0 :         this->params_m.merge(params);
     192            0 :         this->params_m.update("output_type", Base::SOL);
     193              : 
     194            0 :         this->setRhs(rhs);
     195            0 :     }
     196              : 
     197              :     template <typename FieldLHS, typename FieldRHS>
     198            0 :     FFTOpenPoissonSolver<FieldLHS, FieldRHS>::FFTOpenPoissonSolver(lhs_type& lhs, rhs_type& rhs,
     199              :                                                                    ParameterList& params)
     200            0 :         : mesh_mp(nullptr)
     201            0 :         , layout_mp(nullptr)
     202            0 :         , mesh2_m(nullptr)
     203            0 :         , layout2_m(nullptr)
     204            0 :         , meshComplex_m(nullptr)
     205            0 :         , layoutComplex_m(nullptr)
     206            0 :         , mesh4_m(nullptr)
     207            0 :         , layout4_m(nullptr)
     208            0 :         , mesh2n1_m(nullptr)
     209            0 :         , layout2n1_m(nullptr)
     210            0 :         , isGradFD_m(false) {
     211              :         using T = typename FieldLHS::value_type::value_type;
     212              :         static_assert(std::is_floating_point<T>::value, "Not a floating point type");
     213              : 
     214            0 :         setDefaultParameters();
     215            0 :         this->params_m.merge(params);
     216              : 
     217            0 :         this->setLhs(lhs);
     218            0 :         this->setRhs(rhs);
     219            0 :     }
     220              : 
     221              :     /////////////////////////////////////////////////////////////////////////
     222              :     // override setRhs to call class-specific initialization
     223              :     template <typename FieldLHS, typename FieldRHS>
     224            0 :     void FFTOpenPoissonSolver<FieldLHS, FieldRHS>::setRhs(rhs_type& rhs) {
     225            0 :         Base::setRhs(rhs);
     226              : 
     227              :         // start a timer
     228            0 :         static IpplTimings::TimerRef initialize = IpplTimings::getTimer("Initialize");
     229            0 :         IpplTimings::startTimer(initialize);
     230              : 
     231            0 :         initializeFields();
     232              : 
     233            0 :         IpplTimings::stopTimer(initialize);
     234            0 :     }
     235              : 
     236              :     /////////////////////////////////////////////////////////////////////////
     237              :     // allows user to set gradient of phi = Efield instead of spectral
     238              :     // calculation of Efield (which uses FFTs)
     239              : 
     240              :     template <typename FieldLHS, typename FieldRHS>
     241              :     void FFTOpenPoissonSolver<FieldLHS, FieldRHS>::setGradFD() {
     242              :         // get the output type (sol, grad, or sol & grad)
     243              :         const int out = this->params_m.template get<int>("output_type");
     244              : 
     245              :         if (out != Base::SOL_AND_GRAD) {
     246              :             throw IpplException(
     247              :                 "FFTOpenPoissonSolver::setGradFD()",
     248              :                 "Cannot use gradient for Efield computation unless output type is SOL_AND_GRAD");
     249              :         } else {
     250              :             isGradFD_m = true;
     251              :         }
     252              :     }
     253              : 
     254              :     /////////////////////////////////////////////////////////////////////////
     255              :     // initializeFields method, called in constructor
     256              : 
     257              :     template <typename FieldLHS, typename FieldRHS>
     258            0 :     void FFTOpenPoissonSolver<FieldLHS, FieldRHS>::initializeFields() {
     259              :         // get algorithm and hessian flag from parameter list
     260            0 :         const int alg      = this->params_m.template get<int>("algorithm");
     261            0 :         const bool hessian = this->params_m.template get<bool>("hessian");
     262              : 
     263              :         // first check if valid algorithm choice
     264            0 :         if ((alg != Algorithm::VICO) && (alg != Algorithm::HOCKNEY)
     265            0 :             && (alg != Algorithm::BIHARMONIC) && (alg != Algorithm::DCT_VICO)) {
     266            0 :             throw IpplException("FFTOpenPoissonSolver::initializeFields()",
     267              :                                 "Currently only HOCKNEY, VICO, DCT_VICO, and BIHARMONIC are "
     268              :                                 "supported for open BCs");
     269              :         }
     270              : 
     271              :         // get layout and mesh
     272            0 :         layout_mp              = &(this->rhs_mp->getLayout());
     273            0 :         mesh_mp                = &(this->rhs_mp->get_mesh());
     274            0 :         mpi::Communicator comm = layout_mp->comm;
     275              : 
     276              :         // get mesh spacing and origin
     277            0 :         hr_m               = mesh_mp->getMeshSpacing();
     278            0 :         vector_type origin = mesh_mp->getOrigin();
     279              : 
     280              :         // create domain for the real fields
     281            0 :         domain_m = layout_mp->getDomain();
     282              : 
     283              :         // get the mesh spacings and sizes for each dimension
     284            0 :         for (unsigned int i = 0; i < Dim; ++i) {
     285            0 :             nr_m[i] = domain_m[i].length();
     286              : 
     287              :             // create the doubled domain for the FFT procedure
     288            0 :             domain2_m[i] = Index(2 * nr_m[i]);
     289              :         }
     290              : 
     291              :         // define decomposition (parallel / serial)
     292            0 :         std::array<bool, Dim> isParallel = layout_mp->isParallel();
     293              : 
     294              :         // create double sized mesh and layout objects using the previously defined domain2_m
     295              :         using mesh_type = typename lhs_type::Mesh_t;
     296            0 :         mesh2_m         = std::unique_ptr<mesh_type>(new mesh_type(domain2_m, hr_m, origin));
     297            0 :         layout2_m = std::unique_ptr<FieldLayout_t>(new FieldLayout_t(comm, domain2_m, isParallel));
     298              : 
     299              :         // create the domain for the transformed (complex) fields
     300              :         // since we use HeFFTe for the transforms it doesn't require permuting to the right
     301              :         // one of the dimensions has only (n/2 +1) as our original fields are fully real
     302              :         // the dimension is given by the user via r2c_direction
     303            0 :         unsigned int RCDirection = this->params_m.template get<int>("r2c_direction");
     304            0 :         for (unsigned int i = 0; i < Dim; ++i) {
     305            0 :             if (i == RCDirection) {
     306            0 :                 domainComplex_m[RCDirection] = Index(nr_m[RCDirection] + 1);
     307              :             } else {
     308            0 :                 domainComplex_m[i] = Index(2 * nr_m[i]);
     309              :             }
     310              :         }
     311              : 
     312              :         // create mesh and layout for the real to complex FFT transformed fields
     313            0 :         meshComplex_m = std::unique_ptr<mesh_type>(new mesh_type(domainComplex_m, hr_m, origin));
     314            0 :         layoutComplex_m =
     315            0 :             std::unique_ptr<FieldLayout_t>(new FieldLayout_t(comm, domainComplex_m, isParallel));
     316              : 
     317              :         // initialize fields
     318            0 :         storage_field.initialize(*mesh2_m, *layout2_m);
     319            0 :         rho2tr_m.initialize(*meshComplex_m, *layoutComplex_m);
     320            0 :         grntr_m.initialize(*meshComplex_m, *layoutComplex_m);
     321              : 
     322            0 :         int out = this->params_m.template get<int>("output_type");
     323            0 :         if (((out == Base::GRAD || out == Base::SOL_AND_GRAD) && !isGradFD_m) || hessian) {
     324            0 :             temp_m.initialize(*meshComplex_m, *layoutComplex_m);
     325              :         }
     326              : 
     327            0 :         if (hessian) {
     328            0 :             hess_m.initialize(*mesh_mp, *layout_mp);
     329              :         }
     330              : 
     331              :         // create the FFT object
     332            0 :         fft_m = std::make_unique<FFT_t>(*layout2_m, *layoutComplex_m, this->params_m);
     333              : 
     334              :         // if Vico, also need to create mesh and layout for 4N Fourier domain
     335              :         // on this domain, the truncated Green's function is defined
     336              :         // also need to create the 4N complex grid, on which precomputation step done
     337            0 :         if (alg == Algorithm::VICO || alg == Algorithm::BIHARMONIC) {
     338              :             // start a timer
     339            0 :             static IpplTimings::TimerRef initialize_vico =
     340            0 :                 IpplTimings::getTimer("Initialize: extra Vico");
     341            0 :             IpplTimings::startTimer(initialize_vico);
     342              : 
     343            0 :             for (unsigned int i = 0; i < Dim; ++i) {
     344            0 :                 domain4_m[i] = Index(4 * nr_m[i]);
     345              :             }
     346              : 
     347              :             // 4N grid
     348              :             using mesh_type = typename lhs_type::Mesh_t;
     349            0 :             mesh4_m         = std::unique_ptr<mesh_type>(new mesh_type(domain4_m, hr_m, origin));
     350            0 :             layout4_m =
     351            0 :                 std::unique_ptr<FieldLayout_t>(new FieldLayout_t(comm, domain4_m, isParallel));
     352              : 
     353              :             // initialize fields
     354            0 :             grnL_m.initialize(*mesh4_m, *layout4_m);
     355              : 
     356              :             // create a Complex-to-Complex FFT object to transform for layout4
     357            0 :             fft4n_m = std::make_unique<FFT<CCTransform, CxField_gt>>(*layout4_m, this->params_m);
     358              : 
     359            0 :             IpplTimings::stopTimer(initialize_vico);
     360              :         }
     361              : 
     362              :         // if DCT_VICO, need 2N+1 mesh, layout, domain, and green's function field for
     363              :         // precomputation
     364            0 :         if (alg == Algorithm::DCT_VICO) {
     365              :             // start a timer
     366            0 :             static IpplTimings::TimerRef initialize_vico =
     367            0 :                 IpplTimings::getTimer("Initialize: extra Vico");
     368            0 :             IpplTimings::startTimer(initialize_vico);
     369              : 
     370              :             // 2N+1 domain for DCT
     371            0 :             for (unsigned int i = 0; i < Dim; ++i) {
     372            0 :                 domain2n1_m[i] = Index(2 * nr_m[i] + 1);
     373              :             }
     374              : 
     375              :             // 2N+1 grid
     376            0 :             mesh2n1_m = std::unique_ptr<mesh_type>(new mesh_type(domain2n1_m, hr_m, origin));
     377            0 :             layout2n1_m =
     378            0 :                 std::unique_ptr<FieldLayout_t>(new FieldLayout_t(comm, domain2n1_m, isParallel));
     379              : 
     380              :             // initialize fields
     381            0 :             grn2n1_m.initialize(*mesh2n1_m, *layout2n1_m);  // 2N+1 grnL
     382              : 
     383              :             // create real to real FFT object for 2N+1 grid
     384            0 :             fft2n1_m = std::make_unique<FFT<Cos1Transform, Field_t>>(*layout2n1_m, this->params_m);
     385              : 
     386            0 :             IpplTimings::stopTimer(initialize_vico);
     387              :         }
     388              : 
     389              :         // these are fields that are used for calculating the Green's function for Hockney
     390            0 :         if (alg == Algorithm::HOCKNEY) {
     391              :             // start a timer
     392            0 :             static IpplTimings::TimerRef initialize_hockney =
     393            0 :                 IpplTimings::getTimer("Initialize: extra Hockney");
     394            0 :             IpplTimings::startTimer(initialize_hockney);
     395              : 
     396            0 :             for (unsigned int d = 0; d < Dim; ++d) {
     397            0 :                 grnIField_m[d].initialize(*mesh2_m, *layout2_m);
     398              : 
     399              :                 // get number of ghost points and the Kokkos view to iterate over field
     400            0 :                 auto view        = grnIField_m[d].getView();
     401            0 :                 const int nghost = grnIField_m[d].getNghost();
     402            0 :                 const auto& ldom = layout2_m->getLocalNDIndex();
     403              : 
     404              :                 // the length of the physical domain
     405            0 :                 const int size = nr_m[d];
     406              : 
     407              :                 // Kokkos parallel for loop to initialize grnIField[d]
     408            0 :                 switch (d) {
     409            0 :                     case 0:
     410            0 :                         Kokkos::parallel_for(
     411              :                             "Helper index Green field initialization",
     412            0 :                             grnIField_m[d].getFieldRangePolicy(),
     413            0 :                             KOKKOS_LAMBDA(const int i, const int j, const int k) {
     414              :                                 // go from local indices to global
     415            0 :                                 const int ig = i + ldom[0].first() - nghost;
     416            0 :                                 const int jg = j + ldom[1].first() - nghost;
     417            0 :                                 const int kg = k + ldom[2].first() - nghost;
     418              : 
     419              :                                 // assign (index)^2 if 0 <= index < N, and (2N-index)^2 elsewhere
     420            0 :                                 const bool outsideN = (ig >= size);
     421            0 :                                 view(i, j, k) =
     422            0 :                                     (2 * size * outsideN - ig) * (2 * size * outsideN - ig);
     423              : 
     424              :                                 // add 1.0 if at (0,0,0) to avoid singularity
     425            0 :                                 const bool isOrig = ((ig == 0) && (jg == 0) && (kg == 0));
     426            0 :                                 view(i, j, k) += isOrig * 1.0;
     427              :                             });
     428            0 :                         break;
     429            0 :                     case 1:
     430            0 :                         Kokkos::parallel_for(
     431              :                             "Helper index Green field initialization",
     432            0 :                             grnIField_m[d].getFieldRangePolicy(),
     433            0 :                             KOKKOS_LAMBDA(const int i, const int j, const int k) {
     434              :                                 // go from local indices to global
     435            0 :                                 const int jg = j + ldom[1].first() - nghost;
     436              : 
     437              :                                 // assign (index)^2 if 0 <= index < N, and (2N-index)^2 elsewhere
     438            0 :                                 const bool outsideN = (jg >= size);
     439            0 :                                 view(i, j, k) =
     440            0 :                                     (2 * size * outsideN - jg) * (2 * size * outsideN - jg);
     441              :                             });
     442            0 :                         break;
     443            0 :                     case 2:
     444            0 :                         Kokkos::parallel_for(
     445              :                             "Helper index Green field initialization",
     446            0 :                             grnIField_m[d].getFieldRangePolicy(),
     447            0 :                             KOKKOS_LAMBDA(const int i, const int j, const int k) {
     448              :                                 // go from local indices to global
     449            0 :                                 const int kg = k + ldom[2].first() - nghost;
     450              : 
     451              :                                 // assign (index)^2 if 0 <= index < N, and (2N-index)^2 elsewhere
     452            0 :                                 const bool outsideN = (kg >= size);
     453            0 :                                 view(i, j, k) =
     454            0 :                                     (2 * size * outsideN - kg) * (2 * size * outsideN - kg);
     455              :                             });
     456            0 :                         break;
     457              :                 }
     458              :             }
     459            0 :             IpplTimings::stopTimer(initialize_hockney);
     460              :         }
     461              : 
     462            0 :         static IpplTimings::TimerRef warmup = IpplTimings::getTimer("Warmup");
     463            0 :         IpplTimings::startTimer(warmup);
     464              : 
     465              :         // "empty" transforms to warmup all the FFTs
     466            0 :         fft_m->warmup(rho2_mr, rho2tr_m);
     467            0 :         if (alg == Algorithm::VICO || alg == Algorithm::BIHARMONIC) {
     468            0 :             fft4n_m->warmup(grnL_m);
     469              :         }
     470            0 :         if (alg == Algorithm::DCT_VICO) {
     471            0 :             fft2n1_m->warmup(grn2n1_m);
     472              :         }
     473              : 
     474            0 :         IpplTimings::stopTimer(warmup);
     475              : 
     476            0 :         rho2_mr  = 0.0;
     477            0 :         rho2tr_m = 0.0;
     478            0 :         grnL_m   = 0.0;
     479            0 :         grn2n1_m = 0.0;
     480              : 
     481              :         // call greensFunction and we will get the transformed G in the class attribute
     482              :         // this is done in initialization so that we already have the precomputed fct
     483              :         // for all timesteps (green's fct will only change if mesh size changes)
     484            0 :         static IpplTimings::TimerRef ginit = IpplTimings::getTimer("Green Init");
     485            0 :         IpplTimings::startTimer(ginit);
     486            0 :         greensFunction();
     487            0 :         IpplTimings::stopTimer(ginit);
     488            0 :     };
     489              : 
     490              :     /////////////////////////////////////////////////////////////////////////
     491              :     // compute electric potential by solving Poisson's eq given a field rho and mesh spacings hr
     492              :     template <typename FieldLHS, typename FieldRHS>
     493            0 :     void FFTOpenPoissonSolver<FieldLHS, FieldRHS>::solve() {
     494              :         // start a timer
     495            0 :         static IpplTimings::TimerRef solve = IpplTimings::getTimer("Solve");
     496            0 :         IpplTimings::startTimer(solve);
     497              : 
     498              :         // get the output type (sol, grad, or sol & grad)
     499            0 :         const int out = this->params_m.template get<int>("output_type");
     500              : 
     501              :         // get the algorithm (hockney, vico, or biharmonic)
     502            0 :         const int alg = this->params_m.template get<int>("algorithm");
     503              : 
     504              :         // get hessian flag (if true, we compute the Hessian)
     505            0 :         const bool hessian = this->params_m.template get<bool>("hessian");
     506              : 
     507              :         // set the mesh & spacing, which may change each timestep
     508            0 :         mesh_mp = &(this->rhs_mp->get_mesh());
     509              : 
     510              :         // check whether the mesh spacing has changed with respect to the old one
     511              :         // if yes, update and set green flag to true
     512            0 :         bool green = false;
     513            0 :         for (unsigned int i = 0; i < Dim; ++i) {
     514            0 :             if (hr_m[i] != mesh_mp->getMeshSpacing(i)) {
     515            0 :                 hr_m[i] = mesh_mp->getMeshSpacing(i);
     516            0 :                 green   = true;
     517              :             }
     518              :         }
     519              : 
     520              :         // set mesh spacing on the other grids again
     521            0 :         mesh2_m->setMeshSpacing(hr_m);
     522            0 :         meshComplex_m->setMeshSpacing(hr_m);
     523              : 
     524              :         // field object on the doubled grid; zero-padded
     525            0 :         rho2_mr = 0.0;
     526              : 
     527              :         // start a timer
     528            0 :         static IpplTimings::TimerRef stod = IpplTimings::getTimer("Solve: Physical to double");
     529            0 :         IpplTimings::startTimer(stod);
     530              : 
     531              :         // store rho (RHS) in the lower left quadrant of the doubled grid
     532              :         // with or without communication (if only 1 rank)
     533              : 
     534            0 :         const int ranks = Comm->size();
     535              : 
     536            0 :         auto view2 = rho2_mr.getView();
     537            0 :         auto view1 = this->rhs_mp->getView();
     538              : 
     539            0 :         const int nghost2 = rho2_mr.getNghost();
     540            0 :         const int nghost1 = this->rhs_mp->getNghost();
     541              : 
     542            0 :         const auto& ldom2 = layout2_m->getLocalNDIndex();
     543            0 :         const auto& ldom1 = layout_mp->getLocalNDIndex();
     544              : 
     545            0 :         if (ranks > 1) {
     546              :             // COMMUNICATION
     547            0 :             const auto& lDomains2 = layout2_m->getHostLocalDomains();
     548              : 
     549              :             // send
     550            0 :             std::vector<MPI_Request> requests(0);
     551              : 
     552            0 :             for (int i = 0; i < ranks; ++i) {
     553            0 :                 if (lDomains2[i].touches(ldom1)) {
     554            0 :                     auto intersection = lDomains2[i].intersect(ldom1);
     555              : 
     556            0 :                     solver_send(mpi::tag::OPEN_SOLVER, 0, i, intersection, ldom1, nghost1, view1,
     557            0 :                                 fd_m, requests);
     558              :                 }
     559              :             }
     560              : 
     561              :             // receive
     562            0 :             const auto& lDomains1 = layout_mp->getHostLocalDomains();
     563              : 
     564            0 :             for (int i = 0; i < ranks; ++i) {
     565            0 :                 if (lDomains1[i].touches(ldom2)) {
     566            0 :                     auto intersection = lDomains1[i].intersect(ldom2);
     567              : 
     568              :                     mpi::Communicator::size_type nrecvs;
     569            0 :                     nrecvs = intersection.size();
     570              : 
     571            0 :                     buffer_type buf = Comm->getBuffer<memory_space, Trhs>(nrecvs);
     572              : 
     573            0 :                     Comm->recv(i, mpi::tag::OPEN_SOLVER, fd_m, *buf, nrecvs * sizeof(Trhs), nrecvs);
     574            0 :                     buf->resetReadPos();
     575              : 
     576            0 :                     unpack(intersection, view2, fd_m, nghost2, ldom2);
     577            0 :                 }
     578              :             }
     579              : 
     580              :             // wait for all messages to be received
     581            0 :             if (requests.size() > 0) {
     582            0 :                 MPI_Waitall(requests.size(), requests.data(), MPI_STATUSES_IGNORE);
     583              :             }
     584            0 :             ippl::Comm->freeAllBuffers();
     585              : 
     586            0 :         } else {
     587            0 :             Kokkos::parallel_for(
     588            0 :                 "Write rho on the doubled grid", this->rhs_mp->getFieldRangePolicy(),
     589            0 :                 KOKKOS_LAMBDA(const size_t i, const size_t j, const size_t k) {
     590            0 :                     const size_t ig2 = i + ldom2[0].first() - nghost2;
     591            0 :                     const size_t jg2 = j + ldom2[1].first() - nghost2;
     592            0 :                     const size_t kg2 = k + ldom2[2].first() - nghost2;
     593              : 
     594            0 :                     const size_t ig1 = i + ldom1[0].first() - nghost1;
     595            0 :                     const size_t jg1 = j + ldom1[1].first() - nghost1;
     596            0 :                     const size_t kg1 = k + ldom1[2].first() - nghost1;
     597              : 
     598              :                     // write physical rho on [0,N-1] of doubled field
     599            0 :                     const bool isQuadrant1 = ((ig1 == ig2) && (jg1 == jg2) && (kg1 == kg2));
     600            0 :                     view2(i, j, k)         = view1(i, j, k) * isQuadrant1;
     601              :                 });
     602              :         }
     603              : 
     604            0 :         IpplTimings::stopTimer(stod);
     605              : 
     606              :         // start a timer
     607            0 :         static IpplTimings::TimerRef fftrho = IpplTimings::getTimer("FFT: Rho");
     608            0 :         IpplTimings::startTimer(fftrho);
     609              : 
     610              :         // forward FFT of the charge density field on doubled grid
     611            0 :         fft_m->transform(FORWARD, rho2_mr, rho2tr_m);
     612              : 
     613            0 :         IpplTimings::stopTimer(fftrho);
     614              : 
     615              :         // call greensFunction to recompute if the mesh spacing has changed
     616            0 :         if (green) {
     617            0 :             greensFunction();
     618              :         }
     619              : 
     620              :         // multiply FFT(rho2)*FFT(green)
     621              :         // convolution becomes multiplication in FFT
     622              :         // minus sign since we are solving laplace(phi) = -rho
     623            0 :         rho2tr_m = -rho2tr_m * grntr_m;
     624              : 
     625              :         // if output_type is SOL or SOL_AND_GRAD, we caculate solution
     626            0 :         if ((out == Base::SOL) || (out == Base::SOL_AND_GRAD)) {
     627              :             // start a timer
     628            0 :             static IpplTimings::TimerRef fftc = IpplTimings::getTimer("FFT: Convolution");
     629            0 :             IpplTimings::startTimer(fftc);
     630              : 
     631              :             // inverse FFT of the product and store the electrostatic potential in rho2_mr
     632            0 :             fft_m->transform(BACKWARD, rho2_mr, rho2tr_m);
     633              : 
     634            0 :             IpplTimings::stopTimer(fftc);
     635              :             // Hockney: multiply the rho2_mr field by the total number of points to account for
     636              :             // double counting (rho and green) of normalization factor in forward transform
     637              :             // also multiply by the mesh spacing^3 (to account for discretization)
     638              :             // Vico: need to multiply by normalization factor of 1/4N^3,
     639              :             // since only backward transform was performed on the 4N grid
     640              :             // DCT_VICO: need to multiply by a factor of (2N)^3 to match the normalization factor in
     641              :             // the transform.
     642            0 :             for (unsigned int i = 0; i < Dim; ++i) {
     643            0 :                 switch (alg) {
     644            0 :                     case Algorithm::HOCKNEY:
     645            0 :                         rho2_mr = rho2_mr * 2.0 * nr_m[i] * hr_m[i];
     646            0 :                         break;
     647            0 :                     case Algorithm::VICO:
     648              :                     case Algorithm::BIHARMONIC:
     649            0 :                         rho2_mr = rho2_mr * 2.0 * (1.0 / 4.0);
     650            0 :                         break;
     651            0 :                     case Algorithm::DCT_VICO:
     652            0 :                         rho2_mr = rho2_mr * (1.0 / 4.0);
     653            0 :                         break;
     654            0 :                     default:
     655            0 :                         throw IpplException(
     656              :                             "FFTOpenPoissonSolver::initializeFields()",
     657              :                             "Currently only HOCKNEY, VICO, DCT_VICO, and BIHARMONIC are "
     658              :                             "supported for open BCs");
     659              :                 }
     660              :             }
     661              : 
     662              :             // start a timer
     663            0 :             static IpplTimings::TimerRef dtos = IpplTimings::getTimer("Solve: Double to physical");
     664            0 :             IpplTimings::startTimer(dtos);
     665              : 
     666              :             // get the physical part only --> physical electrostatic potential is now given in RHS
     667              :             // need communication if more than one rank
     668              : 
     669            0 :             if (ranks > 1) {
     670              :                 // COMMUNICATION
     671              : 
     672              :                 // send
     673            0 :                 const auto& lDomains1 = layout_mp->getHostLocalDomains();
     674              : 
     675            0 :                 std::vector<MPI_Request> requests(0);
     676              : 
     677            0 :                 for (int i = 0; i < ranks; ++i) {
     678            0 :                     if (lDomains1[i].touches(ldom2)) {
     679            0 :                         auto intersection = lDomains1[i].intersect(ldom2);
     680              : 
     681            0 :                         solver_send(mpi::tag::OPEN_SOLVER, 0, i, intersection, ldom2, nghost2,
     682            0 :                                     view2, fd_m, requests);
     683              :                     }
     684              :                 }
     685              : 
     686              :                 // receive
     687            0 :                 const auto& lDomains2 = layout2_m->getHostLocalDomains();
     688              : 
     689            0 :                 for (int i = 0; i < ranks; ++i) {
     690            0 :                     if (ldom1.touches(lDomains2[i])) {
     691            0 :                         auto intersection = ldom1.intersect(lDomains2[i]);
     692              : 
     693              :                         mpi::Communicator::size_type nrecvs;
     694            0 :                         nrecvs = intersection.size();
     695              : 
     696            0 :                         buffer_type buf = Comm->getBuffer<memory_space, Trhs>(nrecvs);
     697              : 
     698            0 :                         Comm->recv(i, mpi::tag::OPEN_SOLVER, fd_m, *buf, nrecvs * sizeof(Trhs),
     699              :                                    nrecvs);
     700            0 :                         buf->resetReadPos();
     701              : 
     702            0 :                         unpack(intersection, view1, fd_m, nghost1, ldom1);
     703            0 :                     }
     704              :                 }
     705              : 
     706              :                 // wait for all messages to be received
     707            0 :                 if (requests.size() > 0) {
     708            0 :                     MPI_Waitall(requests.size(), requests.data(), MPI_STATUSES_IGNORE);
     709              :                 }
     710            0 :                 ippl::Comm->freeAllBuffers();
     711              : 
     712            0 :             } else {
     713            0 :                 Kokkos::parallel_for(
     714              :                     "Write the solution into the LHS on physical grid",
     715            0 :                     this->rhs_mp->getFieldRangePolicy(),
     716            0 :                     KOKKOS_LAMBDA(const int i, const int j, const int k) {
     717            0 :                         const int ig2 = i + ldom2[0].first() - nghost2;
     718            0 :                         const int jg2 = j + ldom2[1].first() - nghost2;
     719            0 :                         const int kg2 = k + ldom2[2].first() - nghost2;
     720              : 
     721            0 :                         const int ig = i + ldom1[0].first() - nghost1;
     722            0 :                         const int jg = j + ldom1[1].first() - nghost1;
     723            0 :                         const int kg = k + ldom1[2].first() - nghost1;
     724              : 
     725              :                         // take [0,N-1] as physical solution
     726            0 :                         const bool isQuadrant1 = ((ig == ig2) && (jg == jg2) && (kg == kg2));
     727            0 :                         view1(i, j, k)         = view2(i, j, k) * isQuadrant1;
     728              :                     });
     729              :             }
     730            0 :             IpplTimings::stopTimer(dtos);
     731              :         }
     732              : 
     733              :         // if we want finite differences Efield = -grad(phi)
     734              :         // instead of computing it in Fourier domain
     735              :         // this is only possible if SOL_AND_GRAD is the output type
     736            0 :         if (isGradFD_m && (out == Base::SOL_AND_GRAD)) {
     737            0 :             *(this->lhs_mp) = -grad(*this->rhs_mp);
     738              :         }
     739              : 
     740              :         // if output_type is GRAD or SOL_AND_GRAD, we calculate E-field (gradient in Fourier domain)
     741            0 :         if (((out == Base::GRAD) || (out == Base::SOL_AND_GRAD)) && (!isGradFD_m)) {
     742              :             // start a timer
     743            0 :             static IpplTimings::TimerRef efield = IpplTimings::getTimer("Solve: Electric field");
     744            0 :             IpplTimings::startTimer(efield);
     745              : 
     746              :             // get E field view (LHS)
     747            0 :             auto viewL        = this->lhs_mp->getView();
     748            0 :             const int nghostL = this->lhs_mp->getNghost();
     749              : 
     750              :             // get rho2tr_m view (as we want to multiply by ik then transform)
     751            0 :             auto viewR        = rho2tr_m.getView();
     752            0 :             const int nghostR = rho2tr_m.getNghost();
     753            0 :             const auto& ldomR = layoutComplex_m->getLocalNDIndex();
     754              : 
     755              :             // use temp_m as a temporary complex field
     756            0 :             auto view_g = temp_m.getView();
     757              : 
     758              :             // define some constants
     759            0 :             const scalar_type pi          = Kokkos::numbers::pi_v<scalar_type>;
     760            0 :             const Kokkos::complex<Trhs> I = {0.0, 1.0};
     761              : 
     762              :             // define some member variables in local scope for the parallel_for
     763            0 :             vector_type hsize  = hr_m;
     764            0 :             Vector<int, Dim> N = nr_m;
     765              : 
     766              :             // loop over each component (E = vector field)
     767            0 :             for (size_t gd = 0; gd < Dim; ++gd) {
     768              :                 // loop over rho2tr_m to multiply by -ik (gradient in Fourier space)
     769            0 :                 Kokkos::parallel_for(
     770            0 :                     "Gradient - E field", rho2tr_m.getFieldRangePolicy(),
     771            0 :                     KOKKOS_LAMBDA(const int i, const int j, const int k) {
     772              :                         // global indices for 2N rhotr_m
     773            0 :                         const int ig = i + ldomR[0].first() - nghostR;
     774            0 :                         const int jg = j + ldomR[1].first() - nghostR;
     775            0 :                         const int kg = k + ldomR[2].first() - nghostR;
     776              : 
     777            0 :                         Vector<int, 3> iVec = {ig, jg, kg};
     778              : 
     779              :                         scalar_type k_gd;
     780            0 :                         const scalar_type Len = N[gd] * hsize[gd];
     781            0 :                         const bool shift      = (iVec[gd] > N[gd]);
     782            0 :                         const bool notMid     = (iVec[gd] != N[gd]);
     783              : 
     784            0 :                         k_gd = notMid * (pi / Len) * (iVec[gd] - shift * 2 * N[gd]);
     785              : 
     786            0 :                         view_g(i, j, k) = -(I * k_gd) * viewR(i, j, k);
     787            0 :                     });
     788              : 
     789              :                 // start a timer
     790            0 :                 static IpplTimings::TimerRef ffte = IpplTimings::getTimer("FFT: Efield");
     791            0 :                 IpplTimings::startTimer(ffte);
     792              : 
     793              :                 // transform to get E-field
     794            0 :                 fft_m->transform(BACKWARD, rho2_mr, temp_m);
     795              : 
     796            0 :                 IpplTimings::stopTimer(ffte);
     797              : 
     798              :                 // apply proper normalization
     799            0 :                 for (unsigned int i = 0; i < Dim; ++i) {
     800            0 :                     switch (alg) {
     801            0 :                         case Algorithm::HOCKNEY:
     802            0 :                             rho2_mr = rho2_mr * 2.0 * nr_m[i] * hr_m[i];
     803            0 :                             break;
     804            0 :                         case Algorithm::VICO:
     805              :                         case Algorithm::BIHARMONIC:
     806            0 :                             rho2_mr = rho2_mr * 2.0 * (1.0 / 4.0);
     807            0 :                             break;
     808            0 :                         case Algorithm::DCT_VICO:
     809            0 :                             rho2_mr = rho2_mr * (1.0 / 4.0);
     810            0 :                             break;
     811            0 :                         default:
     812            0 :                             throw IpplException(
     813              :                                 "FFTOpenPoissonSolver::initializeFields()",
     814              :                                 "Currently only HOCKNEY, VICO, DCT_VICO, and BIHARMONIC are "
     815              :                                 "supported for open BCs");
     816              :                     }
     817              :                 }
     818              : 
     819              :                 // start a timer
     820            0 :                 static IpplTimings::TimerRef edtos =
     821            0 :                     IpplTimings::getTimer("Efield: double to phys.");
     822            0 :                 IpplTimings::startTimer(edtos);
     823              : 
     824              :                 // restrict to physical grid (N^3) and assign to LHS (E-field)
     825              :                 // communication needed if more than one rank
     826            0 :                 if (ranks > 1) {
     827              :                     // COMMUNICATION
     828              : 
     829              :                     // send
     830            0 :                     const auto& lDomains1 = layout_mp->getHostLocalDomains();
     831            0 :                     std::vector<MPI_Request> requests(0);
     832              : 
     833            0 :                     for (int i = 0; i < ranks; ++i) {
     834            0 :                         if (lDomains1[i].touches(ldom2)) {
     835            0 :                             auto intersection = lDomains1[i].intersect(ldom2);
     836              : 
     837            0 :                             solver_send(mpi::tag::OPEN_SOLVER, 0, i, intersection, ldom2, nghost2,
     838            0 :                                         view2, fd_m, requests);
     839              :                         }
     840              :                     }
     841              : 
     842              :                     // receive
     843            0 :                     const auto& lDomains2 = layout2_m->getHostLocalDomains();
     844              : 
     845            0 :                     for (int i = 0; i < ranks; ++i) {
     846            0 :                         if (ldom1.touches(lDomains2[i])) {
     847            0 :                             auto intersection = ldom1.intersect(lDomains2[i]);
     848              : 
     849              :                             mpi::Communicator::size_type nrecvs;
     850            0 :                             nrecvs = intersection.size();
     851              : 
     852            0 :                             buffer_type buf = Comm->getBuffer<memory_space, Trhs>(nrecvs);
     853              : 
     854            0 :                             Comm->recv(i, mpi::tag::OPEN_SOLVER, fd_m, *buf, nrecvs * sizeof(Trhs),
     855              :                                        nrecvs);
     856            0 :                             buf->resetReadPos();
     857              : 
     858            0 :                             unpack(intersection, viewL, gd, fd_m, nghostL, ldom1);
     859            0 :                         }
     860              :                     }
     861              : 
     862              :                     // wait for all messages to be received
     863            0 :                     if (requests.size() > 0) {
     864            0 :                         MPI_Waitall(requests.size(), requests.data(), MPI_STATUSES_IGNORE);
     865              :                     }
     866            0 :                     ippl::Comm->freeAllBuffers();
     867              : 
     868            0 :                 } else {
     869            0 :                     Kokkos::parallel_for(
     870            0 :                         "Write the E-field on physical grid", this->lhs_mp->getFieldRangePolicy(),
     871            0 :                         KOKKOS_LAMBDA(const int i, const int j, const int k) {
     872            0 :                             const int ig2 = i + ldom2[0].first() - nghost2;
     873            0 :                             const int jg2 = j + ldom2[1].first() - nghost2;
     874            0 :                             const int kg2 = k + ldom2[2].first() - nghost2;
     875              : 
     876            0 :                             const int ig = i + ldom1[0].first() - nghostL;
     877            0 :                             const int jg = j + ldom1[1].first() - nghostL;
     878            0 :                             const int kg = k + ldom1[2].first() - nghostL;
     879              : 
     880              :                             // take [0,N-1] as physical solution
     881            0 :                             const bool isQuadrant1 = ((ig == ig2) && (jg == jg2) && (kg == kg2));
     882            0 :                             viewL(i, j, k)[gd]     = view2(i, j, k) * isQuadrant1;
     883              :                         });
     884              :                 }
     885            0 :                 IpplTimings::stopTimer(edtos);
     886              :             }
     887            0 :             IpplTimings::stopTimer(efield);
     888            0 :         }
     889              : 
     890              :         // if user asked for Hessian, compute in Fourier domain (-k^2 multiplication)
     891            0 :         if (hessian) {
     892              :             // start a timer
     893            0 :             static IpplTimings::TimerRef hess = IpplTimings::getTimer("Solve: Hessian");
     894            0 :             IpplTimings::startTimer(hess);
     895              : 
     896              :             // get Hessian matrix view (LHS)
     897            0 :             auto viewH        = hess_m.getView();
     898            0 :             const int nghostH = hess_m.getNghost();
     899              : 
     900              :             // get rho2tr_m view (as we want to multiply by -k^2 then transform)
     901            0 :             auto viewR        = rho2tr_m.getView();
     902            0 :             const int nghostR = rho2tr_m.getNghost();
     903            0 :             const auto& ldomR = layoutComplex_m->getLocalNDIndex();
     904              : 
     905              :             // use temp_m as a temporary complex field
     906            0 :             auto view_g = temp_m.getView();
     907              : 
     908              :             // define some constants
     909            0 :             const scalar_type pi = Kokkos::numbers::pi_v<scalar_type>;
     910              : 
     911              :             // define some member variables in local scope for the parallel_for
     912            0 :             vector_type hsize  = hr_m;
     913            0 :             Vector<int, Dim> N = nr_m;
     914              : 
     915              :             // loop over each component (Hessian = Matrix field)
     916            0 :             for (size_t row = 0; row < Dim; ++row) {
     917            0 :                 for (size_t col = 0; col < Dim; ++col) {
     918              :                     // loop over rho2tr_m to multiply by -k^2 (second derivative in Fourier space)
     919              :                     // if diagonal element (row = col), do not need N/2 term = 0
     920              :                     // else, if mixed derivative, need kVec = 0 at N/2
     921              : 
     922            0 :                     Kokkos::parallel_for(
     923            0 :                         "Hessian", rho2tr_m.getFieldRangePolicy(),
     924            0 :                         KOKKOS_LAMBDA(const int i, const int j, const int k) {
     925              :                             // global indices for 2N rhotr_m
     926            0 :                             const int ig = i + ldomR[0].first() - nghostR;
     927            0 :                             const int jg = j + ldomR[1].first() - nghostR;
     928            0 :                             const int kg = k + ldomR[2].first() - nghostR;
     929              : 
     930            0 :                             Vector<int, 3> iVec = {ig, jg, kg};
     931            0 :                             Vector_t kVec;
     932              : 
     933            0 :                             for (size_t d = 0; d < Dim; ++d) {
     934            0 :                                 const scalar_type Len = N[d] * hsize[d];
     935            0 :                                 const bool shift      = (iVec[d] > N[d]);
     936            0 :                                 const bool isMid      = (iVec[d] == N[d]);
     937            0 :                                 const bool notDiag    = (row != col);
     938              : 
     939            0 :                                 kVec[d] = (1 - (notDiag * isMid)) * (pi / Len)
     940            0 :                                           * (iVec[d] - shift * 2 * N[d]);
     941              :                             }
     942              : 
     943            0 :                             view_g(i, j, k) = -(kVec[col] * kVec[row]) * viewR(i, j, k);
     944            0 :                         });
     945              : 
     946              :                     // start a timer
     947            0 :                     static IpplTimings::TimerRef ffth = IpplTimings::getTimer("FFT: Hessian");
     948            0 :                     IpplTimings::startTimer(ffth);
     949              : 
     950              :                     // transform to get Hessian
     951            0 :                     fft_m->transform(BACKWARD, rho2_mr, temp_m);
     952              : 
     953            0 :                     IpplTimings::stopTimer(ffth);
     954              : 
     955              :                     // apply proper normalization
     956            0 :                     for (unsigned int i = 0; i < Dim; ++i) {
     957            0 :                         switch (alg) {
     958            0 :                             case Algorithm::HOCKNEY:
     959            0 :                                 rho2_mr = rho2_mr * 2.0 * nr_m[i] * hr_m[i];
     960            0 :                                 break;
     961            0 :                             case Algorithm::VICO:
     962              :                             case Algorithm::BIHARMONIC:
     963            0 :                                 rho2_mr = rho2_mr * 2.0 * (1.0 / 4.0);
     964            0 :                                 break;
     965            0 :                             case Algorithm::DCT_VICO:
     966            0 :                                 rho2_mr = rho2_mr * (1.0 / 4.0);
     967            0 :                                 break;
     968            0 :                             default:
     969            0 :                                 throw IpplException(
     970              :                                     "FFTOpenPoissonSolver::initializeFields()",
     971              :                                     "Currently only HOCKNEY, VICO, DCT_VICO, and BIHARMONIC are "
     972              :                                     "supported for open BCs");
     973              :                         }
     974              :                     }
     975              : 
     976              :                     // start a timer
     977            0 :                     static IpplTimings::TimerRef hdtos =
     978            0 :                         IpplTimings::getTimer("Hessian: double to phys.");
     979            0 :                     IpplTimings::startTimer(hdtos);
     980              : 
     981              :                     // restrict to physical grid (N^3) and assign to Matrix field (Hessian)
     982              :                     // communication needed if more than one rank
     983            0 :                     if (ranks > 1) {
     984              :                         // COMMUNICATION
     985              : 
     986              :                         // send
     987            0 :                         const auto& lDomains1 = layout_mp->getHostLocalDomains();
     988            0 :                         std::vector<MPI_Request> requests(0);
     989              : 
     990            0 :                         for (int i = 0; i < ranks; ++i) {
     991            0 :                             if (lDomains1[i].touches(ldom2)) {
     992            0 :                                 auto intersection = lDomains1[i].intersect(ldom2);
     993              : 
     994            0 :                                 solver_send(mpi::tag::OPEN_SOLVER, 0, i, intersection, ldom2,
     995            0 :                                             nghost2, view2, fd_m, requests);
     996              :                             }
     997              :                         }
     998              : 
     999              :                         // receive
    1000            0 :                         const auto& lDomains2 = layout2_m->getHostLocalDomains();
    1001              : 
    1002            0 :                         for (int i = 0; i < ranks; ++i) {
    1003            0 :                             if (ldom1.touches(lDomains2[i])) {
    1004            0 :                                 auto intersection = ldom1.intersect(lDomains2[i]);
    1005              : 
    1006              :                                 mpi::Communicator::size_type nrecvs;
    1007            0 :                                 nrecvs = intersection.size();
    1008              : 
    1009            0 :                                 buffer_type buf = Comm->getBuffer<memory_space, Trhs>(nrecvs);
    1010              : 
    1011            0 :                                 Comm->recv(i, mpi::tag::OPEN_SOLVER, fd_m, *buf,
    1012              :                                            nrecvs * sizeof(Trhs), nrecvs);
    1013            0 :                                 buf->resetReadPos();
    1014              : 
    1015            0 :                                 unpack(intersection, viewH, fd_m, nghostH, ldom1, row, col);
    1016            0 :                             }
    1017              :                         }
    1018              : 
    1019              :                         // wait for all messages to be received
    1020            0 :                         if (requests.size() > 0) {
    1021            0 :                             MPI_Waitall(requests.size(), requests.data(), MPI_STATUSES_IGNORE);
    1022              :                         }
    1023            0 :                         ippl::Comm->freeAllBuffers();
    1024              : 
    1025            0 :                     } else {
    1026            0 :                         Kokkos::parallel_for(
    1027            0 :                             "Write Hessian on physical grid", hess_m.getFieldRangePolicy(),
    1028            0 :                             KOKKOS_LAMBDA(const int i, const int j, const int k) {
    1029            0 :                                 const int ig2 = i + ldom2[0].first() - nghost2;
    1030            0 :                                 const int jg2 = j + ldom2[1].first() - nghost2;
    1031            0 :                                 const int kg2 = k + ldom2[2].first() - nghost2;
    1032              : 
    1033            0 :                                 const int ig = i + ldom1[0].first() - nghostH;
    1034            0 :                                 const int jg = j + ldom1[1].first() - nghostH;
    1035            0 :                                 const int kg = k + ldom1[2].first() - nghostH;
    1036              : 
    1037              :                                 // take [0,N-1] as physical solution
    1038            0 :                                 const bool isQuadrant1 =
    1039            0 :                                     ((ig == ig2) && (jg == jg2) && (kg == kg2));
    1040            0 :                                 viewH(i, j, k)[row][col] = view2(i, j, k) * isQuadrant1;
    1041              :                             });
    1042              :                     }
    1043            0 :                     IpplTimings::stopTimer(hdtos);
    1044              :                 }
    1045              :             }
    1046            0 :             IpplTimings::stopTimer(hess);
    1047            0 :         }
    1048            0 :         IpplTimings::stopTimer(solve);
    1049            0 :     };
    1050              : 
    1051              :     ////////////////////////////////////////////////////////////////////////
    1052              :     // calculate FFT of the Green's function
    1053              : 
    1054              :     template <typename FieldLHS, typename FieldRHS>
    1055            0 :     void FFTOpenPoissonSolver<FieldLHS, FieldRHS>::greensFunction() {
    1056            0 :         const scalar_type pi = Kokkos::numbers::pi_v<scalar_type>;
    1057            0 :         grn_mr               = 0.0;
    1058              : 
    1059            0 :         const int alg = this->params_m.template get<int>("algorithm");
    1060              : 
    1061            0 :         if (alg == Algorithm::VICO || alg == Algorithm::BIHARMONIC) {
    1062            0 :             Vector_t l(hr_m * nr_m);
    1063            0 :             Vector_t hs_m;
    1064            0 :             double L_sum(0.0);
    1065              : 
    1066              :             // compute length of the physical domain
    1067              :             // compute Fourier domain spacing
    1068            0 :             for (unsigned int i = 0; i < Dim; ++i) {
    1069            0 :                 hs_m[i] = pi * 0.5 / l[i];
    1070            0 :                 L_sum   = L_sum + l[i] * l[i];
    1071              :             }
    1072              : 
    1073              :             // define the origin of the 4N grid
    1074            0 :             vector_type origin;
    1075              : 
    1076            0 :             for (unsigned int i = 0; i < Dim; ++i) {
    1077            0 :                 origin[i] = -2 * nr_m[i] * pi / l[i];
    1078              :             }
    1079              : 
    1080              :             // set mesh for the 4N mesh
    1081            0 :             mesh4_m->setMeshSpacing(hs_m);
    1082              : 
    1083              :             // size of truncation window
    1084            0 :             L_sum = std::sqrt(L_sum);
    1085              :             // we choose a window 10% larger than domain (arbitrary choice)
    1086            0 :             L_sum = 1.1 * L_sum;
    1087              : 
    1088              :             // initialize grnL_m
    1089            0 :             typename CxField_gt::view_type view_g = grnL_m.getView();
    1090            0 :             const int nghost_g                    = grnL_m.getNghost();
    1091            0 :             const auto& ldom_g                    = layout4_m->getLocalNDIndex();
    1092              : 
    1093            0 :             Vector<int, Dim> size = nr_m;
    1094              : 
    1095              :             // Kokkos parallel for loop to assign analytic grnL_m
    1096            0 :             if (alg == Algorithm::VICO) {
    1097            0 :                 Kokkos::parallel_for(
    1098            0 :                     "Initialize Green's function ", grnL_m.getFieldRangePolicy(),
    1099            0 :                     KOKKOS_LAMBDA(const int i, const int j, const int k) {
    1100              :                         // go from local indices to global
    1101            0 :                         const int ig = i + ldom_g[0].first() - nghost_g;
    1102            0 :                         const int jg = j + ldom_g[1].first() - nghost_g;
    1103            0 :                         const int kg = k + ldom_g[2].first() - nghost_g;
    1104              : 
    1105            0 :                         bool isOutside = (ig > 2 * size[0] - 1);
    1106            0 :                         const Tg t     = ig * hs_m[0] + isOutside * origin[0];
    1107              : 
    1108            0 :                         isOutside  = (jg > 2 * size[1] - 1);
    1109            0 :                         const Tg u = jg * hs_m[1] + isOutside * origin[1];
    1110              : 
    1111            0 :                         isOutside  = (kg > 2 * size[2] - 1);
    1112            0 :                         const Tg v = kg * hs_m[2] + isOutside * origin[2];
    1113              : 
    1114            0 :                         Tg s = (t * t) + (u * u) + (v * v);
    1115            0 :                         s    = Kokkos::sqrt(s);
    1116              : 
    1117              :                         // assign the green's function value
    1118              :                         // if (0,0,0), assign L^2/2 (analytical limit of sinc)
    1119              : 
    1120            0 :                         const bool isOrig    = ((ig == 0 && jg == 0 && kg == 0));
    1121            0 :                         const Tg analyticLim = -L_sum * L_sum * 0.5;
    1122            0 :                         const Tg value = -2.0 * (Kokkos::sin(0.5 * L_sum * s) / (s + isOrig * 1.0))
    1123            0 :                                          * (Kokkos::sin(0.5 * L_sum * s) / (s + isOrig * 1.0));
    1124              : 
    1125            0 :                         view_g(i, j, k) = (!isOrig) * value + isOrig * analyticLim;
    1126              :                     });
    1127            0 :             } else if (alg == Algorithm::BIHARMONIC) {
    1128            0 :                 Kokkos::parallel_for(
    1129            0 :                     "Initialize Green's function ", grnL_m.getFieldRangePolicy(),
    1130            0 :                     KOKKOS_LAMBDA(const int i, const int j, const int k) {
    1131              :                         // go from local indices to global
    1132            0 :                         const int ig = i + ldom_g[0].first() - nghost_g;
    1133            0 :                         const int jg = j + ldom_g[1].first() - nghost_g;
    1134            0 :                         const int kg = k + ldom_g[2].first() - nghost_g;
    1135              : 
    1136            0 :                         bool isOutside = (ig > 2 * size[0] - 1);
    1137            0 :                         const Tg t     = ig * hs_m[0] + isOutside * origin[0];
    1138              : 
    1139            0 :                         isOutside  = (jg > 2 * size[1] - 1);
    1140            0 :                         const Tg u = jg * hs_m[1] + isOutside * origin[1];
    1141              : 
    1142            0 :                         isOutside  = (kg > 2 * size[2] - 1);
    1143            0 :                         const Tg v = kg * hs_m[2] + isOutside * origin[2];
    1144              : 
    1145            0 :                         Tg s = (t * t) + (u * u) + (v * v);
    1146            0 :                         s    = Kokkos::sqrt(s);
    1147              : 
    1148              :                         // assign value and replace with analytic limit at origin (0,0,0)
    1149            0 :                         const bool isOrig    = ((ig == 0 && jg == 0 && kg == 0));
    1150            0 :                         const Tg analyticLim = -L_sum * L_sum * L_sum * L_sum / 8.0;
    1151            0 :                         const Tg value = -((2 - (L_sum * L_sum * s * s)) * Kokkos::cos(L_sum * s)
    1152            0 :                                            + 2 * L_sum * s * Kokkos::sin(L_sum * s) - 2)
    1153            0 :                                          / (2 * s * s * s * s + isOrig * 1.0);
    1154              : 
    1155            0 :                         view_g(i, j, k) = (!isOrig) * value + isOrig * analyticLim;
    1156              :                     });
    1157              :             }
    1158              : 
    1159              :             // start a timer
    1160            0 :             static IpplTimings::TimerRef fft4 = IpplTimings::getTimer("FFT: Precomputation");
    1161            0 :             IpplTimings::startTimer(fft4);
    1162              : 
    1163              :             // inverse Fourier transform of the green's function for precomputation
    1164            0 :             fft4n_m->transform(BACKWARD, grnL_m);
    1165              : 
    1166            0 :             IpplTimings::stopTimer(fft4);
    1167              : 
    1168              :             // Restrict transformed grnL_m to 2N domain after precomputation step
    1169              : 
    1170              :             // get the field data first
    1171            0 :             typename Field_t::view_type view = grn_mr.getView();
    1172            0 :             const int nghost                 = grn_mr.getNghost();
    1173            0 :             const auto& ldom                 = layout2_m->getLocalNDIndex();
    1174              : 
    1175              :             // start a timer
    1176            0 :             static IpplTimings::TimerRef ifftshift = IpplTimings::getTimer("Vico shift loop");
    1177            0 :             IpplTimings::startTimer(ifftshift);
    1178              : 
    1179              :             // get number of ranks to see if need communication
    1180            0 :             const int ranks = Comm->size();
    1181              : 
    1182            0 :             if (ranks > 1) {
    1183            0 :                 communicateVico(size, view_g, ldom_g, nghost_g, view, ldom, nghost);
    1184              :             } else {
    1185              :                 // restrict the green's function to a (2N)^3 grid from the (4N)^3 grid
    1186              :                 using mdrange_type = Kokkos::MDRangePolicy<Kokkos::Rank<3>>;
    1187            0 :                 Kokkos::parallel_for(
    1188              :                     "Restrict domain of Green's function from 4N to 2N",
    1189            0 :                     mdrange_type({nghost, nghost, nghost}, {view.extent(0) - nghost - size[0],
    1190            0 :                                                             view.extent(1) - nghost - size[1],
    1191            0 :                                                             view.extent(2) - nghost - size[2]}),
    1192            0 :                     KOKKOS_LAMBDA(const int i, const int j, const int k) {
    1193              :                         // go from local indices to global
    1194            0 :                         const int ig = i + ldom[0].first() - nghost;
    1195            0 :                         const int jg = j + ldom[1].first() - nghost;
    1196            0 :                         const int kg = k + ldom[2].first() - nghost;
    1197              : 
    1198            0 :                         const int ig2 = i + ldom_g[0].first() - nghost_g;
    1199            0 :                         const int jg2 = j + ldom_g[1].first() - nghost_g;
    1200            0 :                         const int kg2 = k + ldom_g[2].first() - nghost_g;
    1201              : 
    1202            0 :                         const bool isOrig = (ig == ig2) && (jg == jg2) && (kg == kg2);
    1203            0 :                         view(i, j, k)     = isOrig * real(view_g(i, j, k));
    1204              : 
    1205              :                         // Now fill the rest of the field
    1206            0 :                         const int s = 2 * size[0] - ig - 1 - ldom_g[0].first() + nghost_g;
    1207            0 :                         const int p = 2 * size[1] - jg - 1 - ldom_g[1].first() + nghost_g;
    1208            0 :                         const int q = 2 * size[2] - kg - 1 - ldom_g[2].first() + nghost_g;
    1209              : 
    1210            0 :                         view(s, j, k) = real(view_g(i + 1, j, k));
    1211            0 :                         view(i, p, k) = real(view_g(i, j + 1, k));
    1212            0 :                         view(i, j, q) = real(view_g(i, j, k + 1));
    1213            0 :                         view(s, j, q) = real(view_g(i + 1, j, k + 1));
    1214            0 :                         view(s, p, k) = real(view_g(i + 1, j + 1, k));
    1215            0 :                         view(i, p, q) = real(view_g(i, j + 1, k + 1));
    1216            0 :                         view(s, p, q) = real(view_g(i + 1, j + 1, k + 1));
    1217              :                     });
    1218              :             }
    1219            0 :             IpplTimings::stopTimer(ifftshift);
    1220              : 
    1221            0 :         } else if (alg == Algorithm::DCT_VICO) {
    1222            0 :             Vector_t l(hr_m * nr_m);
    1223            0 :             Vector_t hs_m;
    1224            0 :             double L_sum(0.0);
    1225              : 
    1226              :             // compute length of the physical domain
    1227              :             // compute Fourier domain spacing
    1228            0 :             for (unsigned int i = 0; i < Dim; ++i) {
    1229            0 :                 hs_m[i] = Kokkos::numbers::pi_v<Trhs> * 0.5 / l[i];
    1230            0 :                 L_sum   = L_sum + l[i] * l[i];
    1231              :             }
    1232              : 
    1233              :             // define the origin of the 2N+1 grid
    1234            0 :             Vector_t origin = 0.0;
    1235              : 
    1236              :             // set mesh for the 2N+1 mesh
    1237            0 :             mesh2n1_m->setMeshSpacing(hs_m);
    1238              : 
    1239              :             // size of truncation window
    1240            0 :             L_sum = Kokkos::sqrt(L_sum);
    1241            0 :             L_sum = 1.1 * L_sum;
    1242              : 
    1243              :             // initialize grn2n1_m
    1244            0 :             Vector<int, Dim> size = nr_m;
    1245              : 
    1246              :             // initialize grn2n1_m
    1247            0 :             typename Field_t::view_type view_g2n1 = grn2n1_m.getView();
    1248            0 :             const int nghost_g2n1                 = grn2n1_m.getNghost();
    1249            0 :             const auto& ldom_g2n1                 = layout2n1_m->getLocalNDIndex();
    1250              : 
    1251            0 :             Kokkos::parallel_for(
    1252            0 :                 "Initialize 2N+1 Green's function ", grn2n1_m.getFieldRangePolicy(),
    1253            0 :                 KOKKOS_LAMBDA(const int i, const int j, const int k) {
    1254              :                     // go from local indices to global
    1255            0 :                     const int ig = i + ldom_g2n1[0].first() - nghost_g2n1;
    1256            0 :                     const int jg = j + ldom_g2n1[1].first() - nghost_g2n1;
    1257            0 :                     const int kg = k + ldom_g2n1[2].first() - nghost_g2n1;
    1258              : 
    1259            0 :                     double t = ig * hs_m[0];
    1260            0 :                     double u = jg * hs_m[1];
    1261            0 :                     double v = kg * hs_m[2];
    1262              : 
    1263            0 :                     double s = (t * t) + (u * u) + (v * v);
    1264            0 :                     s        = Kokkos::sqrt(s);
    1265              : 
    1266            0 :                     const bool isOrig = ((ig == 0 && jg == 0 && kg == 0));
    1267            0 :                     const double val  = -2.0 * (Kokkos::sin(0.5 * L_sum * s) / (s + isOrig * 1.0))
    1268            0 :                                        * (Kokkos::sin(0.5 * L_sum * s) / (s + isOrig * 1.0));
    1269            0 :                     const double analyticLim = -L_sum * L_sum * 0.5;
    1270              : 
    1271            0 :                     view_g2n1(i, j, k) = ((!isOrig) * val) + (isOrig * analyticLim);
    1272              :                 });
    1273              : 
    1274              :             // start a timer
    1275            0 :             static IpplTimings::TimerRef fft4 = IpplTimings::getTimer("FFT: Precomputation");
    1276            0 :             IpplTimings::startTimer(fft4);
    1277              : 
    1278              :             // inverse DCT transform of 2N+1 green's function for the precomputation
    1279            0 :             fft2n1_m->transform(BACKWARD, grn2n1_m);
    1280              : 
    1281            0 :             IpplTimings::stopTimer(fft4);
    1282              : 
    1283              :             // Restrict transformed grn2n1_m to 2N domain after precomputation step
    1284              : 
    1285              :             // get the field data first
    1286            0 :             typename Field_t::view_type view = grn_mr.getView();
    1287            0 :             const int nghost                 = grn_mr.getNghost();
    1288            0 :             const auto& ldom                 = layout2_m->getLocalNDIndex();
    1289              : 
    1290              :             // start a timer
    1291            0 :             static IpplTimings::TimerRef ifftshift = IpplTimings::getTimer("Vico shift loop");
    1292            0 :             IpplTimings::startTimer(ifftshift);
    1293              : 
    1294              :             // get number of ranks to see if need communication
    1295            0 :             int ranks = ippl::Comm->size();
    1296              : 
    1297              :             // range type for Kokkos loop
    1298              :             using mdrange_type = Kokkos::MDRangePolicy<Kokkos::Rank<3>>;
    1299              : 
    1300            0 :             if (ranks > 1) {
    1301            0 :                 communicateVico(size, view_g2n1, ldom_g2n1, nghost_g2n1, view, ldom, nghost);
    1302              :             } else {
    1303              :                 // restrict the green's function to a (2N)^3 grid from the (2N+1)^3 grid
    1304            0 :                 Kokkos::parallel_for(
    1305              :                     "Restrict domain of Green's function from 2N+1 to 2N",
    1306            0 :                     mdrange_type({nghost, nghost, nghost}, {view.extent(0) - nghost - size[0],
    1307            0 :                                                             view.extent(1) - nghost - size[1],
    1308            0 :                                                             view.extent(2) - nghost - size[2]}),
    1309            0 :                     KOKKOS_LAMBDA(const int i, const int j, const int k) {
    1310              :                         // go from local indices to global
    1311            0 :                         const int ig = i + ldom[0].first() - nghost;
    1312            0 :                         const int jg = j + ldom[1].first() - nghost;
    1313            0 :                         const int kg = k + ldom[2].first() - nghost;
    1314              : 
    1315            0 :                         const int ig2 = i + ldom_g2n1[0].first() - nghost_g2n1;
    1316            0 :                         const int jg2 = j + ldom_g2n1[1].first() - nghost_g2n1;
    1317            0 :                         const int kg2 = k + ldom_g2n1[2].first() - nghost_g2n1;
    1318              : 
    1319            0 :                         const bool isOrig = ((ig == ig2) && (jg == jg2) && (kg == kg2));
    1320            0 :                         view(i, j, k)     = isOrig * view_g2n1(i, j, k);
    1321              : 
    1322              :                         // Now fill the rest of the field
    1323            0 :                         const int s = 2 * size[0] - ig - 1 - ldom_g2n1[0].first() + nghost_g2n1;
    1324            0 :                         const int p = 2 * size[1] - jg - 1 - ldom_g2n1[1].first() + nghost_g2n1;
    1325            0 :                         const int q = 2 * size[2] - kg - 1 - ldom_g2n1[2].first() + nghost_g2n1;
    1326              : 
    1327            0 :                         view(s, j, k) = view_g2n1(i + 1, j, k);
    1328            0 :                         view(i, p, k) = view_g2n1(i, j + 1, k);
    1329            0 :                         view(i, j, q) = view_g2n1(i, j, k + 1);
    1330            0 :                         view(s, j, q) = view_g2n1(i + 1, j, k + 1);
    1331            0 :                         view(s, p, k) = view_g2n1(i + 1, j + 1, k);
    1332            0 :                         view(i, p, q) = view_g2n1(i, j + 1, k + 1);
    1333            0 :                         view(s, p, q) = view_g2n1(i + 1, j + 1, k + 1);
    1334              :                     });
    1335              :             }
    1336              : 
    1337            0 :         } else {
    1338              :             // Hockney case
    1339              : 
    1340              :             // calculate square of the mesh spacing for each dimension
    1341            0 :             Vector_t hrsq(hr_m * hr_m);
    1342              : 
    1343              :             // use the grnIField_m helper field to compute Green's function
    1344            0 :             for (unsigned int i = 0; i < Dim; ++i) {
    1345            0 :                 grn_mr = grn_mr + grnIField_m[i] * hrsq[i];
    1346              :             }
    1347              : 
    1348            0 :             grn_mr = -1.0 / (4.0 * pi * sqrt(grn_mr));
    1349              : 
    1350            0 :             typename Field_t::view_type view = grn_mr.getView();
    1351            0 :             const int nghost                 = grn_mr.getNghost();
    1352            0 :             const auto& ldom                 = layout2_m->getLocalNDIndex();
    1353              : 
    1354              :             // Kokkos parallel for loop to find (0,0,0) point and regularize
    1355            0 :             Kokkos::parallel_for(
    1356            0 :                 "Regularize Green's function ", grn_mr.getFieldRangePolicy(),
    1357            0 :                 KOKKOS_LAMBDA(const int i, const int j, const int k) {
    1358              :                     // go from local indices to global
    1359            0 :                     const int ig = i + ldom[0].first() - nghost;
    1360            0 :                     const int jg = j + ldom[1].first() - nghost;
    1361            0 :                     const int kg = k + ldom[2].first() - nghost;
    1362              : 
    1363              :                     // if (0,0,0), assign to it 1/(4*pi)
    1364            0 :                     const bool isOrig = (ig == 0 && jg == 0 && kg == 0);
    1365            0 :                     view(i, j, k)     = isOrig * (-1.0 / (4.0 * pi)) + (!isOrig) * view(i, j, k);
    1366              :                 });
    1367            0 :         }
    1368              : 
    1369              :         // start a timer
    1370            0 :         static IpplTimings::TimerRef fftg = IpplTimings::getTimer("FFT: Green");
    1371            0 :         IpplTimings::startTimer(fftg);
    1372              : 
    1373              :         // perform the FFT of the Green's function for the convolution
    1374            0 :         fft_m->transform(FORWARD, grn_mr, grntr_m);
    1375              : 
    1376            0 :         IpplTimings::stopTimer(fftg);
    1377            0 :     };
    1378              : 
    1379              :     template <typename FieldLHS, typename FieldRHS>
    1380            0 :     void FFTOpenPoissonSolver<FieldLHS, FieldRHS>::communicateVico(
    1381              :         Vector<int, Dim> size, typename CxField_gt::view_type view_g,
    1382              :         const ippl::NDIndex<Dim> ldom_g, const int nghost_g, typename Field_t::view_type view,
    1383              :         const ippl::NDIndex<Dim> ldom, const int nghost) {
    1384            0 :         const auto& lDomains2 = layout2_m->getHostLocalDomains();
    1385            0 :         const auto& lDomains4 = layout4_m->getHostLocalDomains();
    1386              : 
    1387            0 :         std::vector<MPI_Request> requests(0);
    1388            0 :         const int ranks = Comm->size();
    1389              : 
    1390              :         // 1st step: Define 8 domains corresponding to the different quadrants
    1391            0 :         ippl::NDIndex<Dim> none;
    1392            0 :         for (unsigned i = 0; i < Dim; i++) {
    1393            0 :             none[i] = ippl::Index(size[i]);
    1394              :         }
    1395              : 
    1396            0 :         ippl::NDIndex<Dim> x;
    1397            0 :         x[0] = ippl::Index(size[0], 2 * size[0] - 1);
    1398            0 :         x[1] = ippl::Index(size[1]);
    1399            0 :         x[2] = ippl::Index(size[2]);
    1400              : 
    1401            0 :         ippl::NDIndex<Dim> y;
    1402            0 :         y[0] = ippl::Index(size[0]);
    1403            0 :         y[1] = ippl::Index(size[1], 2 * size[1] - 1);
    1404            0 :         y[2] = ippl::Index(size[2]);
    1405              : 
    1406            0 :         ippl::NDIndex<Dim> z;
    1407            0 :         z[0] = ippl::Index(size[0]);
    1408            0 :         z[1] = ippl::Index(size[1]);
    1409            0 :         z[2] = ippl::Index(size[2], 2 * size[2] - 1);
    1410              : 
    1411            0 :         ippl::NDIndex<Dim> xy;
    1412            0 :         xy[0] = ippl::Index(size[0], 2 * size[0] - 1);
    1413            0 :         xy[1] = ippl::Index(size[1], 2 * size[1] - 1);
    1414            0 :         xy[2] = ippl::Index(size[2]);
    1415              : 
    1416            0 :         ippl::NDIndex<Dim> xz;
    1417            0 :         xz[0] = ippl::Index(size[0], 2 * size[0] - 1);
    1418            0 :         xz[1] = ippl::Index(size[1]);
    1419            0 :         xz[2] = ippl::Index(size[2], 2 * size[2] - 1);
    1420              : 
    1421            0 :         ippl::NDIndex<Dim> yz;
    1422            0 :         yz[0] = ippl::Index(size[0]);
    1423            0 :         yz[1] = ippl::Index(size[1], 2 * size[1] - 1);
    1424            0 :         yz[2] = ippl::Index(size[2], 2 * size[2] - 1);
    1425              : 
    1426            0 :         ippl::NDIndex<Dim> xyz;
    1427            0 :         for (unsigned i = 0; i < Dim; i++) {
    1428            0 :             xyz[i] = ippl::Index(size[i], 2 * size[i] - 1);
    1429              :         }
    1430              : 
    1431              :         // 2nd step: send
    1432            0 :         for (int i = 0; i < ranks; ++i) {
    1433            0 :             auto domain2 = lDomains2[i];
    1434              : 
    1435            0 :             if (domain2.touches(none)) {
    1436            0 :                 auto intersection = domain2.intersect(none);
    1437              : 
    1438            0 :                 if (ldom_g.touches(intersection)) {
    1439            0 :                     intersection = intersection.intersect(ldom_g);
    1440              : 
    1441            0 :                     solver_send(mpi::tag::VICO_SOLVER, 0, i, intersection, ldom_g, nghost_g, view_g,
    1442            0 :                                 fd_m, requests);
    1443              :                 }
    1444              :             }
    1445              : 
    1446            0 :             if (domain2.touches(x)) {
    1447            0 :                 auto intersection = domain2.intersect(x);
    1448            0 :                 auto xdom         = ippl::Index((2 * size[0] - intersection[0].first()),
    1449            0 :                                                 (2 * size[0] - intersection[0].last()), -1);
    1450              : 
    1451            0 :                 ippl::NDIndex<Dim> domain4;
    1452            0 :                 domain4[0] = xdom;
    1453            0 :                 domain4[1] = intersection[1];
    1454            0 :                 domain4[2] = intersection[2];
    1455              : 
    1456            0 :                 if (ldom_g.touches(domain4)) {
    1457            0 :                     intersection = ldom_g.intersect(domain4);
    1458              : 
    1459            0 :                     solver_send(mpi::tag::VICO_SOLVER, 1, i, intersection, ldom_g, nghost_g, view_g,
    1460            0 :                                 fd_m, requests);
    1461              :                 }
    1462              :             }
    1463              : 
    1464            0 :             if (domain2.touches(y)) {
    1465            0 :                 auto intersection = domain2.intersect(y);
    1466            0 :                 auto ydom         = ippl::Index((2 * size[1] - intersection[1].first()),
    1467            0 :                                                 (2 * size[1] - intersection[1].last()), -1);
    1468              : 
    1469            0 :                 ippl::NDIndex<Dim> domain4;
    1470            0 :                 domain4[0] = intersection[0];
    1471            0 :                 domain4[1] = ydom;
    1472            0 :                 domain4[2] = intersection[2];
    1473              : 
    1474            0 :                 if (ldom_g.touches(domain4)) {
    1475            0 :                     intersection = ldom_g.intersect(domain4);
    1476              : 
    1477            0 :                     solver_send(mpi::tag::VICO_SOLVER, 2, i, intersection, ldom_g, nghost_g, view_g,
    1478            0 :                                 fd_m, requests);
    1479              :                 }
    1480              :             }
    1481              : 
    1482            0 :             if (domain2.touches(z)) {
    1483            0 :                 auto intersection = domain2.intersect(z);
    1484            0 :                 auto zdom         = ippl::Index((2 * size[2] - intersection[2].first()),
    1485            0 :                                                 (2 * size[2] - intersection[2].last()), -1);
    1486              : 
    1487            0 :                 ippl::NDIndex<Dim> domain4;
    1488            0 :                 domain4[0] = intersection[0];
    1489            0 :                 domain4[1] = intersection[1];
    1490            0 :                 domain4[2] = zdom;
    1491              : 
    1492            0 :                 if (ldom_g.touches(domain4)) {
    1493            0 :                     intersection = ldom_g.intersect(domain4);
    1494              : 
    1495            0 :                     solver_send(mpi::tag::VICO_SOLVER, 3, i, intersection, ldom_g, nghost_g, view_g,
    1496            0 :                                 fd_m, requests);
    1497              :                 }
    1498              :             }
    1499              : 
    1500            0 :             if (domain2.touches(xy)) {
    1501            0 :                 auto intersection = domain2.intersect(xy);
    1502            0 :                 auto xdom         = ippl::Index((2 * size[0] - intersection[0].first()),
    1503            0 :                                                 (2 * size[0] - intersection[0].last()), -1);
    1504            0 :                 auto ydom         = ippl::Index((2 * size[1] - intersection[1].first()),
    1505            0 :                                                 (2 * size[1] - intersection[1].last()), -1);
    1506              : 
    1507            0 :                 ippl::NDIndex<Dim> domain4;
    1508            0 :                 domain4[0] = xdom;
    1509            0 :                 domain4[1] = ydom;
    1510            0 :                 domain4[2] = intersection[2];
    1511              : 
    1512            0 :                 if (ldom_g.touches(domain4)) {
    1513            0 :                     intersection = ldom_g.intersect(domain4);
    1514              : 
    1515            0 :                     solver_send(mpi::tag::VICO_SOLVER, 4, i, intersection, ldom_g, nghost_g, view_g,
    1516            0 :                                 fd_m, requests);
    1517              :                 }
    1518              :             }
    1519              : 
    1520            0 :             if (domain2.touches(yz)) {
    1521            0 :                 auto intersection = domain2.intersect(yz);
    1522            0 :                 auto ydom         = ippl::Index((2 * size[1] - intersection[1].first()),
    1523            0 :                                                 (2 * size[1] - intersection[1].last()), -1);
    1524            0 :                 auto zdom         = ippl::Index((2 * size[2] - intersection[2].first()),
    1525            0 :                                                 (2 * size[2] - intersection[2].last()), -1);
    1526              : 
    1527            0 :                 ippl::NDIndex<Dim> domain4;
    1528            0 :                 domain4[0] = intersection[0];
    1529            0 :                 domain4[1] = ydom;
    1530            0 :                 domain4[2] = zdom;
    1531              : 
    1532            0 :                 if (ldom_g.touches(domain4)) {
    1533            0 :                     intersection = ldom_g.intersect(domain4);
    1534            0 :                     solver_send(mpi::tag::VICO_SOLVER, 5, i, intersection, ldom_g, nghost_g, view_g,
    1535            0 :                                 fd_m, requests);
    1536              :                 }
    1537              :             }
    1538              : 
    1539            0 :             if (domain2.touches(xz)) {
    1540            0 :                 auto intersection = domain2.intersect(xz);
    1541            0 :                 auto xdom         = ippl::Index((2 * size[0] - intersection[0].first()),
    1542            0 :                                                 (2 * size[0] - intersection[0].last()), -1);
    1543            0 :                 auto zdom         = ippl::Index((2 * size[2] - intersection[2].first()),
    1544            0 :                                                 (2 * size[2] - intersection[2].last()), -1);
    1545              : 
    1546            0 :                 ippl::NDIndex<Dim> domain4;
    1547            0 :                 domain4[0] = xdom;
    1548            0 :                 domain4[1] = intersection[1];
    1549            0 :                 domain4[2] = zdom;
    1550              : 
    1551            0 :                 if (ldom_g.touches(domain4)) {
    1552            0 :                     intersection = ldom_g.intersect(domain4);
    1553            0 :                     solver_send(mpi::tag::VICO_SOLVER, 6, i, intersection, ldom_g, nghost_g, view_g,
    1554            0 :                                 fd_m, requests);
    1555              :                 }
    1556              :             }
    1557              : 
    1558            0 :             if (domain2.touches(xyz)) {
    1559            0 :                 auto intersection = domain2.intersect(xyz);
    1560            0 :                 auto xdom         = ippl::Index((2 * size[0] - intersection[0].first()),
    1561            0 :                                                 (2 * size[0] - intersection[0].last()), -1);
    1562            0 :                 auto ydom         = ippl::Index((2 * size[1] - intersection[1].first()),
    1563            0 :                                                 (2 * size[1] - intersection[1].last()), -1);
    1564            0 :                 auto zdom         = ippl::Index((2 * size[2] - intersection[2].first()),
    1565            0 :                                                 (2 * size[2] - intersection[2].last()), -1);
    1566              : 
    1567            0 :                 ippl::NDIndex<Dim> domain4;
    1568            0 :                 domain4[0] = xdom;
    1569            0 :                 domain4[1] = ydom;
    1570            0 :                 domain4[2] = zdom;
    1571              : 
    1572            0 :                 if (ldom_g.touches(domain4)) {
    1573            0 :                     intersection = ldom_g.intersect(domain4);
    1574            0 :                     solver_send(mpi::tag::VICO_SOLVER, 7, i, intersection, ldom_g, nghost_g, view_g,
    1575            0 :                                 fd_m, requests);
    1576              :                 }
    1577              :             }
    1578              :         }
    1579              : 
    1580              :         // 3rd step: receive
    1581            0 :         for (int i = 0; i < ranks; ++i) {
    1582            0 :             if (ldom.touches(none)) {
    1583            0 :                 auto intersection = ldom.intersect(none);
    1584              : 
    1585            0 :                 if (lDomains4[i].touches(intersection)) {
    1586            0 :                     intersection = intersection.intersect(lDomains4[i]);
    1587              : 
    1588            0 :                     solver_recv(mpi::tag::VICO_SOLVER, 0, i, intersection, ldom, nghost, view,
    1589            0 :                                 fd_m);
    1590              :                 }
    1591              :             }
    1592              : 
    1593            0 :             if (ldom.touches(x)) {
    1594            0 :                 auto intersection = ldom.intersect(x);
    1595              : 
    1596            0 :                 auto xdom = ippl::Index((2 * size[0] - intersection[0].first()),
    1597            0 :                                         (2 * size[0] - intersection[0].last()), -1);
    1598              : 
    1599            0 :                 ippl::NDIndex<Dim> domain4;
    1600            0 :                 domain4[0] = xdom;
    1601            0 :                 domain4[1] = intersection[1];
    1602            0 :                 domain4[2] = intersection[2];
    1603              : 
    1604            0 :                 if (lDomains4[i].touches(domain4)) {
    1605            0 :                     domain4    = lDomains4[i].intersect(domain4);
    1606            0 :                     domain4[0] = ippl::Index(2 * size[0] - domain4[0].first(),
    1607            0 :                                              2 * size[0] - domain4[0].last(), -1);
    1608              : 
    1609            0 :                     intersection = intersection.intersect(domain4);
    1610              : 
    1611            0 :                     solver_recv(mpi::tag::VICO_SOLVER, 1, i, intersection, ldom, nghost, view, fd_m,
    1612              :                                 true, false, false);
    1613              :                 }
    1614              :             }
    1615              : 
    1616            0 :             if (ldom.touches(y)) {
    1617            0 :                 auto intersection = ldom.intersect(y);
    1618              : 
    1619            0 :                 auto ydom = ippl::Index((2 * size[1] - intersection[1].first()),
    1620            0 :                                         (2 * size[1] - intersection[1].last()), -1);
    1621              : 
    1622            0 :                 ippl::NDIndex<Dim> domain4;
    1623            0 :                 domain4[0] = intersection[0];
    1624            0 :                 domain4[1] = ydom;
    1625            0 :                 domain4[2] = intersection[2];
    1626              : 
    1627            0 :                 if (lDomains4[i].touches(domain4)) {
    1628            0 :                     domain4    = lDomains4[i].intersect(domain4);
    1629            0 :                     domain4[1] = ippl::Index(2 * size[1] - domain4[1].first(),
    1630            0 :                                              2 * size[1] - domain4[1].last(), -1);
    1631              : 
    1632            0 :                     intersection = intersection.intersect(domain4);
    1633              : 
    1634            0 :                     solver_recv(mpi::tag::VICO_SOLVER, 2, i, intersection, ldom, nghost, view, fd_m,
    1635              :                                 false, true, false);
    1636              :                 }
    1637              :             }
    1638              : 
    1639            0 :             if (ldom.touches(z)) {
    1640            0 :                 auto intersection = ldom.intersect(z);
    1641              : 
    1642            0 :                 auto zdom = ippl::Index((2 * size[2] - intersection[2].first()),
    1643            0 :                                         (2 * size[2] - intersection[2].last()), -1);
    1644              : 
    1645            0 :                 ippl::NDIndex<Dim> domain4;
    1646            0 :                 domain4[0] = intersection[0];
    1647            0 :                 domain4[1] = intersection[1];
    1648            0 :                 domain4[2] = zdom;
    1649              : 
    1650            0 :                 if (lDomains4[i].touches(domain4)) {
    1651            0 :                     domain4    = lDomains4[i].intersect(domain4);
    1652            0 :                     domain4[2] = ippl::Index(2 * size[2] - domain4[2].first(),
    1653            0 :                                              2 * size[2] - domain4[2].last(), -1);
    1654              : 
    1655            0 :                     intersection = intersection.intersect(domain4);
    1656              : 
    1657            0 :                     solver_recv(mpi::tag::VICO_SOLVER, 3, i, intersection, ldom, nghost, view, fd_m,
    1658              :                                 false, false, true);
    1659              :                 }
    1660              :             }
    1661              : 
    1662            0 :             if (ldom.touches(xy)) {
    1663            0 :                 auto intersection = ldom.intersect(xy);
    1664              : 
    1665            0 :                 auto xdom = ippl::Index((2 * size[0] - intersection[0].first()),
    1666            0 :                                         (2 * size[0] - intersection[0].last()), -1);
    1667            0 :                 auto ydom = ippl::Index((2 * size[1] - intersection[1].first()),
    1668            0 :                                         (2 * size[1] - intersection[1].last()), -1);
    1669              : 
    1670            0 :                 ippl::NDIndex<Dim> domain4;
    1671            0 :                 domain4[0] = xdom;
    1672            0 :                 domain4[1] = ydom;
    1673            0 :                 domain4[2] = intersection[2];
    1674              : 
    1675            0 :                 if (lDomains4[i].touches(domain4)) {
    1676            0 :                     domain4    = lDomains4[i].intersect(domain4);
    1677            0 :                     domain4[0] = ippl::Index(2 * size[0] - domain4[0].first(),
    1678            0 :                                              2 * size[0] - domain4[0].last(), -1);
    1679            0 :                     domain4[1] = ippl::Index(2 * size[1] - domain4[1].first(),
    1680            0 :                                              2 * size[1] - domain4[1].last(), -1);
    1681              : 
    1682            0 :                     intersection = intersection.intersect(domain4);
    1683              : 
    1684            0 :                     solver_recv(mpi::tag::VICO_SOLVER, 4, i, intersection, ldom, nghost, view, fd_m,
    1685              :                                 true, true, false);
    1686              :                 }
    1687              :             }
    1688              : 
    1689            0 :             if (ldom.touches(yz)) {
    1690            0 :                 auto intersection = ldom.intersect(yz);
    1691              : 
    1692            0 :                 auto ydom = ippl::Index((2 * size[1] - intersection[1].first()),
    1693            0 :                                         (2 * size[1] - intersection[1].last()), -1);
    1694            0 :                 auto zdom = ippl::Index((2 * size[2] - intersection[2].first()),
    1695            0 :                                         (2 * size[2] - intersection[2].last()), -1);
    1696              : 
    1697            0 :                 ippl::NDIndex<Dim> domain4;
    1698            0 :                 domain4[0] = intersection[0];
    1699            0 :                 domain4[1] = ydom;
    1700            0 :                 domain4[2] = zdom;
    1701              : 
    1702            0 :                 if (lDomains4[i].touches(domain4)) {
    1703            0 :                     domain4    = lDomains4[i].intersect(domain4);
    1704            0 :                     domain4[1] = ippl::Index(2 * size[1] - domain4[1].first(),
    1705            0 :                                              2 * size[1] - domain4[1].last(), -1);
    1706            0 :                     domain4[2] = ippl::Index(2 * size[2] - domain4[2].first(),
    1707            0 :                                              2 * size[2] - domain4[2].last(), -1);
    1708              : 
    1709            0 :                     intersection = intersection.intersect(domain4);
    1710              : 
    1711            0 :                     solver_recv(mpi::tag::VICO_SOLVER, 5, i, intersection, ldom, nghost, view, fd_m,
    1712              :                                 false, true, true);
    1713              :                 }
    1714              :             }
    1715              : 
    1716            0 :             if (ldom.touches(xz)) {
    1717            0 :                 auto intersection = ldom.intersect(xz);
    1718              : 
    1719            0 :                 auto xdom = ippl::Index((2 * size[0] - intersection[0].first()),
    1720            0 :                                         (2 * size[0] - intersection[0].last()), -1);
    1721            0 :                 auto zdom = ippl::Index((2 * size[2] - intersection[2].first()),
    1722            0 :                                         (2 * size[2] - intersection[2].last()), -1);
    1723              : 
    1724            0 :                 ippl::NDIndex<Dim> domain4;
    1725            0 :                 domain4[0] = xdom;
    1726            0 :                 domain4[1] = intersection[1];
    1727            0 :                 domain4[2] = zdom;
    1728              : 
    1729            0 :                 if (lDomains4[i].touches(domain4)) {
    1730            0 :                     domain4    = lDomains4[i].intersect(domain4);
    1731            0 :                     domain4[0] = ippl::Index(2 * size[0] - domain4[0].first(),
    1732            0 :                                              2 * size[0] - domain4[0].last(), -1);
    1733            0 :                     domain4[2] = ippl::Index(2 * size[2] - domain4[2].first(),
    1734            0 :                                              2 * size[2] - domain4[2].last(), -1);
    1735              : 
    1736            0 :                     intersection = intersection.intersect(domain4);
    1737              : 
    1738            0 :                     solver_recv(mpi::tag::VICO_SOLVER, 6, i, intersection, ldom, nghost, view, fd_m,
    1739              :                                 true, false, true);
    1740              :                 }
    1741              :             }
    1742              : 
    1743            0 :             if (ldom.touches(xyz)) {
    1744            0 :                 auto intersection = ldom.intersect(xyz);
    1745              : 
    1746            0 :                 auto xdom = ippl::Index((2 * size[0] - intersection[0].first()),
    1747            0 :                                         (2 * size[0] - intersection[0].last()), -1);
    1748            0 :                 auto ydom = ippl::Index((2 * size[1] - intersection[1].first()),
    1749            0 :                                         (2 * size[1] - intersection[1].last()), -1);
    1750            0 :                 auto zdom = ippl::Index((2 * size[2] - intersection[2].first()),
    1751            0 :                                         (2 * size[2] - intersection[2].last()), -1);
    1752              : 
    1753            0 :                 ippl::NDIndex<Dim> domain4;
    1754            0 :                 domain4[0] = xdom;
    1755            0 :                 domain4[1] = ydom;
    1756            0 :                 domain4[2] = zdom;
    1757              : 
    1758            0 :                 if (lDomains4[i].touches(domain4)) {
    1759            0 :                     domain4    = lDomains4[i].intersect(domain4);
    1760            0 :                     domain4[0] = ippl::Index(2 * size[0] - domain4[0].first(),
    1761            0 :                                              2 * size[0] - domain4[0].last(), -1);
    1762            0 :                     domain4[1] = ippl::Index(2 * size[1] - domain4[1].first(),
    1763            0 :                                              2 * size[1] - domain4[1].last(), -1);
    1764            0 :                     domain4[2] = ippl::Index(2 * size[2] - domain4[2].first(),
    1765            0 :                                              2 * size[2] - domain4[2].last(), -1);
    1766              : 
    1767            0 :                     intersection = intersection.intersect(domain4);
    1768              : 
    1769            0 :                     solver_recv(mpi::tag::VICO_SOLVER, 7, i, intersection, ldom, nghost, view, fd_m,
    1770              :                                 true, true, true);
    1771              :                 }
    1772              :             }
    1773              :         }
    1774              : 
    1775            0 :         if (requests.size() > 0) {
    1776            0 :             MPI_Waitall(requests.size(), requests.data(), MPI_STATUSES_IGNORE);
    1777              :         }
    1778            0 :         ippl::Comm->freeAllBuffers();
    1779            0 :     };
    1780              : 
    1781              :     // CommunicateVico for DCT_VICO (2N+1 to 2N)
    1782              :     template <typename FieldLHS, typename FieldRHS>
    1783            0 :     void FFTOpenPoissonSolver<FieldLHS, FieldRHS>::communicateVico(
    1784              :         Vector<int, Dim> size, typename Field_t::view_type view_g, const ippl::NDIndex<Dim> ldom_g,
    1785              :         const int nghost_g, typename Field_t::view_type view, const ippl::NDIndex<Dim> ldom,
    1786              :         const int nghost) {
    1787            0 :         const auto& lDomains2   = layout2_m->getHostLocalDomains();
    1788            0 :         const auto& lDomains2n1 = layout2n1_m->getHostLocalDomains();
    1789              : 
    1790            0 :         std::vector<MPI_Request> requests(0);
    1791            0 :         const int ranks = ippl::Comm->size();
    1792              : 
    1793              :         // 1st step: Define 8 domains corresponding to the different quadrants
    1794            0 :         ippl::NDIndex<Dim> none;
    1795            0 :         for (unsigned i = 0; i < Dim; i++) {
    1796            0 :             none[i] = ippl::Index(size[i]);
    1797              :         }
    1798              : 
    1799            0 :         ippl::NDIndex<Dim> x;
    1800            0 :         x[0] = ippl::Index(size[0], 2 * size[0] - 1);
    1801            0 :         x[1] = ippl::Index(size[1]);
    1802            0 :         x[2] = ippl::Index(size[2]);
    1803              : 
    1804            0 :         ippl::NDIndex<Dim> y;
    1805            0 :         y[0] = ippl::Index(size[0]);
    1806            0 :         y[1] = ippl::Index(size[1], 2 * size[1] - 1);
    1807            0 :         y[2] = ippl::Index(size[2]);
    1808              : 
    1809            0 :         ippl::NDIndex<Dim> z;
    1810            0 :         z[0] = ippl::Index(size[0]);
    1811            0 :         z[1] = ippl::Index(size[1]);
    1812            0 :         z[2] = ippl::Index(size[2], 2 * size[2] - 1);
    1813              : 
    1814            0 :         ippl::NDIndex<Dim> xy;
    1815            0 :         xy[0] = ippl::Index(size[0], 2 * size[0] - 1);
    1816            0 :         xy[1] = ippl::Index(size[1], 2 * size[1] - 1);
    1817            0 :         xy[2] = ippl::Index(size[2]);
    1818              : 
    1819            0 :         ippl::NDIndex<Dim> xz;
    1820            0 :         xz[0] = ippl::Index(size[0], 2 * size[0] - 1);
    1821            0 :         xz[1] = ippl::Index(size[1]);
    1822            0 :         xz[2] = ippl::Index(size[2], 2 * size[2] - 1);
    1823              : 
    1824            0 :         ippl::NDIndex<Dim> yz;
    1825            0 :         yz[0] = ippl::Index(size[0]);
    1826            0 :         yz[1] = ippl::Index(size[1], 2 * size[1] - 1);
    1827            0 :         yz[2] = ippl::Index(size[2], 2 * size[2] - 1);
    1828              : 
    1829            0 :         ippl::NDIndex<Dim> xyz;
    1830            0 :         for (unsigned i = 0; i < Dim; i++) {
    1831            0 :             xyz[i] = ippl::Index(size[i], 2 * size[i] - 1);
    1832              :         }
    1833              : 
    1834              :         // 2nd step: send
    1835            0 :         for (int i = 0; i < ranks; ++i) {
    1836            0 :             auto domain2 = lDomains2[i];
    1837              : 
    1838            0 :             if (domain2.touches(none)) {
    1839            0 :                 auto intersection = domain2.intersect(none);
    1840              : 
    1841            0 :                 if (ldom_g.touches(intersection)) {
    1842            0 :                     intersection = intersection.intersect(ldom_g);
    1843              : 
    1844            0 :                     solver_send(mpi::tag::VICO_SOLVER, 0, i, intersection, ldom_g, nghost_g, view_g,
    1845            0 :                                 fd_m, requests);
    1846              :                 }
    1847              :             }
    1848              : 
    1849            0 :             if (domain2.touches(x)) {
    1850            0 :                 auto intersection = domain2.intersect(x);
    1851            0 :                 auto xdom         = ippl::Index((2 * size[0] - intersection[0].first()),
    1852            0 :                                                 (2 * size[0] - intersection[0].last()), -1);
    1853              : 
    1854            0 :                 ippl::NDIndex<Dim> domain2n1;
    1855            0 :                 domain2n1[0] = xdom;
    1856            0 :                 domain2n1[1] = intersection[1];
    1857            0 :                 domain2n1[2] = intersection[2];
    1858              : 
    1859            0 :                 if (ldom_g.touches(domain2n1)) {
    1860            0 :                     intersection = ldom_g.intersect(domain2n1);
    1861              : 
    1862            0 :                     solver_send(mpi::tag::VICO_SOLVER, 1, i, intersection, ldom_g, nghost_g, view_g,
    1863            0 :                                 fd_m, requests);
    1864              :                 }
    1865              :             }
    1866              : 
    1867            0 :             if (domain2.touches(y)) {
    1868            0 :                 auto intersection = domain2.intersect(y);
    1869            0 :                 auto ydom         = ippl::Index((2 * size[1] - intersection[1].first()),
    1870            0 :                                                 (2 * size[1] - intersection[1].last()), -1);
    1871              : 
    1872            0 :                 ippl::NDIndex<Dim> domain2n1;
    1873            0 :                 domain2n1[0] = intersection[0];
    1874            0 :                 domain2n1[1] = ydom;
    1875            0 :                 domain2n1[2] = intersection[2];
    1876              : 
    1877            0 :                 if (ldom_g.touches(domain2n1)) {
    1878            0 :                     intersection = ldom_g.intersect(domain2n1);
    1879              : 
    1880            0 :                     solver_send(mpi::tag::VICO_SOLVER, 2, i, intersection, ldom_g, nghost_g, view_g,
    1881            0 :                                 fd_m, requests);
    1882              :                 }
    1883              :             }
    1884              : 
    1885            0 :             if (domain2.touches(z)) {
    1886            0 :                 auto intersection = domain2.intersect(z);
    1887            0 :                 auto zdom         = ippl::Index((2 * size[2] - intersection[2].first()),
    1888            0 :                                                 (2 * size[2] - intersection[2].last()), -1);
    1889              : 
    1890            0 :                 ippl::NDIndex<Dim> domain2n1;
    1891            0 :                 domain2n1[0] = intersection[0];
    1892            0 :                 domain2n1[1] = intersection[1];
    1893            0 :                 domain2n1[2] = zdom;
    1894              : 
    1895            0 :                 if (ldom_g.touches(domain2n1)) {
    1896            0 :                     intersection = ldom_g.intersect(domain2n1);
    1897              : 
    1898            0 :                     solver_send(mpi::tag::VICO_SOLVER, 3, i, intersection, ldom_g, nghost_g, view_g,
    1899            0 :                                 fd_m, requests);
    1900              :                 }
    1901              :             }
    1902              : 
    1903            0 :             if (domain2.touches(xy)) {
    1904            0 :                 auto intersection = domain2.intersect(xy);
    1905            0 :                 auto xdom         = ippl::Index((2 * size[0] - intersection[0].first()),
    1906            0 :                                                 (2 * size[0] - intersection[0].last()), -1);
    1907            0 :                 auto ydom         = ippl::Index((2 * size[1] - intersection[1].first()),
    1908            0 :                                                 (2 * size[1] - intersection[1].last()), -1);
    1909              : 
    1910            0 :                 ippl::NDIndex<Dim> domain2n1;
    1911            0 :                 domain2n1[0] = xdom;
    1912            0 :                 domain2n1[1] = ydom;
    1913            0 :                 domain2n1[2] = intersection[2];
    1914              : 
    1915            0 :                 if (ldom_g.touches(domain2n1)) {
    1916            0 :                     intersection = ldom_g.intersect(domain2n1);
    1917              : 
    1918            0 :                     solver_send(mpi::tag::VICO_SOLVER, 4, i, intersection, ldom_g, nghost_g, view_g,
    1919            0 :                                 fd_m, requests);
    1920              :                 }
    1921              :             }
    1922              : 
    1923            0 :             if (domain2.touches(yz)) {
    1924            0 :                 auto intersection = domain2.intersect(yz);
    1925            0 :                 auto ydom         = ippl::Index((2 * size[1] - intersection[1].first()),
    1926            0 :                                                 (2 * size[1] - intersection[1].last()), -1);
    1927            0 :                 auto zdom         = ippl::Index((2 * size[2] - intersection[2].first()),
    1928            0 :                                                 (2 * size[2] - intersection[2].last()), -1);
    1929              : 
    1930            0 :                 ippl::NDIndex<Dim> domain2n1;
    1931            0 :                 domain2n1[0] = intersection[0];
    1932            0 :                 domain2n1[1] = ydom;
    1933            0 :                 domain2n1[2] = zdom;
    1934              : 
    1935            0 :                 if (ldom_g.touches(domain2n1)) {
    1936            0 :                     intersection = ldom_g.intersect(domain2n1);
    1937              : 
    1938            0 :                     solver_send(mpi::tag::VICO_SOLVER, 5, i, intersection, ldom_g, nghost_g, view_g,
    1939            0 :                                 fd_m, requests);
    1940              :                 }
    1941              :             }
    1942              : 
    1943            0 :             if (domain2.touches(xz)) {
    1944            0 :                 auto intersection = domain2.intersect(xz);
    1945            0 :                 auto xdom         = ippl::Index((2 * size[0] - intersection[0].first()),
    1946            0 :                                                 (2 * size[0] - intersection[0].last()), -1);
    1947            0 :                 auto zdom         = ippl::Index((2 * size[2] - intersection[2].first()),
    1948            0 :                                                 (2 * size[2] - intersection[2].last()), -1);
    1949              : 
    1950            0 :                 ippl::NDIndex<Dim> domain2n1;
    1951            0 :                 domain2n1[0] = xdom;
    1952            0 :                 domain2n1[1] = intersection[1];
    1953            0 :                 domain2n1[2] = zdom;
    1954              : 
    1955            0 :                 if (ldom_g.touches(domain2n1)) {
    1956            0 :                     intersection = ldom_g.intersect(domain2n1);
    1957              : 
    1958            0 :                     solver_send(mpi::tag::VICO_SOLVER, 6, i, intersection, ldom_g, nghost_g, view_g,
    1959            0 :                                 fd_m, requests);
    1960              :                 }
    1961              :             }
    1962              : 
    1963            0 :             if (domain2.touches(xyz)) {
    1964            0 :                 auto intersection = domain2.intersect(xyz);
    1965            0 :                 auto xdom         = ippl::Index((2 * size[0] - intersection[0].first()),
    1966            0 :                                                 (2 * size[0] - intersection[0].last()), -1);
    1967            0 :                 auto ydom         = ippl::Index((2 * size[1] - intersection[1].first()),
    1968            0 :                                                 (2 * size[1] - intersection[1].last()), -1);
    1969            0 :                 auto zdom         = ippl::Index((2 * size[2] - intersection[2].first()),
    1970            0 :                                                 (2 * size[2] - intersection[2].last()), -1);
    1971              : 
    1972            0 :                 ippl::NDIndex<Dim> domain2n1;
    1973            0 :                 domain2n1[0] = xdom;
    1974            0 :                 domain2n1[1] = ydom;
    1975            0 :                 domain2n1[2] = zdom;
    1976              : 
    1977            0 :                 if (ldom_g.touches(domain2n1)) {
    1978            0 :                     intersection = ldom_g.intersect(domain2n1);
    1979              : 
    1980            0 :                     solver_send(mpi::tag::VICO_SOLVER, 7, i, intersection, ldom_g, nghost_g, view_g,
    1981            0 :                                 fd_m, requests);
    1982              :                 }
    1983              :             }
    1984              :         }
    1985              : 
    1986              :         // 3rd step: receive
    1987            0 :         for (int i = 0; i < ranks; ++i) {
    1988            0 :             if (ldom.touches(none)) {
    1989            0 :                 auto intersection = ldom.intersect(none);
    1990              : 
    1991            0 :                 if (lDomains2n1[i].touches(intersection)) {
    1992            0 :                     intersection = intersection.intersect(lDomains2n1[i]);
    1993              : 
    1994            0 :                     solver_recv(mpi::tag::VICO_SOLVER, 0, i, intersection, ldom, nghost, view,
    1995            0 :                                 fd_m);
    1996              :                 }
    1997              :             }
    1998              : 
    1999            0 :             if (ldom.touches(x)) {
    2000            0 :                 auto intersection = ldom.intersect(x);
    2001              : 
    2002            0 :                 auto xdom = ippl::Index((2 * size[0] - intersection[0].first()),
    2003            0 :                                         (2 * size[0] - intersection[0].last()), -1);
    2004              : 
    2005            0 :                 ippl::NDIndex<Dim> domain2n1;
    2006            0 :                 domain2n1[0] = xdom;
    2007            0 :                 domain2n1[1] = intersection[1];
    2008            0 :                 domain2n1[2] = intersection[2];
    2009              : 
    2010            0 :                 if (lDomains2n1[i].touches(domain2n1)) {
    2011            0 :                     domain2n1    = lDomains2n1[i].intersect(domain2n1);
    2012            0 :                     domain2n1[0] = ippl::Index(2 * size[0] - domain2n1[0].first(),
    2013            0 :                                                2 * size[0] - domain2n1[0].last(), -1);
    2014              : 
    2015            0 :                     intersection = intersection.intersect(domain2n1);
    2016              : 
    2017            0 :                     solver_recv(mpi::tag::VICO_SOLVER, 1, i, intersection, ldom, nghost, view, fd_m,
    2018              :                                 true, false, false);
    2019              :                 }
    2020              :             }
    2021              : 
    2022            0 :             if (ldom.touches(y)) {
    2023            0 :                 auto intersection = ldom.intersect(y);
    2024              : 
    2025            0 :                 auto ydom = ippl::Index((2 * size[1] - intersection[1].first()),
    2026            0 :                                         (2 * size[1] - intersection[1].last()), -1);
    2027              : 
    2028            0 :                 ippl::NDIndex<Dim> domain2n1;
    2029            0 :                 domain2n1[0] = intersection[0];
    2030            0 :                 domain2n1[1] = ydom;
    2031            0 :                 domain2n1[2] = intersection[2];
    2032              : 
    2033            0 :                 if (lDomains2n1[i].touches(domain2n1)) {
    2034            0 :                     domain2n1    = lDomains2n1[i].intersect(domain2n1);
    2035            0 :                     domain2n1[1] = ippl::Index(2 * size[1] - domain2n1[1].first(),
    2036            0 :                                                2 * size[1] - domain2n1[1].last(), -1);
    2037              : 
    2038            0 :                     intersection = intersection.intersect(domain2n1);
    2039              : 
    2040            0 :                     solver_recv(mpi::tag::VICO_SOLVER, 2, i, intersection, ldom, nghost, view, fd_m,
    2041              :                                 false, true, false);
    2042              :                 }
    2043              :             }
    2044              : 
    2045            0 :             if (ldom.touches(z)) {
    2046            0 :                 auto intersection = ldom.intersect(z);
    2047              : 
    2048            0 :                 auto zdom = ippl::Index((2 * size[2] - intersection[2].first()),
    2049            0 :                                         (2 * size[2] - intersection[2].last()), -1);
    2050              : 
    2051            0 :                 ippl::NDIndex<Dim> domain2n1;
    2052            0 :                 domain2n1[0] = intersection[0];
    2053            0 :                 domain2n1[1] = intersection[1];
    2054            0 :                 domain2n1[2] = zdom;
    2055              : 
    2056            0 :                 if (lDomains2n1[i].touches(domain2n1)) {
    2057            0 :                     domain2n1    = lDomains2n1[i].intersect(domain2n1);
    2058            0 :                     domain2n1[2] = ippl::Index(2 * size[2] - domain2n1[2].first(),
    2059            0 :                                                2 * size[2] - domain2n1[2].last(), -1);
    2060              : 
    2061            0 :                     intersection = intersection.intersect(domain2n1);
    2062              : 
    2063            0 :                     solver_recv(mpi::tag::VICO_SOLVER, 3, i, intersection, ldom, nghost, view, fd_m,
    2064              :                                 false, false, true);
    2065              :                 }
    2066              :             }
    2067              : 
    2068            0 :             if (ldom.touches(xy)) {
    2069            0 :                 auto intersection = ldom.intersect(xy);
    2070              : 
    2071            0 :                 auto xdom = ippl::Index((2 * size[0] - intersection[0].first()),
    2072            0 :                                         (2 * size[0] - intersection[0].last()), -1);
    2073            0 :                 auto ydom = ippl::Index((2 * size[1] - intersection[1].first()),
    2074            0 :                                         (2 * size[1] - intersection[1].last()), -1);
    2075              : 
    2076            0 :                 ippl::NDIndex<Dim> domain2n1;
    2077            0 :                 domain2n1[0] = xdom;
    2078            0 :                 domain2n1[1] = ydom;
    2079            0 :                 domain2n1[2] = intersection[2];
    2080              : 
    2081            0 :                 if (lDomains2n1[i].touches(domain2n1)) {
    2082            0 :                     domain2n1    = lDomains2n1[i].intersect(domain2n1);
    2083            0 :                     domain2n1[0] = ippl::Index(2 * size[0] - domain2n1[0].first(),
    2084            0 :                                                2 * size[0] - domain2n1[0].last(), -1);
    2085            0 :                     domain2n1[1] = ippl::Index(2 * size[1] - domain2n1[1].first(),
    2086            0 :                                                2 * size[1] - domain2n1[1].last(), -1);
    2087              : 
    2088            0 :                     intersection = intersection.intersect(domain2n1);
    2089              : 
    2090            0 :                     solver_recv(mpi::tag::VICO_SOLVER, 4, i, intersection, ldom, nghost, view, fd_m,
    2091              :                                 true, true, false);
    2092              :                 }
    2093              :             }
    2094              : 
    2095            0 :             if (ldom.touches(yz)) {
    2096            0 :                 auto intersection = ldom.intersect(yz);
    2097              : 
    2098            0 :                 auto ydom = ippl::Index((2 * size[1] - intersection[1].first()),
    2099            0 :                                         (2 * size[1] - intersection[1].last()), -1);
    2100            0 :                 auto zdom = ippl::Index((2 * size[2] - intersection[2].first()),
    2101            0 :                                         (2 * size[2] - intersection[2].last()), -1);
    2102              : 
    2103            0 :                 ippl::NDIndex<Dim> domain2n1;
    2104            0 :                 domain2n1[0] = intersection[0];
    2105            0 :                 domain2n1[1] = ydom;
    2106            0 :                 domain2n1[2] = zdom;
    2107              : 
    2108            0 :                 if (lDomains2n1[i].touches(domain2n1)) {
    2109            0 :                     domain2n1    = lDomains2n1[i].intersect(domain2n1);
    2110            0 :                     domain2n1[1] = ippl::Index(2 * size[1] - domain2n1[1].first(),
    2111            0 :                                                2 * size[1] - domain2n1[1].last(), -1);
    2112            0 :                     domain2n1[2] = ippl::Index(2 * size[2] - domain2n1[2].first(),
    2113            0 :                                                2 * size[2] - domain2n1[2].last(), -1);
    2114              : 
    2115            0 :                     intersection = intersection.intersect(domain2n1);
    2116              : 
    2117            0 :                     solver_recv(mpi::tag::VICO_SOLVER, 5, i, intersection, ldom, nghost, view, fd_m,
    2118              :                                 false, true, true);
    2119              :                 }
    2120              :             }
    2121              : 
    2122            0 :             if (ldom.touches(xz)) {
    2123            0 :                 auto intersection = ldom.intersect(xz);
    2124              : 
    2125            0 :                 auto xdom = ippl::Index((2 * size[0] - intersection[0].first()),
    2126            0 :                                         (2 * size[0] - intersection[0].last()), -1);
    2127            0 :                 auto zdom = ippl::Index((2 * size[2] - intersection[2].first()),
    2128            0 :                                         (2 * size[2] - intersection[2].last()), -1);
    2129              : 
    2130            0 :                 ippl::NDIndex<Dim> domain2n1;
    2131            0 :                 domain2n1[0] = xdom;
    2132            0 :                 domain2n1[1] = intersection[1];
    2133            0 :                 domain2n1[2] = zdom;
    2134              : 
    2135            0 :                 if (lDomains2n1[i].touches(domain2n1)) {
    2136            0 :                     domain2n1    = lDomains2n1[i].intersect(domain2n1);
    2137            0 :                     domain2n1[0] = ippl::Index(2 * size[0] - domain2n1[0].first(),
    2138            0 :                                                2 * size[0] - domain2n1[0].last(), -1);
    2139            0 :                     domain2n1[2] = ippl::Index(2 * size[2] - domain2n1[2].first(),
    2140            0 :                                                2 * size[2] - domain2n1[2].last(), -1);
    2141              : 
    2142            0 :                     intersection = intersection.intersect(domain2n1);
    2143              : 
    2144            0 :                     solver_recv(mpi::tag::VICO_SOLVER, 6, i, intersection, ldom, nghost, view, fd_m,
    2145              :                                 true, false, true);
    2146              :                 }
    2147              :             }
    2148              : 
    2149            0 :             if (ldom.touches(xyz)) {
    2150            0 :                 auto intersection = ldom.intersect(xyz);
    2151              : 
    2152            0 :                 auto xdom = ippl::Index((2 * size[0] - intersection[0].first()),
    2153            0 :                                         (2 * size[0] - intersection[0].last()), -1);
    2154            0 :                 auto ydom = ippl::Index((2 * size[1] - intersection[1].first()),
    2155            0 :                                         (2 * size[1] - intersection[1].last()), -1);
    2156            0 :                 auto zdom = ippl::Index((2 * size[2] - intersection[2].first()),
    2157            0 :                                         (2 * size[2] - intersection[2].last()), -1);
    2158              : 
    2159            0 :                 ippl::NDIndex<Dim> domain2n1;
    2160            0 :                 domain2n1[0] = xdom;
    2161            0 :                 domain2n1[1] = ydom;
    2162            0 :                 domain2n1[2] = zdom;
    2163              : 
    2164            0 :                 if (lDomains2n1[i].touches(domain2n1)) {
    2165            0 :                     domain2n1    = lDomains2n1[i].intersect(domain2n1);
    2166            0 :                     domain2n1[0] = ippl::Index(2 * size[0] - domain2n1[0].first(),
    2167            0 :                                                2 * size[0] - domain2n1[0].last(), -1);
    2168            0 :                     domain2n1[1] = ippl::Index(2 * size[1] - domain2n1[1].first(),
    2169            0 :                                                2 * size[1] - domain2n1[1].last(), -1);
    2170            0 :                     domain2n1[2] = ippl::Index(2 * size[2] - domain2n1[2].first(),
    2171            0 :                                                2 * size[2] - domain2n1[2].last(), -1);
    2172              : 
    2173            0 :                     intersection = intersection.intersect(domain2n1);
    2174              : 
    2175            0 :                     solver_recv(mpi::tag::VICO_SOLVER, 7, i, intersection, ldom, nghost, view, fd_m,
    2176              :                                 true, true, true);
    2177              :                 }
    2178              :             }
    2179              :         }
    2180              : 
    2181            0 :         if (requests.size() > 0) {
    2182            0 :             MPI_Waitall(requests.size(), requests.data(), MPI_STATUSES_IGNORE);
    2183              :         }
    2184            0 :         ippl::Comm->freeAllBuffers();
    2185            0 :     };
    2186              : }  // namespace ippl
        

Generated by: LCOV version 2.0-1