|             Line data    Source code 
       1              : #include "NedelecSpace.h"
       2              : namespace ippl {
       3              : 
       4              :     // NedelecSpace constructor, which calls the FiniteElementSpace constructor,
       5              :     // and decomposes the elements among ranks according to layout.
       6              :     template <typename T, unsigned Dim, unsigned Order, typename ElementType,
       7              :               typename QuadratureType, typename FieldType>
       8          160 :     NedelecSpace<T, Dim, Order, ElementType, QuadratureType, FieldType>
       9              :                             ::NedelecSpace(UniformCartesian<T, Dim>& mesh, ElementType& ref_element,
      10              :                                 const QuadratureType& quadrature, const Layout_t& layout)
      11              :                             : FiniteElementSpace<T, Dim, getNedelecNumElementDOFs(Dim, Order),
      12              :                                 ElementType, QuadratureType, FEMVector<T>, FEMVector<T>>
      13          160 :                                     (mesh, ref_element, quadrature) {
      14              :         // Assert that the dimension is either 2 or 3.
      15              :         static_assert(Dim >= 2 && Dim <= 3,
      16              :             "The Nedelec Finite Element space only supports 2D and3D meshes");
      17              : 
      18              :         // Initialize the elementIndices view
      19          160 :         initializeElementIndices(layout);
      20          160 :     }
      21              : 
      22              :     // NedelecSpace constructor, which calls the FiniteElementSpace constructor.
      23              :     template <typename T, unsigned Dim, unsigned Order, typename ElementType,
      24              :               typename QuadratureType, typename FieldType>
      25              :     NedelecSpace<T, Dim, Order, ElementType, QuadratureType, FieldType>
      26              :                             ::NedelecSpace(UniformCartesian<T, Dim>& mesh, ElementType& ref_element,
      27              :                                 const QuadratureType& quadrature)
      28              :                             : FiniteElementSpace<T, Dim, getNedelecNumElementDOFs(Dim, Order),
      29              :                             ElementType, QuadratureType, FEMVector<T>, FEMVector<T>>
      30              :                                 (mesh, ref_element, quadrature) {
      31              : 
      32              :         // Assert that the dimension is either 2 or 3.
      33              :         static_assert(Dim >= 2 && Dim <= 3,
      34              :             "The Nedelec Finite Element space only supports 2D and 3D meshes");
      35              :     }
      36              : 
      37              :     // NedelecSpace initializer, to be made available to the FEMPoissonSolver 
      38              :     // such that we can call it from setRhs.
      39              :     // Sets the correct mesh and decomposes the elements among ranks according to layout.
      40              :     template <typename T, unsigned Dim, unsigned Order, typename ElementType,
      41              :               typename QuadratureType, typename FieldType>
      42              :     void NedelecSpace<T, Dim, Order, ElementType, QuadratureType, FieldType>
      43              :                             ::initialize(UniformCartesian<T, Dim>& mesh, const Layout_t& layout) {
      44              :         
      45              :         FiniteElementSpace<T, Dim, getNedelecNumElementDOFs(Dim, Order),
      46              :                             ElementType, QuadratureType, FEMVector<T>, FEMVector<T>>::setMesh(mesh);
      47              : 
      48              :         // Initialize the elementIndices view
      49              :         initializeElementIndices(layout);
      50              : 
      51              :         // set the local DOF position vector
      52              :         localDofPositions_m(0)(0) = 0.5; 
      53              :         localDofPositions_m(1)(1) = 0.5;
      54              :         localDofPositions_m(2)(0) = 0.5; localDofPositions_m(2)(1) = 1;
      55              :         localDofPositions_m(3)(0) = 1;   localDofPositions_m(3)(1) = 0.5;
      56              :         localDofPositions_m(4)(2) = 0.5;
      57              :         localDofPositions_m(5)(0) = 1;   localDofPositions_m(5)(2) = 0.5;
      58              :         localDofPositions_m(6)(0) = 1;   localDofPositions_m(6)(1) = 1;
      59              :             localDofPositions_m(6)(2) = 0.5;
      60              :         localDofPositions_m(7)(1) = 1;   localDofPositions_m(7)(2) = 0.5;
      61              :         localDofPositions_m(8)(0) = 0.5; localDofPositions_m(8)(2) = 1;
      62              :         localDofPositions_m(9)(1) = 0.5; localDofPositions_m(9)(2) = 1;
      63              :         localDofPositions_m(10)(0) = 0.5; localDofPositions_m(10)(1) = 1;
      64              :             localDofPositions_m(10)(2) = 1;
      65              :         localDofPositions_m(11)(0) = 1;   localDofPositions_m(11)(1) = 0.5;
      66              :             localDofPositions_m(11)(2) = 1;
      67              :     }
      68              : 
      69              :     // Initialize element indices Kokkos View
      70              :     template <typename T, unsigned Dim, unsigned Order, typename ElementType,
      71              :               typename QuadratureType, typename FieldType>
      72          160 :     void NedelecSpace<T, Dim, Order, ElementType, QuadratureType, FieldType>
      73              :                             ::initializeElementIndices(const Layout_t& layout) {
      74              :         
      75          160 :         layout_m = layout;
      76          160 :         const auto& ldom = layout.getLocalNDIndex();
      77          160 :         int npoints      = ldom.size();
      78          160 :         auto first       = ldom.first();
      79          160 :         auto last        = ldom.last();
      80          160 :         ippl::Vector<double, Dim> bounds;
      81              : 
      82          560 :         for (size_t d = 0; d < Dim; ++d) {
      83          400 :             bounds[d] = this->nr_m[d] - 1;
      84              :         }
      85              : 
      86          160 :         int upperBoundaryPoints = -1;
      87              : 
      88          160 :         Kokkos::View<size_t*> points("ComputeMapping", npoints);
      89          160 :         Kokkos::parallel_reduce(
      90            0 :             "ComputePoints", npoints,
      91         4040 :             KOKKOS_CLASS_LAMBDA(const int i, int& local) {
      92         3720 :                 int idx = i;
      93         3720 :                 indices_t val;
      94         3720 :                 bool isBoundary = false;
      95        14200 :                 for (unsigned int d = 0; d < Dim; ++d) {
      96        10480 :                     int range = last[d] - first[d] + 1;
      97        10480 :                     val[d]    = first[d] + (idx % range);
      98        10480 :                     idx /= range;
      99        10480 :                     if (val[d] == bounds[d]) {
     100         2360 :                         isBoundary = true;
     101              :                     }
     102              :                 }
     103         3720 :                 points(i) = (!isBoundary) * (this->getElementIndex(val));
     104         3720 :                 local += isBoundary;
     105         3720 :             },
     106          320 :             Kokkos::Sum<int>(upperBoundaryPoints));
     107          160 :         Kokkos::fence();
     108              : 
     109          160 :         int elementsPerRank = npoints - upperBoundaryPoints;
     110          160 :         elementIndices      = Kokkos::View<size_t*>("i", elementsPerRank);
     111          160 :         Kokkos::View<size_t> index("index");
     112              : 
     113          160 :         Kokkos::parallel_for(
     114         7760 :             "RemoveNaNs", npoints, KOKKOS_CLASS_LAMBDA(const int i) {
     115         7440 :                 if ((points(i) != 0) || (i == 0)) {
     116         3680 :                     const size_t idx    = Kokkos::atomic_fetch_add(&index(), 1);
     117         5520 :                     elementIndices(idx) = points(i);
     118              :                 }
     119              :             }
     120              :         );
     121          160 :     }
     122              : 
     123              :     ///////////////////////////////////////////////////////////////////////
     124              :     /// Degree of Freedom operations //////////////////////////////////////
     125              :     ///////////////////////////////////////////////////////////////////////
     126              : 
     127              :     template <typename T, unsigned Dim, unsigned Order, typename ElementType,
     128              :               typename QuadratureType, typename FieldType>
     129            8 :     KOKKOS_FUNCTION size_t NedelecSpace<T, Dim, Order, ElementType, QuadratureType, FieldType>
     130              :                                 ::numGlobalDOFs() const {
     131              : 
     132            8 :         size_t num_global_dofs = 0;
     133              :         
     134           28 :         for (size_t d = 0; d < Dim; ++d) {
     135           20 :             size_t accu = this->nr_m[d]-1;
     136           72 :             for (size_t d2 = 0; d2 < Dim; ++d2) {
     137           52 :                 if (d == d2) continue;
     138           32 :                 accu *= this->nr_m[d2];
     139              :             }
     140           20 :             num_global_dofs += accu;
     141              :         }
     142              : 
     143            8 :         return num_global_dofs;
     144              :     }
     145              : 
     146              :     template <typename T, unsigned Dim, unsigned Order, typename ElementType,
     147              :               typename QuadratureType, typename FieldType>
     148            0 :     KOKKOS_FUNCTION size_t NedelecSpace<T, Dim, Order, ElementType, QuadratureType, FieldType>
     149              :                                 ::getLocalDOFIndex(const size_t& elementIndex,
     150              :                                     const size_t& globalDOFIndex) const {
     151              : 
     152              :         static_assert(Dim == 2 || Dim == 3, "Dim must be 2 or 3");
     153              : 
     154              :         // Get all the global DOFs for the element
     155            0 :         const Vector<size_t, this->numElementDOFs> global_dofs =
     156            0 :             this->getGlobalDOFIndices(elementIndex);
     157              : 
     158            0 :         ippl::Vector<size_t, this->numElementDOFs> dof_mapping;
     159              :         if (Dim == 2) {
     160            0 :             dof_mapping = {0, 1, 2, 3};
     161              :         } else if (Dim == 3) {
     162            0 :             dof_mapping = {0, 1, 2, 3, 4, 5, 7, 6, 8, 9, 10, 11};
     163              :         }
     164              : 
     165              :         // Find the global DOF in the vector and return the local DOF index
     166              :         // TODO this can be done faster since the global DOFs are sorted
     167            0 :         for (size_t i = 0; i < dof_mapping.dim; ++i) {
     168            0 :             if (global_dofs[dof_mapping[i]] == globalDOFIndex) {
     169            0 :                 return dof_mapping[i];
     170              :             }
     171              :         }
     172            0 :         return std::numeric_limits<size_t>::quiet_NaN();
     173            0 :     }
     174              : 
     175              :     template <typename T, unsigned Dim, unsigned Order, typename ElementType,
     176              :               typename QuadratureType, typename FieldType>
     177            0 :     KOKKOS_FUNCTION size_t NedelecSpace<T, Dim, Order, ElementType, QuadratureType, FieldType>
     178              :                             ::getGlobalDOFIndex(const size_t& elementIndex,
     179              :                                 const size_t& localDOFIndex) const {
     180              : 
     181            0 :         const auto global_dofs = this->getGlobalDOFIndices(elementIndex);
     182              : 
     183            0 :         return global_dofs[localDOFIndex];
     184            0 :     }
     185              : 
     186              :     template <typename T, unsigned Dim, unsigned Order, typename ElementType,
     187              :               typename QuadratureType, typename FieldType>
     188              :     KOKKOS_FUNCTION Vector<size_t, NedelecSpace<T, Dim, Order, ElementType,
     189              :                         QuadratureType, FieldType>::numElementDOFs>
     190            0 :                     NedelecSpace<T, Dim, Order, ElementType, QuadratureType, FieldType>
     191              :                             ::getLocalDOFIndices() const {
     192              : 
     193            0 :         Vector<size_t, numElementDOFs> localDOFs;
     194              : 
     195            0 :         for (size_t dof = 0; dof < numElementDOFs; ++dof) {
     196            0 :             localDOFs[dof] = dof;
     197              :         }
     198              : 
     199            0 :         return localDOFs;
     200              :     }
     201              : 
     202              :     template <typename T, unsigned Dim, unsigned Order, typename ElementType,
     203              :               typename QuadratureType, typename FieldType>
     204              :     KOKKOS_FUNCTION Vector<size_t, NedelecSpace<T, Dim, Order, ElementType,
     205              :                         QuadratureType, FieldType>::numElementDOFs>
     206           12 :                     NedelecSpace<T, Dim, Order, ElementType, QuadratureType, FieldType>
     207              :                             ::getGlobalDOFIndices(const NedelecSpace<T, Dim, Order, ElementType,
     208              :                                 QuadratureType, FieldType>::indices_t& elementIndex) const {
     209              : 
     210              :         
     211              :         // These are simply some manual caclualtions that need to be done.
     212              :         
     213           12 :         Vector<size_t, this->numElementDOFs> globalDOFs(0);
     214              : 
     215              :         // Initialize a helper vector v
     216           12 :         Vector<size_t, Dim> v(1);
     217              :         if constexpr (Dim == 2) {
     218            8 :             size_t nx = this->nr_m[0];
     219            8 :             v(1) = 2*nx-1;
     220              :         } else if constexpr (Dim == 3) {
     221            4 :             size_t nx = this->nr_m[0];
     222            4 :             size_t ny = this->nr_m[1];
     223            4 :             v(1) = 2*nx -1;
     224            4 :             v(2) = 3*nx*ny - nx - ny;
     225              :         }
     226              : 
     227              :         // For both 2D and 3D the first few DOF indices are the same
     228           12 :         size_t nx = this->nr_m[0];
     229           12 :         globalDOFs(0) = v.dot(elementIndex);
     230           12 :         globalDOFs(1) = globalDOFs(0) + nx - 1;
     231           12 :         globalDOFs(2) = globalDOFs(1) + nx;
     232           12 :         globalDOFs(3) = globalDOFs(1) + 1;
     233              : 
     234              :         if constexpr (Dim == 3) {
     235            4 :             size_t ny = this->nr_m[1];
     236              : 
     237            4 :             globalDOFs(4) = v(2)*elementIndex(2) + 2*nx*ny - nx - ny
     238            4 :                 + elementIndex(1)*nx + elementIndex(0);
     239            4 :             globalDOFs(5) = globalDOFs(4) + 1;
     240            4 :             globalDOFs(6) = globalDOFs(4) + nx + 1;
     241            4 :             globalDOFs(7) = globalDOFs(4) + nx;
     242            4 :             globalDOFs(8) = globalDOFs(0) + 3*nx*ny - nx - ny;
     243            4 :             globalDOFs(9) = globalDOFs(8) + nx - 1;
     244            4 :             globalDOFs(10) = globalDOFs(9) + nx;
     245            4 :             globalDOFs(11) = globalDOFs(9) + 1;
     246              :         }
     247              :         
     248              : 
     249           24 :         return globalDOFs;
     250           12 :     }
     251              : 
     252              :     template <typename T, unsigned Dim, unsigned Order, typename ElementType,
     253              :               typename QuadratureType, typename FieldType>
     254              :     KOKKOS_FUNCTION Vector<size_t, NedelecSpace<T, Dim, Order, ElementType,
     255              :                         QuadratureType, FieldType>::numElementDOFs>
     256           12 :                     NedelecSpace<T, Dim, Order, ElementType, QuadratureType, FieldType>
     257              :                             ::getGlobalDOFIndices(const size_t& elementIndex) const {
     258              : 
     259           12 :         indices_t elementPos = this->getElementNDIndex(elementIndex);
     260           24 :         return getGlobalDOFIndices(elementPos);
     261           12 :     }
     262              : 
     263              :     
     264              : 
     265              :     template <typename T, unsigned Dim, unsigned Order, typename ElementType,
     266              :               typename QuadratureType, typename FieldType>
     267              :     KOKKOS_FUNCTION Vector<size_t, NedelecSpace<T, Dim, Order, ElementType,
     268              :                         QuadratureType, FieldType>::numElementDOFs>
     269          144 :                     NedelecSpace<T, Dim, Order, ElementType, QuadratureType, FieldType>
     270              :                             ::getFEMVectorDOFIndices(NedelecSpace<T, Dim, Order, ElementType,
     271              :                                 QuadratureType, FieldType>::indices_t elementIndex,
     272              :                                 NDIndex<Dim> ldom) const {
     273              :                             
     274              : 
     275              :         // This function here is pretty much the same as getGlobalDOFIndices()
     276              :         // the only difference is the domain size and that we have an offset
     277              :         // of the subdomain of the rank to the global one, else it is the same
     278              : 
     279          144 :         Vector<size_t, this->numElementDOFs> FEMVectorDOFs(0);
     280              :         
     281              :         // This corresponds to translating a global element position to one in
     282              :         // the subdomain of the rank. For this we subtract the starting position
     283              :         // the rank subdomain and add the "ghost" hyperplane.
     284          144 :         elementIndex -= ldom.first();
     285          144 :         elementIndex += 1;
     286              :         
     287          144 :         indices_t dif(0);
     288          144 :         dif = ldom.last() - ldom.first();
     289          144 :         dif += 1 + 2; // plus 1 for last still being in +2 for ghosts.
     290              :         
     291              :         // From here on out it is pretty much the same as the
     292              :         // getGlobalDOFIndices() function.
     293          144 :         Vector<size_t, Dim> v(1);
     294              :         if constexpr (Dim == 2) {
     295           44 :             size_t nx = dif[0];
     296           44 :             v(1) = 2*nx-1;
     297              :         } else if constexpr (Dim == 3) {
     298          100 :             size_t nx = dif[0];
     299          100 :             size_t ny = dif[1];
     300          100 :             v(1) = 2*nx -1;
     301          100 :             v(2) = 3*nx*ny - nx - ny;
     302              :         }
     303              : 
     304          144 :         size_t nx = dif[0];
     305          144 :         FEMVectorDOFs(0) = v.dot(elementIndex);
     306          144 :         FEMVectorDOFs(1) = FEMVectorDOFs(0) + nx - 1;
     307          144 :         FEMVectorDOFs(2) = FEMVectorDOFs(1) + nx;
     308          144 :         FEMVectorDOFs(3) = FEMVectorDOFs(1) + 1;
     309              : 
     310              :         if constexpr (Dim == 3) {
     311          100 :             size_t ny = dif[1];
     312              : 
     313          100 :             FEMVectorDOFs(4) = v(2)*elementIndex(2) + 2*nx*ny - nx - ny
     314          100 :                 + elementIndex(1)*nx + elementIndex(0);
     315          100 :             FEMVectorDOFs(5) = FEMVectorDOFs(4) + 1;
     316          100 :             FEMVectorDOFs(6) = FEMVectorDOFs(4) + nx + 1;
     317          100 :             FEMVectorDOFs(7) = FEMVectorDOFs(4) + nx;
     318          100 :             FEMVectorDOFs(8) = FEMVectorDOFs(0) + 3*nx*ny - nx - ny;
     319          100 :             FEMVectorDOFs(9) = FEMVectorDOFs(8) + nx - 1;
     320          100 :             FEMVectorDOFs(10) = FEMVectorDOFs(9) + nx;
     321          100 :             FEMVectorDOFs(11) = FEMVectorDOFs(9) + 1;
     322              :         }
     323              :         
     324              :         
     325          144 :         return FEMVectorDOFs;
     326          144 :     }
     327              : 
     328              :     template <typename T, unsigned Dim, unsigned Order, typename ElementType,
     329              :               typename QuadratureType, typename FieldType>
     330              :     KOKKOS_FUNCTION Vector<size_t, NedelecSpace<T, Dim, Order, ElementType,
     331              :                         QuadratureType, FieldType>::numElementDOFs>
     332            0 :                     NedelecSpace<T, Dim, Order, ElementType, QuadratureType, FieldType>
     333              :                             ::getFEMVectorDOFIndices(const size_t& elementIndex, NDIndex<Dim> ldom) const {
     334              :         
     335              :         // First get the global element position
     336            0 :         indices_t elementPos = this->getElementNDIndex(elementIndex);
     337            0 :         return getFEMVectorDOFIndices(elementPos, ldom);
     338            0 :     }
     339              : 
     340              : 
     341              :     template <typename T, unsigned Dim, unsigned Order, typename ElementType,
     342              :               typename QuadratureType, typename FieldType>
     343              :     KOKKOS_FUNCTION typename NedelecSpace<T, Dim, Order, ElementType, QuadratureType, FieldType>::point_t
     344            0 :         NedelecSpace<T, Dim, Order, ElementType, QuadratureType, FieldType>
     345              :             ::getLocalDOFPosition(size_t localDOFIndex) const {
     346              :         
     347              :         // Hardcoded center of edges which are stored in the localDofPositions_m
     348              :         // vector. If the DOF position of an edge element actually is the center
     349              :         // of the edge is a different question...
     350            0 :         return localDofPositions_m(localDOFIndex);
     351              :     }
     352              : 
     353              : 
     354              : 
     355              :     ///////////////////////////////////////////////////////////////////////
     356              :     /// Assembly operations ///////////////////////////////////////////////
     357              :     ///////////////////////////////////////////////////////////////////////
     358              :     template <typename T, unsigned Dim, unsigned Order, typename ElementType,
     359              :                 typename QuadratureType, typename FieldType>
     360              :     template <typename F>
     361            0 :     FEMVector<T> NedelecSpace<T, Dim, Order, ElementType, QuadratureType, FieldType>
     362              :                             ::evaluateAx(FEMVector<T>& x, F& evalFunction) const {
     363            0 :         Inform m("");
     364              :         
     365              : 
     366            0 :         IpplTimings::TimerRef timerAxInit = IpplTimings::getTimer("Ax init");
     367            0 :         IpplTimings::startTimer(timerAxInit);
     368              : 
     369              :         // create a new field for result, default initialized to zero thanks to
     370              :         // the Kokkos::View
     371            0 :         FEMVector<T> resultVector = x.template skeletonCopy<T>();
     372              : 
     373              :         // List of quadrature weights
     374            0 :         const Vector<T, QuadratureType::numElementNodes> w =
     375            0 :             this->quadrature_m.getWeightsForRefElement();
     376              : 
     377              :         // List of quadrature nodes
     378            0 :         const Vector<point_t, QuadratureType::numElementNodes> q =
     379            0 :             this->quadrature_m.getIntegrationNodesForRefElement();
     380              :         
     381              :         // Get the values of the basis functions and their curl at the
     382              :         // quadrature points.
     383            0 :         Vector<Vector<point_t, this->numElementDOFs>, QuadratureType::numElementNodes> curl_b_q;
     384            0 :         Vector<Vector<point_t, this->numElementDOFs>, QuadratureType::numElementNodes> val_b_q;
     385            0 :         for (size_t k = 0; k < QuadratureType::numElementNodes; ++k) {
     386            0 :             for (size_t i = 0; i < this->numElementDOFs; ++i) {
     387            0 :                 curl_b_q[k][i] = this->evaluateRefElementShapeFunctionCurl(i, q[k]);
     388            0 :                 val_b_q[k][i] = this->evaluateRefElementShapeFunction(i, q[k]);
     389              :             }
     390              :         }
     391              : 
     392              :         // Get field data and atomic result data,
     393              :         // since it will be added to during the kokkos loop
     394            0 :         ViewType view             = x.getView();
     395            0 :         AtomicViewType resultView = resultVector.getView();
     396              : 
     397              : 
     398              :         // Get domain information
     399            0 :         auto ldom = layout_m.getLocalNDIndex();
     400              : 
     401              :         using exec_space  = typename Kokkos::View<const size_t*>::execution_space;
     402              :         using policy_type = Kokkos::RangePolicy<exec_space>;
     403              : 
     404              : 
     405            0 :         IpplTimings::stopTimer(timerAxInit);
     406              : 
     407              :         // Here we assemble the local matrix of an element. In theory this would
     408              :         // have to be done for each element individually, but because we have
     409              :         // that in the case of IPPL all the elements have the same shape we can
     410              :         // also just do it once and then use if all the time.
     411            0 :         IpplTimings::TimerRef timerAxLocalMatrix = IpplTimings::getTimer("Ax local matrix");
     412            0 :         IpplTimings::startTimer(timerAxLocalMatrix);
     413            0 :         Vector<Vector<T,this->numElementDOFs>, this->numElementDOFs> A;
     414            0 :         for (size_t i = 0; i < this->numElementDOFs; ++i) {
     415            0 :             for (size_t j = 0; j < this->numElementDOFs; ++j) {
     416            0 :                 A[i][j] = 0.0;
     417            0 :                 for (size_t k = 0; k < QuadratureType::numElementNodes; ++k) {
     418            0 :                     A[i][j] += w[k] * evalFunction(i, j, curl_b_q[k], val_b_q[k]);
     419              :                 }
     420              :             }
     421              :         }
     422            0 :         IpplTimings::stopTimer(timerAxLocalMatrix);
     423              : 
     424              : 
     425            0 :         IpplTimings::TimerRef timerAxLoop = IpplTimings::getTimer("Ax Loop");
     426            0 :         IpplTimings::startTimer(timerAxLoop);
     427              : 
     428              :         // Loop over elements to compute contributions
     429            0 :         Kokkos::parallel_for(
     430            0 :             "Loop over elements", policy_type(0, elementIndices.extent(0)),
     431            0 :             KOKKOS_CLASS_LAMBDA(const size_t index) {
     432            0 :                 const size_t elementIndex                            = elementIndices(index);
     433            0 :                 const Vector<size_t, this->numElementDOFs> local_dof = this->getLocalDOFIndices();
     434              :                 
     435              :                 // Here we now retrieve the global DOF indices and their
     436              :                 // position inside of the FEMVector
     437            0 :                 const Vector<size_t, this->numElementDOFs> global_dofs =
     438            0 :                     this->getGlobalDOFIndices(elementIndex);
     439              :                 
     440            0 :                 const Vector<size_t, this->numElementDOFs> vectorIndices =
     441              :                     this->getFEMVectorDOFIndices(elementIndex, ldom);
     442              :                 
     443              : 
     444              :                 // local DOF indices
     445              :                 size_t i, j;
     446              : 
     447              :                 // global DOF n-dimensional indices (Vector of N indices
     448              :                 // representing indices in each dimension)
     449              :                 size_t I, J;
     450              :                 
     451            0 :                 for (i = 0; i < this->numElementDOFs; ++i) {
     452            0 :                     I = global_dofs[i];
     453              : 
     454              :                     // Skip boundary DOFs (Zero Dirichlet BCs)
     455            0 :                     if (this->isDOFOnBoundary(I)) {
     456            0 :                         continue;
     457              :                     }
     458              : 
     459            0 :                     for (j = 0; j < this->numElementDOFs; ++j) {
     460            0 :                         J = global_dofs[j];
     461              : 
     462              :                         // Skip boundary DOFs (Zero Dirichlet BCs)        
     463            0 :                         if (this->isDOFOnBoundary(J)) {
     464            0 :                             continue;
     465              :                         }
     466              : 
     467            0 :                         resultView(vectorIndices[i]) += A[i][j] * view(vectorIndices[j]);
     468              :                     }
     469              :                 }
     470            0 :             }
     471              :         );
     472            0 :         IpplTimings::stopTimer(timerAxLoop);
     473              :         
     474            0 :         return resultVector;
     475              :     
     476            0 :     }
     477              : 
     478              : 
     479              :     template <typename T, unsigned Dim, unsigned Order, typename ElementType,
     480              :                 typename QuadratureType, typename FieldType>
     481            0 :     FEMVector<T> NedelecSpace<T, Dim, Order, ElementType, QuadratureType, FieldType>
     482              :                             ::evaluateLoadVector(const FEMVector<NedelecSpace<T, Dim, Order, ElementType,
     483              :                                 QuadratureType, FieldType>::point_t>& f) const {
     484              : 
     485              :         // List of quadrature weights
     486            0 :         const Vector<T, QuadratureType::numElementNodes> w =
     487            0 :             this->quadrature_m.getWeightsForRefElement();
     488              : 
     489              :         // List of quadrature nodes
     490            0 :         const Vector<point_t, QuadratureType::numElementNodes> q =
     491            0 :             this->quadrature_m.getIntegrationNodesForRefElement();
     492              : 
     493            0 :         const indices_t zeroNdIndex = Vector<size_t, Dim>(0);
     494              : 
     495              : 
     496              : 
     497              :         // Evaluate the basis functions for the DOF at the quadrature nodes
     498            0 :         Vector<Vector<point_t, this->numElementDOFs>, QuadratureType::numElementNodes> basis_q;
     499            0 :         for (size_t k = 0; k < QuadratureType::numElementNodes; ++k) {
     500            0 :             for (size_t i = 0; i < this->numElementDOFs; ++i) {
     501            0 :                 basis_q[k][i] = this->evaluateRefElementShapeFunction(i, q[k]);
     502              :             }
     503              :         }
     504              : 
     505              : 
     506              :         // Get the distance between the quadrature nodes and the DOFs 
     507              :         // we assume that the dofs are at the center of an edge, this is then
     508              :         // going to be used to implement a very crude interpolation scheme.
     509              :         Vector<Vector<T, this->numElementDOFs>, QuadratureType::numElementNodes>
     510            0 :             quadratureDOFDistances;
     511            0 :         for (size_t k = 0; k < QuadratureType::numElementNodes; ++k) {
     512            0 :             for (size_t i = 0; i < this->numElementDOFs; ++i) {
     513            0 :                 point_t dofPos = getLocalDOFPosition(i);
     514            0 :                 point_t d = dofPos - q[k];
     515            0 :                 quadratureDOFDistances[k][i] = Kokkos::sqrt(d.dot(d));
     516              :             }
     517              :         }
     518              : 
     519              :         // Absolute value of det Phi_K
     520            0 :         const T absDetDPhi = Kokkos::abs(this->ref_element_m.getDeterminantOfTransformationJacobian(
     521            0 :             this->getElementMeshVertexPoints(zeroNdIndex)));
     522              : 
     523              :         // Get domain information and ghost cells
     524            0 :         auto ldom = layout_m.getLocalNDIndex();
     525              : 
     526              : 
     527              :         // Get boundary conditions from field
     528            0 :         FEMVector<T> resultVector = createFEMVector();
     529              : 
     530              :         // Get field data and make it atomic,
     531              :         // since it will be added to during the kokkos loop
     532            0 :         AtomicViewType atomic_view = resultVector.getView();
     533            0 :         typename detail::ViewType<point_t, 1>::view_type view = f.getView(); 
     534              : 
     535              :         using exec_space  = typename Kokkos::View<const size_t*>::execution_space;
     536              :         using policy_type = Kokkos::RangePolicy<exec_space>;
     537              : 
     538              :         // Loop over elements to compute contributions
     539            0 :         Kokkos::parallel_for(
     540            0 :             "Loop over elements", policy_type(0, elementIndices.extent(0)),
     541            0 :             KOKKOS_CLASS_LAMBDA(size_t index) {
     542            0 :                 const size_t elementIndex                              = elementIndices(index);
     543            0 :                 const Vector<size_t, this->numElementDOFs> local_dofs  = this->getLocalDOFIndices();
     544            0 :                 const Vector<size_t, this->numElementDOFs> global_dofs =
     545            0 :                     this->getGlobalDOFIndices(elementIndex);
     546              : 
     547            0 :                 const Vector<size_t, this->numElementDOFs> vectorIndices =
     548              :                     this->getFEMVectorDOFIndices(elementIndex, ldom);
     549              : 
     550              :                 size_t i;
     551              : 
     552            0 :                 for (i = 0; i < this->numElementDOFs; ++i) {
     553            0 :                     size_t I = global_dofs[i];
     554            0 :                     if (this->isDOFOnBoundary(I)) {
     555            0 :                         continue;
     556              :                     }
     557              :                         
     558              : 
     559              :                     // calculate the contribution of this element
     560            0 :                     T contrib = 0;
     561            0 :                     for (size_t k = 0; k < QuadratureType::numElementNodes; ++k) {
     562              :                         // We now have to interpolate the value of the field
     563              :                         // given at the DOF positions to the quadrature point.
     564            0 :                         point_t interpolatedVal(0);
     565            0 :                         T distSum = 0;
     566            0 :                         for (size_t j = 0; j < this->numElementDOFs; ++j) {
     567              :                             // get field index corresponding to this DOF
     568              : 
     569              :                             // the distance
     570            0 :                             T dist = quadratureDOFDistances[k][j];
     571              :                             
     572              :                             // running variable used for normalization
     573            0 :                             distSum += 1/dist;
     574              :                             
     575              :                             // get field value at DOF and interpolate to q_k
     576            0 :                             interpolatedVal += 1./dist * view(vectorIndices<:j:>);
     577              :                         }
     578              :                         // here we have to divide it by distSum in order to
     579              :                         // normalize it
     580            0 :                         interpolatedVal /= distSum;
     581              : 
     582              :                         // update contribution
     583            0 :                         contrib += w[k] * basis_q[k][i].dot(interpolatedVal) * absDetDPhi;
     584              :                     }
     585              : 
     586              :                     // add the contribution of the element to the field
     587            0 :                     atomic_view(vectorIndices<:i:>) += contrib;
     588              : 
     589              :                 }
     590            0 :             });
     591              :         
     592            0 :         return resultVector;
     593            0 :     }
     594              : 
     595              :     template <typename T, unsigned Dim, unsigned Order, typename ElementType,
     596              :                 typename QuadratureType, typename FieldType>
     597              :     template <typename F>
     598              :     FEMVector<T> NedelecSpace<T, Dim, Order, ElementType, QuadratureType, FieldType>
     599              :                             ::evaluateLoadVectorFunctor(const F& f) const {
     600              : 
     601              :         // List of quadrature weights
     602              :         const Vector<T, QuadratureType::numElementNodes> w =
     603              :             this->quadrature_m.getWeightsForRefElement();
     604              : 
     605              :         // List of quadrature nodes
     606              :         const Vector<point_t, QuadratureType::numElementNodes> q =
     607              :             this->quadrature_m.getIntegrationNodesForRefElement();
     608              : 
     609              :         const indices_t zeroNdIndex = Vector<size_t, Dim>(0);
     610              : 
     611              :         // Evaluate the basis functions for the DOF at the quadrature nodes
     612              :         Vector<Vector<point_t, this->numElementDOFs>, QuadratureType::numElementNodes> basis_q;
     613              :         for (size_t k = 0; k < QuadratureType::numElementNodes; ++k) {
     614              :             for (size_t i = 0; i < this->numElementDOFs; ++i) {
     615              :                 basis_q[k][i] = this->evaluateRefElementShapeFunction(i, q[k]);
     616              :             }
     617              :         }
     618              : 
     619              : 
     620              :         // Absolute value of det Phi_K
     621              :         const T absDetDPhi = Kokkos::abs(this->ref_element_m.getDeterminantOfTransformationJacobian(
     622              :             this->getElementMeshVertexPoints(zeroNdIndex)));
     623              : 
     624              :         // Get domain information and ghost cells
     625              :         auto ldom = layout_m.getLocalNDIndex();
     626              : 
     627              : 
     628              :         // Get boundary conditions from field
     629              :         FEMVector<T> resultVector = createFEMVector();
     630              : 
     631              :         // Get field data and make it atomic,
     632              :         // since it will be added to during the kokkos loop
     633              :         AtomicViewType atomic_view = resultVector.getView();
     634              : 
     635              :         using exec_space  = typename Kokkos::View<const size_t*>::execution_space;
     636              :         using policy_type = Kokkos::RangePolicy<exec_space>;
     637              :         
     638              : 
     639              :         // Loop over elements to compute contributions
     640              :         Kokkos::parallel_for(
     641              :             "Loop over elements", policy_type(0, elementIndices.extent(0)),
     642              :             KOKKOS_CLASS_LAMBDA(size_t index) {
     643              :                 const size_t elementIndex                              = elementIndices(index);
     644              :                 const Vector<size_t, this->numElementDOFs> local_dofs  = this->getLocalDOFIndices();
     645              :                 const Vector<size_t, this->numElementDOFs> global_dofs =
     646              :                     this->getGlobalDOFIndices(elementIndex);
     647              :                 
     648              :                 const Vector<size_t, this->numElementDOFs> vectorIndices =
     649              :                     this->getFEMVectorDOFIndices(elementIndex, ldom);
     650              :                 
     651              : 
     652              :                 size_t i, I;
     653              : 
     654              :                 for (i = 0; i < this->numElementDOFs; ++i) {
     655              :                     I = global_dofs[i];
     656              : 
     657              :                     
     658              :                     if (this->isDOFOnBoundary(I)) {
     659              :                         continue;
     660              :                     }
     661              :                     
     662              :                     // calculate the contribution of this element
     663              :                     T contrib = 0;
     664              :                     for (size_t k = 0; k < QuadratureType::numElementNodes; ++k) {
     665              :                         // Get the global position of the quadrature point                        
     666              :                         point_t pos = this->ref_element_m.localToGlobal(
     667              :                             this->getElementMeshVertexPoints(this->getElementNDIndex(elementIndex)),
     668              :                             q[k]); 
     669              :                         
     670              :                         // evaluate the rhs function at this global position
     671              :                         point_t interpolatedVal = f(pos);
     672              :                         
     673              :                         // update contribution
     674              :                         contrib += w[k] * basis_q[k][i].dot(interpolatedVal) * absDetDPhi;
     675              :                     }
     676              : 
     677              :                     // add the contribution of the element to the vector
     678              :                     atomic_view(vectorIndices[i]) += contrib;
     679              :                 
     680              :                 }    
     681              :             });
     682              :         
     683              :         return resultVector;
     684              :     }
     685              : 
     686              : 
     687              : 
     688              :     ///////////////////////////////////////////////////////////////////////
     689              :     /// Basis functions and gradients /////////////////////////////////////
     690              :     ///////////////////////////////////////////////////////////////////////
     691              : 
     692              :     template <typename T, unsigned Dim, unsigned Order, typename ElementType,
     693              :                 typename QuadratureType, typename FieldType>
     694              :     KOKKOS_FUNCTION typename NedelecSpace<T, Dim, Order, ElementType, QuadratureType, FieldType>::point_t
     695       390400 :                             NedelecSpace<T, Dim, Order, ElementType, QuadratureType, FieldType>
     696              :                             ::evaluateRefElementShapeFunction(const size_t& localDOF,
     697              :                                 const NedelecSpace<T, Dim, Order, ElementType,
     698              :                                     QuadratureType, FieldType>::point_t& localPoint) const {
     699              : 
     700              :         // Assert that the local vertex index is valid.
     701       390400 :         assert(localDOF < this->numElementDOFs && "The local vertex index is invalid"); 
     702              : 
     703       390400 :         assert(this->ref_element_m.isPointInRefElement(localPoint)
     704              :             && "Point is not in reference element");
     705              :         
     706              : 
     707              : 
     708              :         // Simply hardcoded
     709       390400 :         point_t result(0);
     710              :         if constexpr (Dim == 2) {
     711         6400 :             T x = localPoint(0);
     712         6400 :             T y = localPoint(1);
     713              : 
     714         6400 :             switch (localDOF){
     715         1600 :                 case 0: result(0) = 1 - y; break;
     716         1600 :                 case 1: result(1) = 1 - x; break;
     717         1600 :                 case 2: result(0) = y; break;
     718         1600 :                 case 3: result(1) = x; break;
     719              :             }
     720              :         } else if constexpr (Dim == 3) {
     721       384000 :             T x = localPoint(0);
     722       384000 :             T y = localPoint(1);
     723       384000 :             T z = localPoint(2);
     724              : 
     725       384000 :             switch (localDOF){
     726        32000 :                 case 0:  result(0) = y*z - y - z + 1; break;
     727        32000 :                 case 1:  result(1) = x*z - x - z + 1; break;
     728        32000 :                 case 2:  result(0) = y*(1 - z);       break;
     729        32000 :                 case 3:  result(1) = x*(1 - z);       break;
     730        32000 :                 case 4:  result(2) = x*y - x - y + 1; break;
     731        32000 :                 case 5:  result(2) = x*(1 - y);       break;
     732        32000 :                 case 6:  result(2) = x*y;             break;
     733        32000 :                 case 7:  result(2) = y*(1 - x);       break;
     734        32000 :                 case 8:  result(0) = z*(1 - y);       break;
     735        32000 :                 case 9:  result(1) = z*(1 - x);       break;
     736        32000 :                 case 10: result(0) = y*z;             break;
     737        32000 :                 case 11: result(1) = x*z;             break;
     738              :             }
     739              :         }
     740              : 
     741              : 
     742       390400 :         return result;
     743              :     }
     744              : 
     745              : 
     746              :     template <typename T, unsigned Dim, unsigned Order, typename ElementType,
     747              :               typename QuadratureType, typename FieldType>
     748              :     KOKKOS_FUNCTION typename NedelecSpace<T, Dim, Order, ElementType,
     749              :                                 QuadratureType, FieldType>::point_t
     750       390400 :                              NedelecSpace<T, Dim, Order, ElementType, QuadratureType, FieldType>
     751              :                                 ::evaluateRefElementShapeFunctionCurl(const size_t& localDOF,
     752              :                                     const NedelecSpace<T, Dim, Order, ElementType,
     753              :                                         QuadratureType, FieldType>::point_t& localPoint) const {
     754              :         
     755              :         // Hard coded.
     756       390400 :         point_t result(0);
     757              : 
     758              :         if constexpr (Dim == 2) {
     759              :             // In case of 2d we would have that the curl would correspond to a
     760              :             // scalar, but in order to keep the interface uniform across all the
     761              :             // dimensions, we still use a 2d vector but only set the first entry
     762              :             // of it. This should lead to no problems, as we later on only will
     763              :             // take the dot product between two of them and therefore should not
     764              :             // run into any problems
     765              :             
     766         6400 :             switch (localDOF) {
     767         1600 :                 case 0: result(0) = 1; break;
     768         1600 :                 case 1: result(0) = -1; break;
     769         1600 :                 case 2: result(0) = -1; break;
     770         1600 :                 case 3: result(0) = 1; break;
     771              :             }
     772              :         } else {
     773       384000 :             T x = localPoint(0);
     774       384000 :             T y = localPoint(1);
     775       384000 :             T z = localPoint(2);
     776              : 
     777       384000 :             switch (localDOF) {
     778        32000 :                 case 0: result(0) = 0; result(1) = -1+y; result(2) = 1-z; break;
     779        32000 :                 case 1: result(0) = 1-x; result(1) = 0; result(2) = -1+z; break;
     780        32000 :                 case 2: result(0) = 0; result(1) = -y; result(2) = -1+z; break;
     781        32000 :                 case 3: result(0) = x; result(1) = 0; result(2) = 1-z; break;
     782        32000 :                 case 4: result(0) = -1+x; result(1) = 1-y; result(2) = 0; break;
     783        32000 :                 case 5: result(0) = -x; result(1) = -1+y; result(2) = 0; break;
     784        32000 :                 case 6: result(0) = x; result(1) = -y; result(2) = 0; break;
     785        32000 :                 case 7: result(0) = 1-x; result(1) = y; result(2) = 0; break;
     786        32000 :                 case 8: result(0) = 0; result(1) = 1-y; result(2) = z; break;
     787        32000 :                 case 9: result(0) = -1+x; result(1) = 0; result(2) = -z; break;
     788        32000 :                 case 10: result(0) = 0; result(1) = y; result(2) = -z; break;
     789        32000 :                 case 11: result(0) = -x; result(1) = 0; result(2) = z; break;
     790              :             }
     791              :         }
     792              : 
     793       390400 :         return result;
     794              :         
     795              :     }
     796              : 
     797              : 
     798              :     ///////////////////////////////////////////////////////////////////////
     799              :     /// FEMVector conversion //////////////////////////////////////////////
     800              :     ///////////////////////////////////////////////////////////////////////
     801              : 
     802              :     template <typename T, unsigned Dim, unsigned Order, typename ElementType,
     803              :               typename QuadratureType, typename FieldType>
     804            8 :     FEMVector<T> NedelecSpace<T, Dim, Order, ElementType, QuadratureType, FieldType>
     805              :                                 ::createFEMVector() const {
     806              :         // This function will simply call one of the other two depending on the
     807              :         // dimension of the space
     808              :         if constexpr (Dim == 2) {
     809            4 :             return createFEMVector2d();
     810              :         } else {
     811            4 :             return createFEMVector3d();
     812              :         }
     813              :     }
     814              : 
     815              : 
     816              :     template <typename T, unsigned Dim, unsigned Order, typename ElementType,
     817              :               typename QuadratureType, typename FieldType>
     818              :     Kokkos::View<typename NedelecSpace<T, Dim, Order, ElementType, QuadratureType,
     819              :         FieldType>::point_t*>
     820            0 :     NedelecSpace<T, Dim, Order, ElementType, QuadratureType, FieldType>::reconstructToPoints(
     821              :         const Kokkos::View<typename NedelecSpace<T, Dim, Order, ElementType, QuadratureType,
     822              :         FieldType>::point_t*>& positions, const FEMVector<T>& coef) const {
     823              :         
     824              :         // The domain information of the subdomain of the MPI rank
     825            0 :         auto ldom = layout_m.getLocalNDIndex(); 
     826              :         
     827              :         // The domain information of the global domain
     828            0 :         auto gdom = layout_m.getDomain();
     829            0 :         indices_t gextent = gdom.last() - gdom.first();
     830              :         
     831              :         // The size of the global domain.
     832            0 :         point_t domainSize = (this->nr_m-1) * this->hr_m;
     833              : 
     834              : 
     835            0 :         auto coefView = coef.getView();
     836              : 
     837            0 :         Kokkos::View<point_t*> outView("reconstructed Func values at points", positions.extent(0));
     838              : 
     839            0 :         Kokkos::parallel_for("reconstructToPoints", positions.extent(0),
     840            0 :             KOKKOS_CLASS_LAMBDA(size_t i) {
     841              :                 // get the current position and for it figure out to which
     842              :                 // element it belongs
     843            0 :                 point_t pos = positions<:i:>;
     844            0 :                 indices_t elemIdx = ((pos - this->origin_m) / domainSize) * gextent;
     845              : 
     846              :             
     847              :                 // next up we have to handle the case of when a position that
     848              :                 // was provided to us lies on an edge at the upper bound of the
     849              :                 // local domain, because in this case we have that the above
     850              :                 // transformation gives back an element which is somewhat in the
     851              :                 // halo. In order to fix this we simply subtract one.
     852            0 :                 for (size_t d = 0; d < Dim; ++d) {
     853            0 :                     if (elemIdx<:d:> >= ldom.last()<:d:>) {
     854            0 :                         elemIdx<:d:> -= 1;
     855              :                     }
     856              :                 }
     857              : 
     858              : 
     859              :                 // get correct indices
     860            0 :                 const Vector<size_t, this->numElementDOFs> vectorIndices =
     861              :                     this->getFEMVectorDOFIndices(elemIdx, ldom);
     862              : 
     863              :                 
     864              :                 // figure out position inside of the reference element
     865            0 :                 point_t locPos = pos - (elemIdx * this->hr_m + this->origin_m);
     866            0 :                 locPos /= this->hr_m;
     867              : 
     868              :                 // because of numerical instabilities it might happen then when
     869              :                 // a point is on an edge this becomes marginally larger that 1 
     870              :                 // or slightly negative which triggers an assertion. So this
     871              :                 // simply is to prevent this.
     872            0 :                 for (size_t d = 0; d < Dim; ++d) {
     873            0 :                     locPos<:d:> = Kokkos::min(T(1), locPos<:d:>);
     874            0 :                     locPos<:d:> = Kokkos::max(T(0), locPos<:d:>);
     875              :                 }
     876              : 
     877              : 
     878              :                 // interpolate the function value to the position, using the
     879              :                 // basis functions.
     880            0 :                 point_t val(0);
     881            0 :                 for (size_t j = 0; j < this->numElementDOFs; ++j) {
     882            0 :                     point_t funcVal = this->evaluateRefElementShapeFunction(j, locPos);
     883            0 :                     val += funcVal*coefView(vectorIndices<:j:>);
     884              :                 }
     885            0 :                 outView(i) = val;
     886              : 
     887            0 :             }
     888              :         );
     889              :         
     890              : 
     891            0 :         return outView;
     892            0 :     }
     893              : 
     894              : 
     895              :     ///////////////////////////////////////////////////////////////////////
     896              :     /// Error norm computations ///////////////////////////////////////////
     897              :     ///////////////////////////////////////////////////////////////////////
     898              : 
     899              :     template <typename T, unsigned Dim, unsigned Order, typename ElementType,
     900              :               typename QuadratureType, typename FieldType>
     901              :     template <typename F>
     902            0 :     T NedelecSpace<T, Dim, Order, ElementType, QuadratureType, FieldType>::computeError(
     903              :         const FEMVector<T>& u_h, const F& u_sol) const {
     904            0 :         if (this->quadrature_m.getOrder() < (2 * Order + 1)) {
     905              :             // throw exception
     906            0 :             throw IpplException( "NedelecSpace::computeError()",
     907              :                 "Order of quadrature rule for error computation should be > 2*p + 1");
     908              :         }
     909              : 
     910              :         // List of quadrature weights
     911            0 :         const Vector<T, QuadratureType::numElementNodes> w =
     912            0 :             this->quadrature_m.getWeightsForRefElement();
     913              : 
     914              :         // List of quadrature nodes
     915            0 :         const Vector<point_t, QuadratureType::numElementNodes> q =
     916            0 :             this->quadrature_m.getIntegrationNodesForRefElement();
     917              : 
     918              :         // Evaluate the basis functions for the DOF at the quadrature nodes
     919            0 :         Vector<Vector<point_t, this->numElementDOFs>, QuadratureType::numElementNodes> basis_q;
     920            0 :         for (size_t k = 0; k < QuadratureType::numElementNodes; ++k) {
     921            0 :             for (size_t i = 0; i < this->numElementDOFs; ++i) {
     922            0 :                 basis_q[k][i] = this->evaluateRefElementShapeFunction(i, q[k]);
     923              :             }
     924              :         }
     925              : 
     926            0 :         const indices_t zeroNdIndex = Vector<size_t, Dim>(0);
     927              : 
     928              :         // Absolute value of det Phi_K
     929            0 :         const T absDetDPhi = Kokkos::abs(this->ref_element_m
     930            0 :             .getDeterminantOfTransformationJacobian(this->getElementMeshVertexPoints(zeroNdIndex)));
     931              : 
     932              :         // Variable to sum the error to
     933            0 :         T error = 0;
     934              : 
     935              :         // Get domain information and ghost cells
     936            0 :         auto ldom        = layout_m.getLocalNDIndex();
     937              : 
     938              :         using exec_space  = typename Kokkos::View<const size_t*>::execution_space;
     939              :         using policy_type = Kokkos::RangePolicy<exec_space>;
     940              : 
     941            0 :         auto view = u_h.getView();
     942              : 
     943              :     
     944              :         // Loop over elements to compute contributions
     945            0 :         Kokkos::parallel_reduce("Compute error over elements",
     946            0 :             policy_type(0, elementIndices.extent(0)),
     947            0 :             KOKKOS_CLASS_LAMBDA(size_t index, double& local) {
     948            0 :                 const size_t elementIndex = elementIndices(index);
     949            0 :                 const Vector<size_t, this->numElementDOFs> global_dofs =
     950            0 :                     this->getGlobalDOFIndices(elementIndex);
     951              :                 
     952            0 :                 const Vector<size_t, this->numElementDOFs> vectorIndices =
     953              :                     this->getFEMVectorDOFIndices(elementIndex, ldom);
     954              : 
     955              : 
     956              :                 // contribution of this element to the error
     957            0 :                 T contrib = 0;
     958            0 :                 for (size_t k = 0; k < QuadratureType::numElementNodes; ++k) {
     959              :                     // Evaluate the analystical solution at the global position
     960              :                     // of the quadrature point
     961            0 :                     point_t val_u_sol = u_sol(this->ref_element_m.localToGlobal(
     962            0 :                         this->getElementMeshVertexPoints(this->getElementNDIndex(elementIndex)),
     963            0 :                             q[k]));
     964              :                     
     965              :                     // Here we now reconstruct the solution given the basis
     966              :                     // functions.
     967            0 :                     point_t val_u_h = 0;
     968            0 :                     for (size_t j = 0; j < this->numElementDOFs; ++j) {
     969              :                         // get field index corresponding to this DOF
     970            0 :                         val_u_h += basis_q[k][j] * view(vectorIndices[j]);
     971              :                     }
     972              : 
     973              :                     // calculate error and add to sum.
     974            0 :                     point_t dif = (val_u_sol -  val_u_h);
     975            0 :                     T x = dif.dot(dif);
     976            0 :                     contrib += w[k] * x * absDetDPhi;
     977              :                 }
     978            0 :                 local += contrib;
     979            0 :             },
     980            0 :             Kokkos::Sum<double>(error)
     981              :         );
     982              : 
     983              :         // MPI reduce
     984            0 :         T global_error = 0.0;
     985            0 :         Comm->allreduce(error, global_error, 1, std::plus<T>());
     986              : 
     987            0 :         return Kokkos::sqrt(global_error);
     988            0 :     }
     989              : 
     990              :     template <typename T, unsigned Dim, unsigned Order, typename ElementType,
     991              :               typename QuadratureType, typename FieldType>
     992          328 :     bool NedelecSpace<T, Dim, Order, ElementType, QuadratureType, FieldType>
     993              :                                 ::isDOFOnBoundary(const size_t& dofIdx) const {
     994              :         
     995          328 :         bool onBoundary = false;
     996              :         if constexpr (Dim == 2) {
     997          160 :             size_t nx = this->nr_m[0];
     998          160 :             size_t ny = this->nr_m[1];
     999              :             // South
    1000          160 :             bool sVal = (dofIdx < nx -1);
    1001          160 :             onBoundary = onBoundary || sVal;
    1002              :             // North
    1003          160 :             onBoundary = onBoundary || (dofIdx > nx*(ny-1) + ny*(nx-1) - nx);
    1004              :             // West
    1005          160 :             onBoundary = onBoundary || ((dofIdx >= nx-1) && (dofIdx - (nx-1)) % (2*nx - 1) == 0);
    1006              :             // East
    1007          160 :             onBoundary = onBoundary || ((dofIdx >= 2*nx-2) && ((dofIdx - 2*nx + 2) % (2*nx - 1) == 0));    
    1008              :         }
    1009              : 
    1010              :         if constexpr (Dim == 3) {
    1011          168 :             size_t nx = this->nr_m[0];
    1012          168 :             size_t ny = this->nr_m[1];
    1013          168 :             size_t nz = this->nr_m[2];
    1014              : 
    1015          168 :             size_t zOffset = dofIdx / (nx*(ny-1) + ny*(nx-1) + nx*ny);
    1016              : 
    1017              : 
    1018          168 :             if (dofIdx - (nx*(ny-1) + ny*(nx-1) + nx*ny)*zOffset >= (nx*(ny-1) + ny*(nx-1))) {
    1019              :                 // we are parallel to z axis
    1020              :                 // therefore we have halve a cell offset and can never be on the ground or in
    1021              :                 // space
    1022           60 :                 size_t f = dofIdx - (nx*(ny-1) + ny*(nx-1) + nx*ny)*zOffset
    1023           60 :                     - (nx*(ny-1) + ny*(nx-1));
    1024              :                 
    1025           60 :                 size_t yOffset = f / nx;
    1026              :                 // South
    1027           60 :                 onBoundary = onBoundary || yOffset == 0;
    1028              :                 // North
    1029           60 :                 onBoundary = onBoundary || yOffset == ny-1;
    1030              : 
    1031           60 :                 size_t xOffset = f % nx;
    1032              :                 // West
    1033           60 :                 onBoundary = onBoundary || xOffset == 0;
    1034              :                 // East
    1035           60 :                 onBoundary = onBoundary || xOffset == nx-1;
    1036              : 
    1037              :             } else {
    1038              :                 // are parallel to one of the other axes
    1039              :                 // Ground
    1040          108 :                 onBoundary = onBoundary || zOffset == 0;
    1041              :                 // Space
    1042          108 :                 onBoundary = onBoundary || zOffset == nz-1;
    1043              :                 
    1044          108 :                 size_t f = dofIdx - (nx*(ny-1) + ny*(nx-1) + nx*ny)*zOffset;
    1045          108 :                 size_t yOffset = f / (2*nx - 1);
    1046          108 :                 size_t xOffset = f - (2*nx - 1)*yOffset;
    1047              : 
    1048          108 :                 if (xOffset < (nx-1)) {
    1049              :                     // we are parallel to the x axis, therefore we cannot
    1050              :                     // be on an west or east boundary, but we still can
    1051              :                     // be on a north or south boundary
    1052              :                     
    1053              :                     // South
    1054           48 :                     onBoundary = onBoundary || yOffset == 0;
    1055              :                     // North
    1056           48 :                     onBoundary = onBoundary || yOffset == ny-1;
    1057              :                     
    1058              :                 } else {
    1059              :                     // we are parallel to the y axis, therefore we cannot be
    1060              :                     // on a south or north boundary, but we still can be on
    1061              :                     // a west or east boundary
    1062           60 :                     if (xOffset >= nx-1) {
    1063           60 :                         xOffset -= (nx-1);
    1064              :                     }
    1065              : 
    1066              :                     // West
    1067           60 :                     onBoundary = onBoundary || xOffset == 0;
    1068              :                     // East
    1069           60 :                     onBoundary = onBoundary || xOffset == nx-1;
    1070              :                 }
    1071              :             }
    1072              :         }
    1073          328 :         return onBoundary;
    1074              :     }
    1075              : 
    1076              : 
    1077              : 
    1078              :     template <typename T, unsigned Dim, unsigned Order, typename ElementType,
    1079              :               typename QuadratureType, typename FieldType>
    1080          280 :     int NedelecSpace<T, Dim, Order, ElementType, QuadratureType, FieldType>
    1081              :                                 ::getBoundarySide(const size_t& dofIdx) const {
    1082              : 
    1083              :         if constexpr (Dim == 2) {
    1084          160 :             size_t nx = this->nr_m[0];
    1085          160 :             size_t ny = this->nr_m[1];
    1086              : 
    1087              :             // South
    1088          160 :             if (dofIdx < nx -1) return 0;
    1089              :             // West
    1090          144 :             if ((dofIdx - (nx-1)) % (2*nx - 1) == 0) return 1;
    1091              :             // North
    1092          128 :             if (dofIdx > nx*(ny-1) + ny*(nx-1) - nx) return 2;
    1093              :             // East
    1094          112 :             if ((dofIdx >= 2*nx-2) && (dofIdx - 2*nx + 2) % (2*nx - 1) == 0) return 3;
    1095              : 
    1096           96 :             return -1;
    1097              :         }
    1098              : 
    1099              :         if constexpr (Dim == 3) {
    1100          120 :             size_t nx = this->nr_m[0];
    1101          120 :             size_t ny = this->nr_m[1];
    1102          120 :             size_t nz = this->nr_m[2];
    1103              : 
    1104          120 :             size_t zOffset = dofIdx / (nx*(ny-1) + ny*(nx-1) + nx*ny);
    1105              : 
    1106              : 
    1107          120 :             if (dofIdx - (nx*(ny-1) + ny*(nx-1) + nx*ny)*zOffset >= (nx*(ny-1) + ny*(nx-1))) {
    1108              :                 // we are parallel to z axis
    1109              :                 // therefore we have halve a cell offset and can never be on the ground or in
    1110              :                 // space
    1111           40 :                 size_t f = dofIdx - (nx*(ny-1) + ny*(nx-1) + nx*ny)*zOffset
    1112           40 :                     - (nx*(ny-1) + ny*(nx-1));
    1113              :                 
    1114           40 :                 size_t yOffset = f / nx;
    1115              :                 // South
    1116           40 :                 if (yOffset == 0) return 0;
    1117              :                 // North
    1118           32 :                 if (yOffset == ny-1) return 2;
    1119              : 
    1120           24 :                 size_t xOffset = f % nx;
    1121              :                 // West
    1122           24 :                 if (xOffset == 0) return 1;
    1123              :                 // East
    1124           16 :                 if (xOffset == nx-1) return 3;
    1125              : 
    1126              :             } else {
    1127              :                 // are parallel to one of the other axes
    1128              :                 // Ground
    1129           80 :                 if (zOffset == 0) return 4;
    1130              :                 // Space
    1131           64 :                 if (zOffset == nz-1) return 5;
    1132              :                 
    1133           48 :                 size_t f = dofIdx - (nx*(ny-1) + ny*(nx-1) + nx*ny)*zOffset;
    1134           48 :                 size_t yOffset = f / (2*nx - 1);
    1135           48 :                 size_t xOffset = f - (2*nx - 1)*yOffset;
    1136              : 
    1137           48 :                 if (xOffset < (nx-1)) {
    1138              :                     // we are parallel to the x axis, therefore we cannot
    1139              :                     // be on an west or east boundary, but we still can
    1140              :                     // be on a north or south boundary
    1141              :                     
    1142              :                     // South
    1143           24 :                     if (yOffset == 0) return 0;
    1144              :                     // North
    1145           16 :                     if (yOffset == ny-1) return 2;
    1146              :                     
    1147              :                 } else {
    1148              :                     // we are parallel to the y axis, therefore we cannot be
    1149              :                     // on a south or north boundary, but we still can be on
    1150              :                     // a west or east boundary
    1151           24 :                     if (xOffset >= nx-1) {
    1152           24 :                         xOffset -= (nx-1);
    1153              :                     }
    1154              : 
    1155              :                     // West
    1156           24 :                     if (xOffset == 0) return 1;
    1157              :                     // East
    1158           16 :                     if (xOffset == nx-1) return 3;
    1159              :                 }
    1160              :             }
    1161           24 :             return -1;
    1162              :         }
    1163              : 
    1164              :     }
    1165              : 
    1166              : 
    1167              :     template <typename T, unsigned Dim, unsigned Order, typename ElementType,
    1168              :               typename QuadratureType, typename FieldType>
    1169            4 :     FEMVector<T> NedelecSpace<T, Dim, Order, ElementType, QuadratureType, FieldType>
    1170              :                                 ::createFEMVector2d() const{
    1171              : 
    1172              :         // Here we will create an empty FEMVector for the case of the domain
    1173              :         // being 2D.
    1174              :         // The largest part of this is going to be the handling of the halo
    1175              :         // cells, more specifically figuring out which entries of the vector are
    1176              :         // part of the sendIdxs and which are part of the recvIdxs. For this we
    1177              :         // loop through all the other domains and for each of them figure out if
    1178              :         // we share a boundary with them. If we share a boundary we then have to
    1179              :         // figure out which entries of the vector are part of this boundary. To
    1180              :         // do this we loop though all the mesh elements which are on this
    1181              :         // boundary and then for each of these elements we take the DOFs which
    1182              :         // should be part of the sendIdxs and the ones which are part of the
    1183              :         // recvIdxs. Currently in this step we have to manually select the
    1184              :         // correct DOFs of the reference element corresponding to that specific
    1185              :         // boundary type (north, south, west, east). This manual selection of
    1186              :         // the correct DOFs might lead to an "out of the blue" feeling on why
    1187              :         // exactly we selected those DOFs, but it should be easily verifyable
    1188              :         // that those are the correct DOFs.
    1189              :         // Also note that we are not exchanging any boundary information over 
    1190              :         // corners, test showed that this does not have any impact on the 
    1191              :         // correctness.
    1192              :         // For more information regarding the domain decomposition refer to the
    1193              :         // report available at: TODO add reference to report on AMAS website
    1194              : 
    1195            4 :         auto ldom = layout_m.getLocalNDIndex();
    1196            4 :         auto doms = layout_m.getHostLocalDomains();
    1197              : 
    1198              :         // Create the temporaries and so on which will store the MPI
    1199              :         // information.
    1200            4 :         std::vector<size_t> neighbors;
    1201            4 :         std::vector< Kokkos::View<size_t*> > sendIdxs;
    1202            4 :         std::vector< Kokkos::View<size_t*> > recvIdxs;
    1203            4 :         std::vector< std::vector<size_t> > sendIdxsTemp;
    1204            4 :         std::vector< std::vector<size_t> > recvIdxsTemp;
    1205              : 
    1206              :         // Here we loop thought all the domains to figure out how we are related
    1207              :         // to them and if we have to do any kind of exchange.
    1208           12 :         for (size_t i = 0; i < doms.extent(0); ++i) {
    1209            8 :             if (i == Comm->rank()) {
    1210              :                 // We are looking at ourself
    1211            4 :                 continue;
    1212              :             }
    1213            4 :             auto odom = doms(i);
    1214              : 
    1215              :             // East boundary
    1216           10 :             if (ldom.last()[0] == odom.first()[0]-1 &&
    1217            6 :                     !(odom.last()[1] < ldom.first()[1] || odom.first()[1] > ldom.last()[1])) {
    1218              :                 // Extract the range of the boundary.
    1219            2 :                 int begin = std::max(odom.first()[1], ldom.first()[1]);
    1220            2 :                 int end = std::min(odom.last()[1], ldom.last()[1]);
    1221            2 :                 int pos = ldom.last()[0];
    1222              :                 
    1223              :                 // Add this to the neighbour list.
    1224            2 :                 neighbors.push_back(i);
    1225            2 :                 sendIdxsTemp.push_back(std::vector<size_t>());
    1226            2 :                 recvIdxsTemp.push_back(std::vector<size_t>());
    1227            2 :                 size_t idx = neighbors.size() - 1;
    1228              :                 
    1229              :                 // Add all the halo
    1230            2 :                 indices_t elementPosHalo(0);
    1231            2 :                 elementPosHalo(0) = pos;
    1232            2 :                 indices_t elementPosSend(0);
    1233            2 :                 elementPosSend(0) = pos;
    1234           12 :                 for (int k = begin; k <= end; ++k) {
    1235           10 :                     elementPosHalo(1) = k;
    1236           10 :                     elementPosSend(1) = k;
    1237              :                     
    1238           10 :                     auto dofIndicesHalo = getFEMVectorDOFIndices(elementPosHalo, ldom);
    1239           10 :                     recvIdxsTemp[idx].push_back(dofIndicesHalo[3]);
    1240              : 
    1241           10 :                     auto dofIndicesSend = getFEMVectorDOFIndices(elementPosSend, ldom);
    1242           10 :                     sendIdxsTemp[idx].push_back(dofIndicesSend[0]);
    1243           10 :                     sendIdxsTemp[idx].push_back(dofIndicesSend[1]);
    1244              :                 }
    1245              :                 // Check if on very north
    1246            2 :                 if (end == layout_m.getDomain().last()[1] || ldom.last()[1] > odom.last()[1]) {
    1247            2 :                     elementPosSend(1) = end;
    1248            2 :                     auto dofIndicesSend = getFEMVectorDOFIndices(elementPosSend, ldom);
    1249              :                     // also have to add dof 2
    1250            2 :                     sendIdxsTemp[idx].push_back(dofIndicesSend[2]);
    1251            2 :                 }
    1252            2 :             }
    1253              : 
    1254              :             // West boundary
    1255           10 :             if (ldom.first()[0] == odom.last()[0]+1 &&
    1256            6 :                     !(odom.last()[1] < ldom.first()[1] || odom.first()[1] > ldom.last()[1])) {
    1257              :                 // Extract the range of the boundary.
    1258            2 :                 int begin = std::max(odom.first()[1], ldom.first()[1]);
    1259            2 :                 int end = std::min(odom.last()[1], ldom.last()[1]);
    1260            2 :                 int pos = ldom.first()[0];
    1261              :                 
    1262              :                 // Add this to the neighbour list.
    1263            2 :                 neighbors.push_back(i);
    1264            2 :                 sendIdxsTemp.push_back(std::vector<size_t>());
    1265            2 :                 recvIdxsTemp.push_back(std::vector<size_t>());
    1266            2 :                 size_t idx = neighbors.size() - 1;
    1267              :                 
    1268              :                 // Add all the halo
    1269            2 :                 indices_t elementPosHalo(0);
    1270            2 :                 elementPosHalo(0) = pos-1;
    1271            2 :                 indices_t elementPosSend(0);
    1272            2 :                 elementPosSend(0) = pos;
    1273           12 :                 for (int k = begin; k <= end; ++k) {
    1274           10 :                     elementPosHalo(1) = k;
    1275           10 :                     elementPosSend(1) = k;
    1276              :                     
    1277           10 :                     auto dofIndicesHalo = getFEMVectorDOFIndices(elementPosHalo, ldom);
    1278           10 :                     recvIdxsTemp[idx].push_back(dofIndicesHalo[0]);
    1279           10 :                     recvIdxsTemp[idx].push_back(dofIndicesHalo[1]);
    1280              : 
    1281           10 :                     auto dofIndicesSend = getFEMVectorDOFIndices(elementPosSend, ldom);
    1282           10 :                     sendIdxsTemp[idx].push_back(dofIndicesSend[1]);
    1283              :                 }
    1284              :                 // Check if on very north
    1285            2 :                 if (end == layout_m.getDomain().last()[1] || odom.last()[1] > ldom.last()[1]) {
    1286            2 :                     elementPosHalo(1) = end;
    1287            2 :                     auto dofIndicesHalo = getFEMVectorDOFIndices(elementPosHalo, ldom);
    1288              :                     // also have to add dof 2
    1289            2 :                     recvIdxsTemp[idx].push_back(dofIndicesHalo[2]);
    1290            2 :                 }
    1291            2 :             }
    1292              : 
    1293              :             // North boundary
    1294            8 :             if (ldom.last()[1] == odom.first()[1]-1 &&
    1295            4 :                     !(odom.last()[0] < ldom.first()[0] || odom.first()[0] > ldom.last()[0])) {
    1296              :                 // Extract the range of the boundary.
    1297            0 :                 int begin = std::max(odom.first()[0], ldom.first()[0]);
    1298            0 :                 int end = std::min(odom.last()[0], ldom.last()[0]);
    1299            0 :                 int pos = ldom.last()[1];
    1300              :                 
    1301              :                 // Add this to the neighbour list.
    1302            0 :                 neighbors.push_back(i);
    1303            0 :                 sendIdxsTemp.push_back(std::vector<size_t>());
    1304            0 :                 recvIdxsTemp.push_back(std::vector<size_t>());
    1305            0 :                 size_t idx = neighbors.size() - 1;
    1306              :                 
    1307              :                 // Add all the halo
    1308            0 :                 indices_t elementPosHalo(0);
    1309            0 :                 elementPosHalo(1) = pos;
    1310            0 :                 indices_t elementPosSend(0);
    1311            0 :                 elementPosSend(1) = pos;
    1312            0 :                 for (int k = begin; k <= end; ++k) {
    1313            0 :                     elementPosHalo(0) = k;
    1314            0 :                     elementPosSend(0) = k;
    1315              :                     
    1316            0 :                     auto dofIndicesHalo = getFEMVectorDOFIndices(elementPosHalo, ldom);
    1317            0 :                     recvIdxsTemp[idx].push_back(dofIndicesHalo[2]);
    1318              : 
    1319            0 :                     auto dofIndicesSend = getFEMVectorDOFIndices(elementPosSend, ldom);
    1320            0 :                     sendIdxsTemp[idx].push_back(dofIndicesSend[0]);
    1321            0 :                     sendIdxsTemp[idx].push_back(dofIndicesSend[1]);
    1322              :                 }
    1323              :                 // Check if on very east
    1324            0 :                 if (end == layout_m.getDomain().last()[0] || ldom.last()[0] > odom.last()[0]) {
    1325            0 :                     elementPosSend(0) = end;
    1326            0 :                     auto dofIndicesSend = getFEMVectorDOFIndices(elementPosSend, ldom);
    1327              :                     // also have to add dof 3
    1328            0 :                     sendIdxsTemp[idx].push_back(dofIndicesSend[3]);
    1329            0 :                 }
    1330            0 :             }
    1331              : 
    1332              :             // South boundary
    1333            8 :             if (ldom.first()[1] == odom.last()[1]+1 &&
    1334            4 :                     !(odom.last()[0] < ldom.first()[0] || odom.first()[0] > ldom.last()[0])) {
    1335              :                 // Extract the range of the boundary.
    1336            0 :                 int begin = std::max(odom.first()[0], ldom.first()[0]);
    1337            0 :                 int end = std::min(odom.last()[0], ldom.last()[0]);
    1338            0 :                 int pos = ldom.first()[1];
    1339              :                 
    1340              :                 // Add this to the neighbour list.
    1341            0 :                 neighbors.push_back(i);
    1342            0 :                 sendIdxsTemp.push_back(std::vector<size_t>());
    1343            0 :                 recvIdxsTemp.push_back(std::vector<size_t>());
    1344            0 :                 size_t idx = neighbors.size() - 1;
    1345              :                 
    1346              :                 // Add all the halo
    1347            0 :                 indices_t elementPosHalo(0);
    1348            0 :                 elementPosHalo(1) = pos-1;
    1349            0 :                 indices_t elementPosSend(0);
    1350            0 :                 elementPosSend(1) = pos;
    1351            0 :                 for (int k = begin; k <= end; ++k) {
    1352            0 :                     elementPosHalo(0) = k;
    1353            0 :                     elementPosSend(0) = k;
    1354              :                     
    1355            0 :                     auto dofIndicesHalo = getFEMVectorDOFIndices(elementPosHalo, ldom);
    1356            0 :                     recvIdxsTemp[idx].push_back(dofIndicesHalo[0]);
    1357            0 :                     recvIdxsTemp[idx].push_back(dofIndicesHalo[1]);
    1358              : 
    1359            0 :                     auto dofIndicesSend = getFEMVectorDOFIndices(elementPosSend, ldom);
    1360            0 :                     sendIdxsTemp[idx].push_back(dofIndicesSend[0]);
    1361              :                 }
    1362              :                 // Check if on very east
    1363            0 :                 if (end == layout_m.getDomain().last()[0] || odom.last()[0] > ldom.last()[0]) {
    1364            0 :                     elementPosHalo(0) = end;
    1365            0 :                     auto dofIndicesHalo = getFEMVectorDOFIndices(elementPosHalo, ldom);
    1366              :                     // also have to add dof 3
    1367            0 :                     recvIdxsTemp[idx].push_back(dofIndicesHalo[3]);
    1368            0 :                 }
    1369            0 :             }
    1370              :         }
    1371              : 
    1372              : 
    1373              : 
    1374              :         // Here we now have to translate the sendIdxsTemp and recvIdxsTemp which
    1375              :         // are std::vectors<std::vector> into the correct list type which
    1376              :         // is std::vector<Kokkos::View>
    1377            8 :         for (size_t i = 0; i < neighbors.size(); ++i) {
    1378            4 :             sendIdxs.push_back(Kokkos::View<size_t*>("FEMvector::sendIdxs[" + std::to_string(i) +
    1379            4 :                                                         "]", sendIdxsTemp[i].size()));
    1380            4 :             recvIdxs.push_back(Kokkos::View<size_t*>("FEMvector::recvIdxs[" + std::to_string(i) +
    1381            4 :                                                         "]", recvIdxsTemp[i].size()));
    1382            4 :             auto sendView = sendIdxs[i];
    1383            4 :             auto recvView = recvIdxs[i];
    1384            4 :             auto hSendView = Kokkos::create_mirror_view(sendView);
    1385            4 :             auto hRecvView = Kokkos::create_mirror_view(recvView);
    1386              : 
    1387           36 :             for (size_t j = 0; j < sendIdxsTemp[i].size(); ++j) {
    1388           64 :                 hSendView(j) = sendIdxsTemp[i][j];
    1389              :             }
    1390              : 
    1391           36 :             for (size_t j = 0; j < recvIdxsTemp[i].size(); ++j) {
    1392           64 :                 hRecvView(j) = recvIdxsTemp[i][j];
    1393              :             }
    1394              : 
    1395            4 :             Kokkos::deep_copy(sendView, hSendView);
    1396            4 :             Kokkos::deep_copy(recvView, hRecvView);
    1397              :         }
    1398              :         
    1399              : 
    1400              :         
    1401              :         // Now finaly create the FEMVector
    1402            4 :         indices_t extents(0);
    1403            4 :         extents = (ldom.last() - ldom.first()) + 3;
    1404            4 :         size_t nx = extents(0);
    1405            4 :         size_t ny = extents(1);
    1406            4 :         size_t n = nx*(ny-1) + ny*(nx-1);
    1407            4 :         FEMVector<T> vec(n, neighbors, sendIdxs, recvIdxs);
    1408              :         
    1409            4 :         return vec;
    1410            4 :     }
    1411              : 
    1412              : 
    1413              : 
    1414              :     template <typename T, unsigned Dim, unsigned Order, typename ElementType,
    1415              :               typename QuadratureType, typename FieldType>
    1416            4 :     FEMVector<T> NedelecSpace<T, Dim, Order, ElementType, QuadratureType, FieldType>
    1417              :                                 ::createFEMVector3d() const{
    1418              : 
    1419              : 
    1420              : 
    1421              :         
    1422              :         // Here we will create an empty FEMVector for the case of the domain
    1423              :         // being 3D.
    1424              :         // It follows the same principle as the 2D case (check comment there).
    1425              :         // The major difference now is that we have more types of boundaries.
    1426              :         // Namely we have 6 directions: west, east, south, north, ground, and
    1427              :         // space. Where west-east is on the x-axis, south-north on the y-axis,
    1428              :         // and ground-space on the z-axis. For this we have 3 major types of
    1429              :         // boundaries, namely "flat" boundaries which are along the coordinate
    1430              :         // axes (your standard west-east, south-north, and ground-space
    1431              :         // exchanges), then we have two different types of diagonal exchanges,
    1432              :         // which we will call "positive" and "negative", we have two types as
    1433              :         // a diagonal exchange always happens over an edge and this edges is 
    1434              :         // shared by 4 different ranks, so we have two different diagonal 
    1435              :         // exchanges per edge, which we differentiate with "positive" and 
    1436              :         // "negative".
    1437              :         // If we now look at these types independently we have that the code
    1438              :         // needed to perform one such exchange is large going to be independent
    1439              :         // of the direction of the exchange (i.e. is the "flat" exchange
    1440              :         // happening over a west-est or a ground-space boundary) the major
    1441              :         // difference is the DOF indices we have to chose for the elements on
    1442              :         // the boundary (check the 2D case for more info). 
    1443              :         // We therefore create 3 lambdas for these different types of boundaries
    1444              :         // which we then call with appropriate arguments for the direction of the
    1445              :         // exchange.
    1446              :         // Note that like with the 2D case we do not consider any exchanges over
    1447              :         // corners.
    1448              :         // For more information regarding the domain decomposition refer to the
    1449              :         // report available at: TODO add reference to report on AMAS website
    1450              : 
    1451              :         using indices_t = Vector<int, Dim>;
    1452              : 
    1453            4 :         auto ldom = layout_m.getLocalNDIndex();
    1454            4 :         auto doms = layout_m.getHostLocalDomains();
    1455              : 
    1456              :         // Create the temporaries and so on which will store the MPI
    1457              :         // information.
    1458            4 :         std::vector<size_t> neighbors;
    1459            4 :         std::vector< Kokkos::View<size_t*> > sendIdxs;
    1460            4 :         std::vector< Kokkos::View<size_t*> > recvIdxs;
    1461            4 :         std::vector< std::vector<size_t> > sendIdxsTemp;
    1462            4 :         std::vector< std::vector<size_t> > recvIdxsTemp;
    1463              : 
    1464              :         // The parameters are:
    1465              :         // i: The index of the other dom we are looking at (according to doms).
    1466              :         // a: Along which axis the exchange happens, 0 = x-axis, 1 = y-axis,
    1467              :         //      2 = z-axis.
    1468              :         // f,s: While the exchange happens over the axis "a" we have that
    1469              :         //        elements which are part of the boundary are on a plane spanned
    1470              :         //        by the other two axes, these other two axes are then given by
    1471              :         //        these two variables "f" and "s" (they then also define the
    1472              :         //        order in which we go though these axes).
    1473              :         // va,vb: These are the placeholders for the sendIdxs and recvIdxs
    1474              :         //          arrays, note that depending on if an exchange happens from,
    1475              :         //          e.g. west to east or from east to west the role of which of 
    1476              :         //          these placeholders stores the sendIdxs and which one stores
    1477              :         //          the recvIdxs changes.
    1478              :         // posA, posB: The "a"-axis coordinate of the elements which are part of
    1479              :         //               the boundary, we have two of them as the coordinate
    1480              :         //               can be different depending on if we are looking at the
    1481              :         //               sendIdxs or the recvIdxs.
    1482              :         // idxsA, idxsB: These are going to be the local DOF indices of the
    1483              :         //                 elements which are part of the boundary and which
    1484              :         //                 need to be exchanged. Again we have two as the
    1485              :         //                 indices are going to depend on if we are looking at
    1486              :         //                 the sendIdxs or the recvIdxs
    1487              :         // adom, bdom: The domain extents of the two domains which are part of
    1488              :         //               this exchange.
    1489            8 :         auto flatBoundaryExchange = [this, &neighbors, &ldom](
    1490              :             size_t i, size_t a, size_t f, size_t s,
    1491              :             std::vector<std::vector<size_t> >& va, std::vector<std::vector<size_t> >& vb,
    1492              :             int posA, int posB,
    1493              :             const std::vector<size_t>& idxsA, const std::vector<size_t>& idxsB,
    1494              :             NDIndex<3>& adom, NDIndex<3>& bdom) {
    1495              :             
    1496            4 :             int beginF = std::max(bdom.first()[f], adom.first()[f]);
    1497            4 :             int endF = std::min(bdom.last()[f], adom.last()[f]);
    1498            4 :             int beginS = std::max(bdom.first()[s], adom.first()[s]);
    1499            4 :             int endS = std::min(bdom.last()[s], adom.last()[s]);
    1500              :             
    1501            4 :             neighbors.push_back(i);
    1502            4 :             va.push_back(std::vector<size_t>());
    1503            4 :             vb.push_back(std::vector<size_t>());
    1504            4 :             size_t idx = neighbors.size() - 1;
    1505              : 
    1506              : 
    1507            4 :             indices_t elementPosA(0);
    1508            4 :             elementPosA(a) = posA;
    1509            4 :             indices_t elementPosB(0);
    1510            4 :             elementPosB(a) = posB;
    1511              : 
    1512              :             // Here we now have the double loop that goes though all the
    1513              :             // elements spanned by the plane given by the "f" and "s" axis.
    1514           16 :             for (int k = beginF; k <= endF; ++k) {
    1515           12 :                 elementPosA(f) = k;
    1516           12 :                 elementPosB(f) = k;
    1517           48 :                 for (int l = beginS; l <= endS; ++l) {
    1518           36 :                     elementPosA(s) = l;
    1519           36 :                     elementPosB(s) = l;
    1520              : 
    1521           36 :                     auto dofIndicesA = this->getFEMVectorDOFIndices(elementPosA, ldom);
    1522           36 :                     va[idx].push_back(dofIndicesA[idxsA[0]]);
    1523           36 :                     va[idx].push_back(dofIndicesA[idxsA[1]]);
    1524              : 
    1525           36 :                     auto dofIndicesB = this->getFEMVectorDOFIndices(elementPosB, ldom);
    1526           36 :                     vb[idx].push_back(dofIndicesB[idxsB[0]]);
    1527           36 :                     vb[idx].push_back(dofIndicesB[idxsB[1]]);
    1528           36 :                     vb[idx].push_back(dofIndicesB[idxsB[2]]);
    1529              : 
    1530              :                     
    1531              :                     // We now have reached the end of the first axis and have to
    1532              :                     // figure out if we need to add any additional DOFs. If we 
    1533              :                     // need to add DOFs depends on if we are at the mesh
    1534              :                     // boundary and if one of the two domains does not end here
    1535              :                     // and "overlaps" the other one.
    1536           36 :                     if (k == endF) {
    1537           24 :                         if (endF == layout_m.getDomain().last()[f] ||
    1538           12 :                                 bdom.last()[f] > adom.last()[f]) {
    1539           12 :                             va[idx].push_back(dofIndicesA[idxsA[2]]);
    1540              :                         }
    1541              :                         
    1542           24 :                         if (endF == layout_m.getDomain().last()[f] ||
    1543           12 :                                 adom.last()[f] > bdom.last()[f]) {
    1544           12 :                             vb[idx].push_back(dofIndicesB[idxsB[3]]);
    1545           12 :                             vb[idx].push_back(dofIndicesB[idxsB[4]]);
    1546              :                         }
    1547              :                         
    1548              :                         // This is a modification to the beginning of the f axis
    1549              :                         // we still put it on here as we are guaranteed that
    1550              :                         // like this it only gets called once
    1551              :                         // call this last, as modifies elementPosA(s) 
    1552           12 :                         if (bdom.first()[f] < adom.first()[f]) {
    1553            0 :                             indices_t tmpPos = elementPosA;
    1554            0 :                             tmpPos(f) = beginF-1;
    1555            0 :                             auto dofIndicestmp = this->getFEMVectorDOFIndices(tmpPos, ldom);
    1556            0 :                             va[idx].push_back(dofIndicestmp[idxsA[0]]);
    1557            0 :                             va[idx].push_back(dofIndicestmp[idxsA[1]]);
    1558            0 :                         }
    1559              :                     }
    1560              :                 }
    1561              :                 
    1562              :                 // We now have reached the end of the second axis and have to
    1563              :                 // figure out if we need to add any additional DOFs. If we need
    1564              :                 // to add DOFs depends on if we are at the mesh boundary and if
    1565              :                 // one of the two domains does not end here and "overlaps" the
    1566              :                 // other one.
    1567              : 
    1568           12 :                 if (endS == layout_m.getDomain().last()[s] || bdom.last()[s] > adom.last()[s]) {
    1569           12 :                     elementPosA(s) = endS;
    1570           12 :                     auto dofIndicesA = this->getFEMVectorDOFIndices(elementPosA, ldom);
    1571           12 :                     va[idx].push_back(dofIndicesA[idxsA[3]]);
    1572           12 :                 }
    1573              :                 
    1574           12 :                 if (endS == layout_m.getDomain().last()[s] || adom.last()[s] > bdom.last()[s]) {
    1575           12 :                     elementPosB(s) = endS;
    1576           12 :                     auto dofIndicesB = this->getFEMVectorDOFIndices(elementPosB, ldom);
    1577           12 :                     vb[idx].push_back(dofIndicesB[idxsB[5]]);
    1578           12 :                     vb[idx].push_back(dofIndicesB[idxsB[6]]);
    1579           12 :                 }
    1580              :                 
    1581              :                 // This is a modification to the beginning of the s axis
    1582              :                 // we still put it on here as we are guaranteed that
    1583              :                 // like this it only gets called once
    1584              :                 // call this last, as modifies elementPosA(f);
    1585           12 :                 if (bdom.first()[f] < adom.first()[f]) {
    1586            0 :                     indices_t tmpPos = elementPosA;
    1587            0 :                     tmpPos(s) = beginS-1;
    1588            0 :                     auto dofIndicestmp = this->getFEMVectorDOFIndices(tmpPos, ldom);
    1589            0 :                     va[idx].push_back(dofIndicestmp[idxsA[0]]);
    1590            0 :                     va[idx].push_back(dofIndicestmp[idxsA[1]]);
    1591            0 :                 }
    1592              :             }
    1593              :             // At this point we have reached the end of both axes f and s and 
    1594              :             // therefore now have to make one final check.
    1595           12 :             if ((endF == layout_m.getDomain().last()[f] || adom.last()[f] > bdom.last()[f]) && 
    1596            8 :                 (endS == layout_m.getDomain().last()[s] || adom.last()[s] > bdom.last()[s])) {
    1597            4 :                 elementPosB(f) = endF;
    1598            4 :                 elementPosB(s) = endS;
    1599            4 :                 auto dofIndicesB = this->getFEMVectorDOFIndices(elementPosB, ldom);
    1600            4 :                 vb[idx].push_back(dofIndicesB[idxsB[7]]);
    1601            4 :             }
    1602            4 :         };
    1603              : 
    1604              :         // The parameters are:
    1605              :         // i: The index of the other dom we are looking at (according to doms).
    1606              :         // a: Along which axis the edge is over which the exchange happens,
    1607              :         //      0 = x-axis, 1 = y-axis, 2 = z-axis.
    1608              :         // f,s: While the exchange happens over the edge along the  axis "a" we
    1609              :         //        have store in those two the other two axes.
    1610              :         // ao,bo: These are offset variables as certain exchanges require
    1611              :         //          offsets to certain values.
    1612              :         // va,vb: These are the placeholders for the sendIdxs and recvIdxs
    1613              :         //          arrays, note that depending on if an exchange happens from,
    1614              :         //          e.g. west to east or from east to west the role of which of 
    1615              :         //          these placeholders stores the sendIdxs and which one stores
    1616              :         //          the recvIdxs changes.
    1617              :         // idxsA, idxsB: These are going to be the local DOF indices of the
    1618              :         //                 elements which are part of the boundary and which
    1619              :         //                 need to be exchanged. Again we have two as the
    1620              :         //                 indices are going to depend on if we are looking at
    1621              :         //                 the sendIdxs or the recvIdxs
    1622              :         // odom: The other domain we are exchanging to.
    1623            4 :         auto negativeDiagonalExchange = [this, &neighbors, &ldom](
    1624              :             size_t i, size_t a, size_t f, size_t s, int ao, int bo,
    1625              :             std::vector<std::vector<size_t> >& va, std::vector<std::vector<size_t> >& vb,
    1626              :             const std::vector<size_t>& idxsA, const std::vector<size_t>& idxsB,
    1627              :             NDIndex<3>& odom) {
    1628              :             
    1629            0 :             neighbors.push_back(i);
    1630            0 :             va.push_back(std::vector<size_t>());
    1631            0 :             vb.push_back(std::vector<size_t>());
    1632            0 :             size_t idx = neighbors.size() - 1;
    1633              : 
    1634            0 :             indices_t elementPosA(0);
    1635            0 :             elementPosA(f) = ldom.last()[f];
    1636            0 :             elementPosA(s) = ldom.first()[s] + ao;
    1637              : 
    1638            0 :             indices_t elementPosB(0);
    1639            0 :             elementPosB(f) = ldom.last()[f];
    1640            0 :             elementPosB(s) = ldom.first()[s] + bo;
    1641              : 
    1642            0 :             int begin = std::max(odom.first()[a], ldom.first()[a]);
    1643            0 :             int end = std::min(odom.last()[a], ldom.last()[a]);
    1644              :             // Loop through all the elements along the edge.
    1645            0 :             for (int k = begin; k <= end; ++k) {
    1646            0 :                 elementPosA(a) = k;
    1647            0 :                 elementPosB(a) = k;
    1648              : 
    1649            0 :                 auto dofIndicesA = this->getFEMVectorDOFIndices(elementPosA, ldom);
    1650            0 :                 va[idx].push_back(dofIndicesA[idxsA[0]]);
    1651            0 :                 va[idx].push_back(dofIndicesA[idxsA[1]]);
    1652              : 
    1653            0 :                 auto dofIndicesB = this->getFEMVectorDOFIndices(elementPosB, ldom);
    1654            0 :                 vb[idx].push_back(dofIndicesB[idxsB[0]]);
    1655            0 :                 vb[idx].push_back(dofIndicesB[idxsB[1]]);
    1656              :             }
    1657            0 :         };
    1658              : 
    1659              : 
    1660              :         // The parameters are:
    1661              :         // i: The index of the other dom we are looking at (according to doms).
    1662              :         // a: Along which axis the edge is over which the exchange happens,
    1663              :         //      0 = x-axis, 1 = y-axis, 2 = z-axis.
    1664              :         // f,s: While the exchange happens over the edge along the  axis "a" we
    1665              :         //        have store in those two the other two axes.
    1666              :         // va,vb: These are the placeholders for the sendIdxs and recvIdxs
    1667              :         //          arrays, note that depending on if an exchange happens from,
    1668              :         //          e.g. west to east or from east to west the role of which of 
    1669              :         //          these placeholders stores the sendIdxs and which one stores
    1670              :         //          the recvIdxs changes.
    1671              :         // idxsA, idxsB: These are going to be the local DOF indices of the
    1672              :         //                 elements which are part of the boundary and which
    1673              :         //                 need to be exchanged. Again we have two as the
    1674              :         //                 indices are going to depend on if we are looking at
    1675              :         //                 the sendIdxs or the recvIdxs
    1676              :         // odom: The other domain we are exchanging to.
    1677            4 :         auto positiveDiagonalExchange = [this, &neighbors, &ldom](
    1678              :             size_t i, size_t a, size_t f, size_t s,
    1679              :             indices_t posA, indices_t posB,
    1680              :             std::vector<std::vector<size_t> >& va, std::vector<std::vector<size_t> >& vb,
    1681              :             const std::vector<size_t>& idxsA, const std::vector<size_t>& idxsB,
    1682              :             NDIndex<3>& odom) {
    1683              :             
    1684            0 :             neighbors.push_back(i);
    1685            0 :             va.push_back(std::vector<size_t>());
    1686            0 :             vb.push_back(std::vector<size_t>());
    1687            0 :             size_t idx = neighbors.size() - 1;
    1688              : 
    1689            0 :             indices_t elementPosA(0);
    1690            0 :             elementPosA(f) = posA(f);
    1691            0 :             elementPosA(s) = posA(s);
    1692              : 
    1693            0 :             indices_t elementPosB(0);
    1694            0 :             elementPosB(f) = posB(f);
    1695            0 :             elementPosB(s) = posB(s);
    1696              : 
    1697            0 :             int begin = std::max(odom.first()[a], ldom.first()[a]);
    1698            0 :             int end = std::min(odom.last()[a], ldom.last()[a]);
    1699              : 
    1700            0 :             for (int k = begin; k <= end; ++k) {
    1701            0 :                 elementPosA(a) = k;
    1702            0 :                 elementPosB(a) = k;
    1703              : 
    1704            0 :                 auto dofIndicesA = this->getFEMVectorDOFIndices(elementPosA, ldom);
    1705            0 :                 va[idx].push_back(dofIndicesA[idxsA[0]]);
    1706            0 :                 va[idx].push_back(dofIndicesA[idxsA[1]]);
    1707            0 :                 va[idx].push_back(dofIndicesA[idxsA[2]]);
    1708              : 
    1709            0 :                 auto dofIndicesB = this->getFEMVectorDOFIndices(elementPosB, ldom);
    1710            0 :                 vb[idx].push_back(dofIndicesB[idxsB[0]]);
    1711              :             }
    1712            0 :         };
    1713              : 
    1714              :         // After we now have defined the code required for each of the exchanges
    1715              :         // we can look at all the exchanges we need to make and call the lambdas
    1716              :         // with the appropriate parameters.
    1717              : 
    1718              : 
    1719              :         // Here we loop through all the domains to figure out how we are related
    1720              :         // to them and if we have to do any kind of exchange.
    1721           12 :         for (size_t i = 0; i < doms.extent(0); ++i) {
    1722            8 :             if (i == Comm->rank()) {
    1723              :                 // We are looking at ourself
    1724            4 :                 continue;
    1725              :             }
    1726            4 :             auto odom = doms(i);
    1727              : 
    1728              :             // East boundary
    1729            8 :             if (ldom.last()[0] == odom.first()[0]-1 &&
    1730           14 :                     !(odom.last()[1] < ldom.first()[1] || odom.first()[1] > ldom.last()[1]) && 
    1731            6 :                     !(odom.last()[2] < ldom.first()[2] || odom.first()[2] > ldom.last()[2])) {
    1732              :                 
    1733            2 :                 int pos = ldom.last()[0];
    1734           10 :                 flatBoundaryExchange(
    1735              :                     i, 0, 1, 2,
    1736              :                     recvIdxsTemp, sendIdxsTemp,
    1737              :                     pos, pos,
    1738              :                     {3,5,6,11}, {0,1,4,2,7,8,9,10},
    1739              :                     ldom, odom
    1740              :                 );
    1741              :             }
    1742              : 
    1743              :             // West boundary
    1744            8 :             if (ldom.first()[0] == odom.last()[0]+1 &&
    1745           14 :                     !(odom.last()[1] < ldom.first()[1] || odom.first()[1] > ldom.last()[1]) && 
    1746            6 :                     !(odom.last()[2] < ldom.first()[2] || odom.first()[2] > ldom.last()[2])) {
    1747              :                 
    1748            2 :                 int pos = ldom.first()[0];
    1749           10 :                 flatBoundaryExchange(
    1750              :                     i, 0, 1, 2,
    1751              :                     sendIdxsTemp, recvIdxsTemp,
    1752              :                     pos, pos-1,
    1753              :                     {1,4,7,9}, {0,1,4,2,7,8,9,10},
    1754              :                     odom, ldom
    1755              :                 );
    1756              :             }
    1757              : 
    1758              :             // North boundary
    1759            8 :             if (ldom.last()[1] == odom.first()[1]-1 &&
    1760           12 :                     !(odom.last()[0] < ldom.first()[0] || odom.first()[0] > ldom.last()[0]) &&
    1761            4 :                     !(odom.last()[2] < ldom.first()[2] || odom.first()[2] > ldom.last()[2])) {
    1762              : 
    1763            0 :                 int pos = ldom.last()[1];
    1764            0 :                 flatBoundaryExchange(
    1765              :                     i, 1, 0, 2,
    1766              :                     recvIdxsTemp, sendIdxsTemp,
    1767              :                     pos, pos,
    1768              :                     {2,7,6,10}, {0,1,4,3,5,8,9,11},
    1769              :                     ldom, odom
    1770              :                 );
    1771              :             }
    1772              : 
    1773              :             // South boundary
    1774            8 :             if (ldom.first()[1] == odom.last()[1]+1 &&
    1775           12 :                     !(odom.last()[0] < ldom.first()[0] || odom.first()[0] > ldom.last()[0]) &&
    1776            4 :                     !(odom.last()[2] < ldom.first()[2] || odom.first()[2] > ldom.last()[2])) {
    1777              :                 
    1778            0 :                 int pos = ldom.first()[1];
    1779            0 :                 flatBoundaryExchange(
    1780              :                     i, 1, 0, 2,
    1781              :                     sendIdxsTemp, recvIdxsTemp,
    1782              :                     pos, pos-1,
    1783              :                     {0,4,5,8}, {0,1,4,3,5,8,9,11},
    1784              :                     odom, ldom
    1785              :                 );
    1786              :                 
    1787              :             }
    1788              : 
    1789              :             // Space boundary
    1790            8 :             if (ldom.last()[2] == odom.first()[2]-1 &&
    1791           12 :                     !(odom.last()[0] < ldom.first()[0] || odom.first()[0] > ldom.last()[0]) && 
    1792            4 :                     !(odom.last()[1] < ldom.first()[1] || odom.first()[1] > ldom.last()[1])) {
    1793              :                 
    1794            0 :                 int pos = ldom.last()[2];
    1795            0 :                 flatBoundaryExchange(
    1796              :                     i, 2, 0, 1,
    1797              :                     recvIdxsTemp, sendIdxsTemp,
    1798              :                     pos, pos,
    1799              :                     {8,9,11,10}, {0,1,4,3,5,2,7,6},
    1800              :                     ldom, odom
    1801              :                 );
    1802              :             }
    1803              : 
    1804              :             // Ground boundary
    1805            8 :             if (ldom.first()[2] == odom.last()[2]+1 &&
    1806           12 :                     !(odom.last()[0] < ldom.first()[0] || odom.first()[0] > ldom.last()[0]) && 
    1807            4 :                     !(odom.last()[1] < ldom.first()[1] || odom.first()[1] > ldom.last()[1])) {
    1808              : 
    1809            0 :                 int pos = ldom.first()[2];
    1810            0 :                 flatBoundaryExchange(
    1811              :                     i, 2, 0, 1,
    1812              :                     sendIdxsTemp, recvIdxsTemp,
    1813              :                     pos, pos-1,
    1814              :                     {0,1,3,2}, {0,1,4,3,5,2,7,6},
    1815              :                     odom, ldom
    1816              :                 );
    1817              :             }
    1818              : 
    1819              : 
    1820              :             
    1821              :             // Next up we handle all the annoying diagonals.
    1822              :             // The negative ones:
    1823              :             // Parallel to y from space to ground, west to east
    1824            8 :             if (ldom.last()[0] == odom.first()[0]-1 && ldom.first()[2] == odom.last()[2]+1 && 
    1825            4 :                     !(odom.last()[1] < ldom.first()[1] || odom.first()[1] > ldom.last()[1])) {
    1826              :                 
    1827            0 :                 negativeDiagonalExchange(
    1828              :                     i, 1, 0, 2, 0, -1,
    1829              :                     sendIdxsTemp, recvIdxsTemp,
    1830              :                     {0,1}, {3,5},
    1831              :                     odom
    1832              :                 );
    1833              :             }
    1834              : 
    1835              :             // Parallel to y from ground to space, east to west
    1836            8 :             if (ldom.first()[0] == odom.last()[0]+1 && ldom.last()[2] == odom.first()[2]-1 && 
    1837            4 :                     !(odom.last()[1] < ldom.first()[1] || odom.first()[1] > ldom.last()[1])) {
    1838              :                 
    1839            0 :                 negativeDiagonalExchange(
    1840              :                     i, 1, 2, 0, -1, 0,
    1841              :                     recvIdxsTemp, sendIdxsTemp,
    1842              :                     {8,9}, {1,4},
    1843              :                     odom
    1844              :                 );
    1845              :             }
    1846              : 
    1847              : 
    1848              :             // Parallel to x from space to ground, south to north
    1849            8 :             if (ldom.last()[1] == odom.first()[1]-1 && ldom.first()[2] == odom.last()[2]+1 && 
    1850            4 :                     !(odom.last()[0] < ldom.first()[0] || odom.first()[0] > ldom.last()[0])) {
    1851            0 :                 negativeDiagonalExchange(
    1852              :                     i, 0, 1, 2, 0, -1,
    1853              :                     sendIdxsTemp, recvIdxsTemp,
    1854              :                     {0,1}, {2,7},
    1855              :                     odom
    1856              :                 );
    1857              :             }
    1858              : 
    1859              :             // Parallel to x from ground to space, north to south
    1860            8 :             if (ldom.first()[1] == odom.last()[1]+1 && ldom.last()[2] == odom.first()[2]-1 && 
    1861            4 :                     !(odom.last()[0] < ldom.first()[0] || odom.first()[0] > ldom.last()[0])) {
    1862            0 :                 negativeDiagonalExchange(
    1863              :                     i, 0, 2, 1, -1, 0,
    1864              :                     recvIdxsTemp, sendIdxsTemp,
    1865              :                     {8,9}, {0,4},
    1866              :                     odom
    1867              :                 );
    1868              :             }
    1869              : 
    1870              : 
    1871              :             // Parallel to z from west to east, north to south
    1872            8 :             if (ldom.last()[0] == odom.first()[0]-1 && ldom.first()[1] == odom.last()[1]+1 && 
    1873            4 :                     !(odom.last()[2] < ldom.first()[2] || odom.first()[2] > ldom.last()[2])) {
    1874            0 :                 negativeDiagonalExchange(
    1875              :                     i, 2, 0, 1, 0, -1,
    1876              :                     sendIdxsTemp, recvIdxsTemp,
    1877              :                     {0,4}, {3,5},
    1878              :                     odom
    1879              :                 );
    1880              :             }
    1881              : 
    1882              :             // Parallel to z from east to west, south to north
    1883            8 :             if (ldom.first()[0] == odom.last()[0]+1 && ldom.last()[1] == odom.first()[1]-1 && 
    1884            4 :                     !(odom.last()[2] < ldom.first()[2] || odom.first()[2] > ldom.last()[2])) {
    1885            0 :                 negativeDiagonalExchange(
    1886              :                     i, 2, 1, 0, -1, 0,
    1887              :                     recvIdxsTemp, sendIdxsTemp,
    1888              :                     {2,7}, {1,4},
    1889              :                     odom
    1890              :                 );
    1891              :             }
    1892              : 
    1893              : 
    1894              : 
    1895              :             // The positive ones
    1896              :             // Parallel to y from ground to space, west to east
    1897            8 :             if (ldom.last()[0] == odom.first()[0]-1 && ldom.last()[2] == odom.first()[2]-1 && 
    1898            4 :                     !(odom.last()[1] < ldom.first()[1] || odom.first()[1] > ldom.last()[1])) {
    1899            0 :                 positiveDiagonalExchange(
    1900              :                     i, 1, 0, 2,
    1901            0 :                     ldom.last(), ldom.last(),
    1902              :                     sendIdxsTemp, recvIdxsTemp,
    1903              :                     {0,1,4}, {11},
    1904              :                     odom
    1905              :                 );
    1906              :             }
    1907              : 
    1908              :             // Parallel to y from space to ground, east to west
    1909            8 :             if (ldom.first()[0] == odom.last()[0]+1 && ldom.first()[2] == odom.last()[2]+1 && 
    1910            4 :                     !(odom.last()[1] < ldom.first()[1] || odom.first()[1] > ldom.last()[1])) {
    1911            0 :                 positiveDiagonalExchange(
    1912              :                     i, 1, 0, 2,
    1913            0 :                     ldom.first()-1, ldom.first(),
    1914              :                     recvIdxsTemp, sendIdxsTemp,
    1915              :                     {0,1,4}, {1},
    1916              :                     odom
    1917              :                 );
    1918              :             }
    1919              : 
    1920              : 
    1921              :             // Parallel to x from ground to space, south to north
    1922            8 :             if (ldom.last()[1] == odom.first()[1]-1 && ldom.last()[2] == odom.first()[2]-1 && 
    1923            4 :                     !(odom.last()[0] < ldom.first()[0] || odom.first()[0] > ldom.last()[0])) {
    1924            0 :                 positiveDiagonalExchange(
    1925              :                     i, 0, 1, 2,
    1926            0 :                     ldom.last(), ldom.last(),
    1927              :                     sendIdxsTemp, recvIdxsTemp,
    1928              :                     {0,1,4}, {10},
    1929              :                     odom
    1930              :                 );
    1931              :             }
    1932              : 
    1933              :             // Parallel to x from space to ground, north to south
    1934            8 :             if (ldom.first()[1] == odom.last()[1]+1 && ldom.first()[2] == odom.last()[2]+1 && 
    1935            4 :                     !(odom.last()[0] < ldom.first()[0] || odom.first()[0] > ldom.last()[0])) {
    1936            0 :                 positiveDiagonalExchange(
    1937              :                     i, 0, 1, 2,
    1938            0 :                     ldom.first()-1, ldom.first(),
    1939              :                     recvIdxsTemp, sendIdxsTemp,
    1940              :                     {0,1,4}, {0},
    1941              :                     odom
    1942              :                 );
    1943              :             }
    1944              : 
    1945              : 
    1946              :             // Parallel to z from west to east, south to north
    1947            8 :             if (ldom.last()[0] == odom.first()[0]-1 && ldom.last()[1] == odom.first()[1]-1 && 
    1948            4 :                     !(odom.last()[2] < ldom.first()[2] || odom.first()[2] > ldom.last()[2])) {
    1949            0 :                 positiveDiagonalExchange(
    1950              :                     i, 2, 0, 1,
    1951            0 :                     ldom.last(), ldom.last(),
    1952              :                     sendIdxsTemp, recvIdxsTemp,
    1953              :                     {0,1,4}, {6},
    1954              :                     odom
    1955              :                 );
    1956              :             }
    1957              : 
    1958              :             // Parallel to z from east to west, north to south
    1959            8 :             if (ldom.first()[0] == odom.last()[0]+1 && ldom.first()[1] == odom.last()[1]+1 && 
    1960            4 :                     !(odom.last()[2] < ldom.first()[2] || odom.first()[2] > ldom.last()[2])) {
    1961            0 :                 positiveDiagonalExchange(
    1962              :                     i, 2, 0, 1,
    1963            0 :                     ldom.first()-1, ldom.first(),
    1964              :                     recvIdxsTemp, sendIdxsTemp,
    1965              :                     {0,1,4}, {4},
    1966              :                     odom
    1967              :                 );
    1968              :             }
    1969              :             
    1970              :         }
    1971              :         
    1972              : 
    1973              : 
    1974              : 
    1975              :         // Here we now have to translate the sendIdxsTemp and recvIdxsTemp which
    1976              :         // are std::vectors<std::vector> into the correct list type which
    1977              :         // is std::vector<Kokkos::View>
    1978            8 :         for (size_t i = 0; i < neighbors.size(); ++i) {
    1979            4 :             sendIdxs.push_back(Kokkos::View<size_t*>("FEMvector::sendIdxs[" + std::to_string(i) +
    1980            4 :                                                         "]", sendIdxsTemp[i].size()));
    1981            4 :             recvIdxs.push_back(Kokkos::View<size_t*>("FEMvector::recvIdxs[" + std::to_string(i) +
    1982            4 :                                                         "]", recvIdxsTemp[i].size()));
    1983            4 :             auto sendView = sendIdxs[i];
    1984            4 :             auto recvView = recvIdxs[i];
    1985            4 :             auto hSendView = Kokkos::create_mirror_view(sendView);
    1986            4 :             auto hRecvView = Kokkos::create_mirror_view(recvView);
    1987              :             
    1988          132 :             for (size_t j = 0; j < sendIdxsTemp[i].size(); ++j) {
    1989          256 :                 hSendView(j) = sendIdxsTemp[i][j];
    1990              :             }
    1991              :             
    1992          132 :             for (size_t j = 0; j < recvIdxsTemp[i].size(); ++j) {
    1993          256 :                 hRecvView(j) = recvIdxsTemp[i][j];
    1994              :             }
    1995              : 
    1996            4 :             Kokkos::deep_copy(sendView, hSendView);
    1997            4 :             Kokkos::deep_copy(recvView, hRecvView);
    1998              :         }
    1999              :         
    2000              : 
    2001              :         
    2002              :         // Now finaly create the FEMVector
    2003            4 :         indices_t extents(0);
    2004            4 :         extents = (ldom.last() - ldom.first()) + 3;
    2005            4 :         size_t nx = extents(0);
    2006            4 :         size_t ny = extents(1);
    2007            4 :         size_t nz = extents(2);
    2008            4 :         size_t n = (nz-1)*(nx*(ny-1) + ny*(nx-1) + nx*ny) + nx*(ny-1) + ny*(nx-1);
    2009            4 :         FEMVector<T> vec(n, neighbors, sendIdxs, recvIdxs);
    2010              :         
    2011            4 :         return vec;
    2012            4 :     }
    2013              : 
    2014              : 
    2015              : 
    2016              : }  // namespace ippl
         |