LCOV - code coverage report
Current view: top level - src/FEM - LagrangeSpace.hpp (source / functions) Coverage Total Hit
Test: final_report.info Lines: 28.8 % 725 209
Test Date: 2025-09-01 09:40:18 Functions: 51.4 % 331 170

            Line data    Source code
       1              : 
       2              : namespace ippl {
       3              : 
       4              :     // LagrangeSpace constructor, which calls the FiniteElementSpace constructor,
       5              :     // and decomposes the elements among ranks according to layout.
       6              :     template <typename T, unsigned Dim, unsigned Order, typename ElementType,
       7              :               typename QuadratureType, typename FieldLHS, typename FieldRHS>
       8          408 :     LagrangeSpace<T, Dim, Order, ElementType, QuadratureType, FieldLHS, FieldRHS>::LagrangeSpace(
       9              :         UniformCartesian<T, Dim>& mesh, ElementType& ref_element, const QuadratureType& quadrature,
      10              :         const Layout_t& layout)
      11              :         : FiniteElementSpace<T, Dim, getLagrangeNumElementDOFs(Dim, Order), ElementType,
      12          408 :                              QuadratureType, FieldLHS, FieldRHS>(mesh, ref_element, quadrature) {
      13              :         // Assert that the dimension is either 1, 2 or 3.
      14              :         static_assert(Dim >= 1 && Dim <= 3,
      15              :                       "Finite Element space only supports 1D, 2D and 3D meshes");
      16              : 
      17              :         // Initialize the elementIndices view
      18          408 :         initializeElementIndices(layout);
      19          408 :     }
      20              : 
      21              :     // LagrangeSpace constructor, which calls the FiniteElementSpace constructor.
      22              :     template <typename T, unsigned Dim, unsigned Order, typename ElementType,
      23              :               typename QuadratureType, typename FieldLHS, typename FieldRHS>
      24           12 :     LagrangeSpace<T, Dim, Order, ElementType, QuadratureType, FieldLHS, FieldRHS>::LagrangeSpace(
      25              :         UniformCartesian<T, Dim>& mesh, ElementType& ref_element, const QuadratureType& quadrature)
      26              :         : FiniteElementSpace<T, Dim, getLagrangeNumElementDOFs(Dim, Order), ElementType,
      27           12 :                              QuadratureType, FieldLHS, FieldRHS>(mesh, ref_element, quadrature) {
      28              :         // Assert that the dimension is either 1, 2 or 3.
      29              :         static_assert(Dim >= 1 && Dim <= 3,
      30              :                       "Finite Element space only supports 1D, 2D and 3D meshes");
      31           12 :     }
      32              : 
      33              :     // LagrangeSpace initializer, to be made available to the FEMPoissonSolver 
      34              :     // such that we can call it from setRhs.
      35              :     // Sets the correct mesh ad decomposes the elements among ranks according to layout.
      36              :     template <typename T, unsigned Dim, unsigned Order, typename ElementType,
      37              :               typename QuadratureType, typename FieldLHS, typename FieldRHS>
      38            0 :     void LagrangeSpace<T, Dim, Order, ElementType, QuadratureType, FieldLHS, FieldRHS>::initialize(
      39              :         UniformCartesian<T, Dim>& mesh, const Layout_t& layout)
      40              :     {
      41              :         FiniteElementSpace<T, Dim, getLagrangeNumElementDOFs(Dim, Order), ElementType,
      42            0 :                            QuadratureType, FieldLHS, FieldRHS>::setMesh(mesh);
      43              : 
      44              :         // Initialize the elementIndices view
      45            0 :         initializeElementIndices(layout);
      46            0 :     }
      47              : 
      48              :     // Initialize element indices Kokkos View by distributing elements among MPI ranks.
      49              :     template <typename T, unsigned Dim, unsigned Order, typename ElementType,
      50              :               typename QuadratureType, typename FieldLHS, typename FieldRHS>
      51          408 :     void LagrangeSpace<T, Dim, Order, ElementType, QuadratureType, FieldLHS,
      52              :                        FieldRHS>::initializeElementIndices(const Layout_t& layout) {
      53          408 :         const auto& ldom = layout.getLocalNDIndex();
      54          408 :         int npoints      = ldom.size();
      55          408 :         auto first       = ldom.first();
      56          408 :         auto last        = ldom.last();
      57          408 :         ippl::Vector<double, Dim> bounds;
      58              : 
      59         1260 :         for (size_t d = 0; d < Dim; ++d) {
      60          852 :             bounds[d] = this->nr_m[d] - 1;
      61              :         }
      62              : 
      63          408 :         int upperBoundaryPoints = -1;
      64              : 
      65              :         // We iterate over the local domain points, getting the corresponding elements, 
      66              :         // while tagging upper boundary points such that they can be removed after.
      67          408 :         Kokkos::View<size_t*> points("npoints", npoints);
      68          408 :         Kokkos::parallel_reduce(
      69            0 :             "ComputePoints", npoints,
      70        10758 :             KOKKOS_CLASS_LAMBDA(const int i, int& local) {
      71         9942 :                 int idx = i;
      72         9942 :                 indices_t val;
      73         9942 :                 bool isBoundary = false;
      74        37710 :                 for (unsigned int d = 0; d < Dim; ++d) {
      75        27768 :                     int range = last[d] - first[d] + 1;
      76        27768 :                     val[d]    = first[d] + (idx % range);
      77        27768 :                     idx /= range;
      78        27768 :                     if (val[d] == bounds[d]) {
      79         5358 :                         isBoundary = true;
      80              :                     }
      81              :                 }
      82         9942 :                 points(i) = (!isBoundary) * (this->getElementIndex(val));
      83         9942 :                 local += isBoundary;
      84         9942 :             },
      85          816 :             Kokkos::Sum<int>(upperBoundaryPoints));
      86          408 :         Kokkos::fence();
      87              : 
      88              :         // The elementIndices will be the same array as computed above,
      89              :         // with the tagged upper boundary points removed.
      90          408 :         int elementsPerRank = npoints - upperBoundaryPoints;
      91          408 :         elementIndices      = Kokkos::View<size_t*>("i", elementsPerRank);
      92          408 :         Kokkos::View<size_t> index("index");
      93              : 
      94          408 :         Kokkos::parallel_for(
      95        20700 :             "RemoveNaNs", npoints, KOKKOS_CLASS_LAMBDA(const int i) {
      96        19884 :                 if ((points(i) != 0) || (i == 0)) {
      97        11124 :                     const size_t idx    = Kokkos::atomic_fetch_add(&index(), 1);
      98        16686 :                     elementIndices(idx) = points(i);
      99              :                 }
     100              :             });
     101          408 :     }
     102              : 
     103              :     ///////////////////////////////////////////////////////////////////////
     104              :     /// Degree of Freedom operations //////////////////////////////////////
     105              :     ///////////////////////////////////////////////////////////////////////
     106              : 
     107              :     template <typename T, unsigned Dim, unsigned Order, typename ElementType,
     108              :               typename QuadratureType, typename FieldLHS, typename FieldRHS>
     109           12 :     KOKKOS_FUNCTION size_t LagrangeSpace<T, Dim, Order, ElementType, QuadratureType, FieldLHS,
     110              :                                          FieldRHS>::numGlobalDOFs() const {
     111           12 :         size_t num_global_dofs = 1;
     112           36 :         for (size_t d = 0; d < Dim; ++d) {
     113           24 :             num_global_dofs *= this->nr_m[d] * Order;
     114              :         }
     115              : 
     116           12 :         return num_global_dofs;
     117              :     }
     118              : 
     119              :     template <typename T, unsigned Dim, unsigned Order, typename ElementType,
     120              :               typename QuadratureType, typename FieldLHS, typename FieldRHS>
     121              :     KOKKOS_FUNCTION
     122       113032 :     size_t LagrangeSpace<T, Dim, Order, ElementType, QuadratureType, FieldLHS, FieldRHS>::getLocalDOFIndex
     123              :     (const size_t& elementIndex, const size_t& globalDOFIndex) const {
     124              :         static_assert(Dim == 1 || Dim == 2 || Dim == 3, "Dim must be 1, 2 or 3");
     125              :         // TODO fix not order independent, only works for order 1
     126              :         static_assert(Order == 1, "Only order 1 is supported at the moment");
     127              : 
     128              :         // Get all the global DOFs for the element
     129       113032 :         const Vector<size_t, numElementDOFs> global_dofs =
     130       113032 :             this->getGlobalDOFIndices(elementIndex);
     131              : 
     132              :         // Find the global DOF in the vector and return the local DOF index
     133              :         // TODO this can be done faster since the global DOFs are sorted
     134       399232 :         for (size_t i = 0; i < global_dofs.dim; ++i) {
     135       398536 :             if (global_dofs[i] == globalDOFIndex) {
     136       112336 :                 return i;
     137              :             }
     138              :         }
     139          696 :         return std::numeric_limits<size_t>::quiet_NaN();
     140              :         //throw IpplException("LagrangeSpace::getLocalDOFIndex()",
     141              :         //                    "FEM Lagrange Space: Global DOF not found in specified element");
     142       113032 :     }
     143              : 
     144              :     template <typename T, unsigned Dim, unsigned Order, typename ElementType,
     145              :               typename QuadratureType, typename FieldLHS, typename FieldRHS>
     146              :     KOKKOS_FUNCTION size_t
     147          336 :     LagrangeSpace<T, Dim, Order, ElementType, QuadratureType, FieldLHS,
     148              :                   FieldRHS>::getGlobalDOFIndex(const size_t& elementIndex,
     149              :                                                const size_t& localDOFIndex) const {
     150          336 :         const auto global_dofs = this->getGlobalDOFIndices(elementIndex);
     151              : 
     152          672 :         return global_dofs[localDOFIndex];
     153          336 :     }
     154              : 
     155              :     template <typename T, unsigned Dim, unsigned Order, typename ElementType,
     156              :               typename QuadratureType, typename FieldLHS, typename FieldRHS>
     157              :     KOKKOS_FUNCTION Vector<size_t, LagrangeSpace<T, Dim, Order, ElementType, QuadratureType,
     158              :                                                  FieldLHS, FieldRHS>::numElementDOFs>
     159          452 :     LagrangeSpace<T, Dim, Order, ElementType, QuadratureType, FieldLHS,
     160              :                   FieldRHS>::getLocalDOFIndices() const {
     161          452 :         Vector<size_t, numElementDOFs> localDOFs;
     162              : 
     163         3756 :         for (size_t dof = 0; dof < numElementDOFs; ++dof) {
     164         3304 :             localDOFs[dof] = dof;
     165              :         }
     166              : 
     167          452 :         return localDOFs;
     168              :     }
     169              : 
     170              :     template <typename T, unsigned Dim, unsigned Order, typename ElementType,
     171              :               typename QuadratureType, typename FieldLHS, typename FieldRHS>
     172              :     KOKKOS_FUNCTION Vector<size_t, LagrangeSpace<T, Dim, Order, ElementType, QuadratureType,
     173              :                                                  FieldLHS, FieldRHS>::numElementDOFs>
     174       137820 :     LagrangeSpace<T, Dim, Order, ElementType, QuadratureType, FieldLHS,
     175              :                   FieldRHS>::getGlobalDOFIndices(const size_t& elementIndex) const {
     176       137820 :         Vector<size_t, numElementDOFs> globalDOFs(0);
     177              : 
     178              :         // get element pos
     179       137820 :         indices_t elementPos = this->getElementNDIndex(elementIndex);
     180              : 
     181              :         // Compute the vector to multiply the ndindex with
     182       137820 :         ippl::Vector<size_t, Dim> vec(1);
     183       325080 :         for (size_t d = 1; d < dim; ++d) {
     184       448028 :             for (size_t d2 = d; d2 < Dim; ++d2) {
     185       260768 :                 vec[d2] *= this->nr_m[d - 1];
     186              :             }
     187              :         }
     188       137820 :         vec *= Order;  // Multiply each dimension by the order
     189       137820 :         size_t smallestGlobalDOF = elementPos.dot(vec);
     190              : 
     191              :         // Add the vertex DOFs
     192       137820 :         globalDOFs[0] = smallestGlobalDOF;
     193       137820 :         globalDOFs[1] = smallestGlobalDOF + Order;
     194              : 
     195              :         if (Dim >= 2) {
     196       113752 :             globalDOFs[2] = globalDOFs[1] + this->nr_m[1] * Order;
     197       113752 :             globalDOFs[3] = globalDOFs[0] + this->nr_m[1] * Order;
     198              :         }
     199              :         if (Dim >= 3) {
     200        73508 :             globalDOFs[4] = globalDOFs[0] + this->nr_m[1] * this->nr_m[2] * Order;
     201        73508 :             globalDOFs[5] = globalDOFs[1] + this->nr_m[1] * this->nr_m[2] * Order;
     202        73508 :             globalDOFs[6] = globalDOFs[2] + this->nr_m[1] * this->nr_m[2] * Order;
     203        73508 :             globalDOFs[7] = globalDOFs[3] + this->nr_m[1] * this->nr_m[2] * Order;
     204              :         }
     205              : 
     206              :         if (Order > 1) {
     207              :             // If the order is greater than 1, there are edge and face DOFs, otherwise the work is
     208              :             // done
     209              : 
     210              :             // Add the edge DOFs
     211              :             if (Dim >= 2) {
     212              :                 for (size_t i = 0; i < Order - 1; ++i) {
     213              :                     globalDOFs[8 + i]                   = globalDOFs[0] + i + 1;
     214              :                     globalDOFs[8 + Order - 1 + i]       = globalDOFs[1] + (i + 1) * this->nr_m[1];
     215              :                     globalDOFs[8 + 2 * (Order - 1) + i] = globalDOFs[2] - (i + 1);
     216              :                     globalDOFs[8 + 3 * (Order - 1) + i] = globalDOFs[3] - (i + 1) * this->nr_m[1];
     217              :                 }
     218              :             }
     219              :             if (Dim >= 3) {
     220              :                 // TODO
     221              :             }
     222              : 
     223              :             // Add the face DOFs
     224              :             if (Dim >= 2) {
     225              :                 for (size_t i = 0; i < Order - 1; ++i) {
     226              :                     for (size_t j = 0; j < Order - 1; ++j) {
     227              :                         // TODO CHECK
     228              :                         globalDOFs[8 + 4 * (Order - 1) + i * (Order - 1) + j] =
     229              :                             globalDOFs[0] + (i + 1) + (j + 1) * this->nr_m[1];
     230              :                         globalDOFs[8 + 4 * (Order - 1) + (Order - 1) * (Order - 1) + i * (Order - 1)
     231              :                                    + j] = globalDOFs[1] + (i + 1) + (j + 1) * this->nr_m[1];
     232              :                         globalDOFs[8 + 4 * (Order - 1) + 2 * (Order - 1) * (Order - 1)
     233              :                                    + i * (Order - 1) + j] =
     234              :                             globalDOFs[2] - (i + 1) + (j + 1) * this->nr_m[1];
     235              :                         globalDOFs[8 + 4 * (Order - 1) + 3 * (Order - 1) * (Order - 1)
     236              :                                    + i * (Order - 1) + j] =
     237              :                             globalDOFs[3] - (i + 1) + (j + 1) * this->nr_m[1];
     238              :                     }
     239              :                 }
     240              :             }
     241              :         }
     242              : 
     243       137820 :         return globalDOFs;
     244       137820 :     }
     245              : 
     246              :     ///////////////////////////////////////////////////////////////////////
     247              :     /// Basis functions and gradients /////////////////////////////////////
     248              :     ///////////////////////////////////////////////////////////////////////
     249              : 
     250              :     template <typename T, unsigned Dim, unsigned Order, typename ElementType,
     251              :               typename QuadratureType, typename FieldLHS, typename FieldRHS>
     252              :     KOKKOS_FUNCTION T
     253       379040 :     LagrangeSpace<T, Dim, Order, ElementType, QuadratureType, FieldLHS, FieldRHS>::
     254              :         evaluateRefElementShapeFunction(
     255              :             const size_t& localDOF,
     256              :             const LagrangeSpace<T, Dim, Order, ElementType, QuadratureType, FieldLHS,
     257              :                                 FieldRHS>::point_t& localPoint) const {
     258              :         static_assert(Order == 1, "Only order 1 is supported at the moment");
     259              :         // Assert that the local vertex index is valid.
     260       379040 :         assert(localDOF < numElementDOFs
     261              :                && "The local vertex index is invalid");  // TODO assumes 1st order Lagrange
     262              : 
     263       379040 :         assert(this->ref_element_m.isPointInRefElement(localPoint)
     264              :                && "Point is not in reference element");
     265              : 
     266              :         // Get the local vertex indices for the local vertex index.
     267              :         // TODO fix not order independent, only works for order 1
     268       379040 :         const point_t ref_element_point = this->ref_element_m.getLocalVertices()[localDOF];
     269              : 
     270              :         // The variable that accumulates the product of the shape functions.
     271       379040 :         T product = 1;
     272              : 
     273      1444944 :         for (size_t d = 0; d < Dim; d++) {
     274      1065904 :             if (localPoint[d] < ref_element_point[d]) {
     275       532952 :                 product *= localPoint[d];
     276              :             } else {
     277       532952 :                 product *= 1.0 - localPoint[d];
     278              :             }
     279              :         }
     280              : 
     281       379040 :         return product;
     282       379040 :     }
     283              : 
     284              :     template <typename T, unsigned Dim, unsigned Order, typename ElementType,
     285              :               typename QuadratureType, typename FieldLHS, typename FieldRHS>
     286              :     KOKKOS_FUNCTION typename LagrangeSpace<T, Dim, Order, ElementType, QuadratureType, FieldLHS,
     287              :                                            FieldRHS>::point_t
     288       262600 :     LagrangeSpace<T, Dim, Order, ElementType, QuadratureType, FieldLHS, FieldRHS>::
     289              :         evaluateRefElementShapeFunctionGradient(
     290              :             const size_t& localDOF,
     291              :             const LagrangeSpace<T, Dim, Order, ElementType, QuadratureType, FieldLHS,
     292              :                                 FieldRHS>::point_t& localPoint) const {
     293              :         // TODO fix not order independent, only works for order 1
     294              :         static_assert(Order == 1 && "Only order 1 is supported at the moment");
     295              : 
     296              :         // Assert that the local vertex index is valid.
     297       262600 :         assert(localDOF < numElementDOFs && "The local vertex index is invalid");
     298              : 
     299       262600 :         assert(this->ref_element_m.isPointInRefElement(localPoint)
     300              :                && "Point is not in reference element");
     301              : 
     302              :         // Get the local dof nd_index
     303       262600 :         const vertex_points_t local_vertex_points = this->ref_element_m.getLocalVertices();
     304              : 
     305       262600 :         const point_t& local_vertex_point = local_vertex_points[localDOF];
     306              : 
     307       262600 :         point_t gradient(1);
     308              : 
     309              :         // To construct the gradient we need to loop over the dimensions and multiply the
     310              :         // shape functions in each dimension except the current one. The one of the current
     311              :         // dimension is replaced by the derivative of the shape function in that dimension,
     312              :         // which is either 1 or -1.
     313      1043664 :         for (size_t d = 0; d < Dim; d++) {
     314              :             // The variable that accumulates the product of the shape functions.
     315       781064 :             T product = 1;
     316              : 
     317      3111120 :             for (size_t d2 = 0; d2 < Dim; d2++) {
     318      2330056 :                 if (d2 == d) {
     319       781064 :                     if (localPoint[d] < local_vertex_point[d]) {
     320       390532 :                         product *= 1;
     321              :                     } else {
     322       390532 :                         product *= -1;
     323              :                     }
     324              :                 } else {
     325      1548992 :                     if (localPoint[d2] < local_vertex_point[d2]) {
     326       774496 :                         product *= localPoint[d2];
     327              :                     } else {
     328       774496 :                         product *= 1.0 - localPoint[d2];
     329              :                     }
     330              :                 }
     331              :             }
     332              : 
     333       781064 :             gradient[d] = product;
     334              :         }
     335              : 
     336       262600 :         return gradient;
     337       262600 :     }
     338              : 
     339              :     ///////////////////////////////////////////////////////////////////////
     340              :     /// Assembly operations ///////////////////////////////////////////////
     341              :     ///////////////////////////////////////////////////////////////////////
     342              : 
     343              :     template <typename T, unsigned Dim, unsigned Order, typename ElementType,
     344              :               typename QuadratureType, typename FieldLHS, typename FieldRHS>
     345              :     template <typename F>
     346            8 :     FieldLHS LagrangeSpace<T, Dim, Order, ElementType, QuadratureType, FieldLHS,
     347              :                            FieldRHS>::evaluateAx(FieldLHS& field, F& evalFunction) const {
     348            8 :         Inform m("");
     349              : 
     350              :         // start a timer
     351            8 :         static IpplTimings::TimerRef evalAx = IpplTimings::getTimer("evaluateAx");
     352            8 :         IpplTimings::startTimer(evalAx);
     353              : 
     354              :         // get number of ghost cells in field
     355            8 :         const int nghost = field.getNghost();
     356              : 
     357              :         // create a new field for result with view initialized to zero (views are initialized to
     358              :         // zero by default)
     359            8 :         FieldLHS resultField(field.get_mesh(), field.getLayout(), nghost);
     360              : 
     361              :         // List of quadrature weights
     362            8 :         const Vector<T, QuadratureType::numElementNodes> w =
     363            8 :             this->quadrature_m.getWeightsForRefElement();
     364              : 
     365              :         // List of quadrature nodes
     366            8 :         const Vector<point_t, QuadratureType::numElementNodes> q =
     367            8 :             this->quadrature_m.getIntegrationNodesForRefElement();
     368              : 
     369              :         // TODO move outside of evaluateAx (I think it is possible for other problems as well)
     370              :         // Gradients of the basis functions for the DOF at the quadrature nodes
     371            8 :         Vector<Vector<point_t, numElementDOFs>, QuadratureType::numElementNodes> grad_b_q;
     372           16 :         for (size_t k = 0; k < QuadratureType::numElementNodes; ++k) {
     373           48 :             for (size_t i = 0; i < numElementDOFs; ++i) {
     374           40 :                 grad_b_q[k][i] = this->evaluateRefElementShapeFunctionGradient(i, q[k]);
     375              :             }
     376              :         }
     377              : 
     378              :         // Make local element matrix -- does not change through the element mesh
     379              :         // Element matrix
     380            8 :         Vector<Vector<T, numElementDOFs>, numElementDOFs> A_K;
     381              : 
     382              :         // 1. Compute the Galerkin element matrix A_K
     383           48 :         for (size_t i = 0; i < numElementDOFs; ++i) {
     384          312 :             for (size_t j = 0; j < numElementDOFs; ++j) {
     385          272 :                 A_K[i][j] = 0.0;
     386          544 :                 for (size_t k = 0; k < QuadratureType::numElementNodes; ++k) {
     387          272 :                     A_K[i][j] += w[k] * evalFunction(i, j, grad_b_q[k]);
     388              :                 }
     389              :             }
     390              :         }
     391              : 
     392              :         // Get field data and atomic result data,
     393              :         // since it will be added to during the kokkos loop
     394            8 :         ViewType view             = field.getView();
     395            8 :         AtomicViewType resultView = resultField.getView();
     396              : 
     397              :         // Get boundary conditions from field
     398            8 :         BConds<FieldLHS, Dim>& bcField = field.getFieldBC();
     399            8 :         FieldBC bcType = bcField[0]->getBCType();
     400              : 
     401              :         // Get domain information
     402            8 :         auto ldom = (field.getLayout()).getLocalNDIndex();
     403              : 
     404              :         using exec_space  = typename Kokkos::View<const size_t*>::execution_space;
     405              :         using policy_type = Kokkos::RangePolicy<exec_space>;
     406              : 
     407              :         // start a timer
     408            8 :         static IpplTimings::TimerRef outer_loop = IpplTimings::getTimer("evaluateAx: outer loop");
     409            8 :         IpplTimings::startTimer(outer_loop);
     410              : 
     411              :         // Loop over elements to compute contributions
     412            8 :         Kokkos::parallel_for(
     413           16 :             "Loop over elements", policy_type(0, elementIndices.extent(0)),
     414          152 :             KOKKOS_CLASS_LAMBDA(const size_t index) {
     415            0 :                 const size_t elementIndex                            = elementIndices(index);
     416          136 :                 const Vector<size_t, numElementDOFs> local_dof = this->getLocalDOFIndices();
     417            0 :                 const Vector<size_t, numElementDOFs> global_dofs =
     418          136 :                     this->getGlobalDOFIndices(elementIndex);
     419          136 :                 Vector<indices_t, numElementDOFs> global_dof_ndindices;
     420              : 
     421         1176 :                 for (size_t i = 0; i < numElementDOFs; ++i) {
     422         1040 :                     global_dof_ndindices[i] = this->getMeshVertexNDIndex(global_dofs[i]);
     423              :                 }
     424              : 
     425              :                 // local DOF indices (both i and j go from 0 to numDOFs-1 in the element)
     426              :                 size_t i, j;
     427              : 
     428              :                 // global DOF n-dimensional indices (Vector of N indices representing indices in
     429              :                 // each dimension)
     430          136 :                 indices_t I_nd, J_nd;
     431              : 
     432              :                 // 2. Compute the contribution to resultAx = A*x with A_K
     433         1176 :                 for (i = 0; i < numElementDOFs; ++i) {
     434            0 :                     I_nd = global_dof_ndindices[i];
     435              : 
     436              :                     // Handle boundary DOFs
     437              :                     // If Zero Dirichlet BCs, skip this DOF
     438              :                     // If Constant Dirichlet BCs, identity
     439         1040 :                     if ((bcType == CONSTANT_FACE) && (this->isDOFOnBoundary(I_nd))) {
     440            0 :                         for (unsigned d = 0; d < Dim; ++d) {
     441            0 :                             I_nd[d] = I_nd[d] - ldom[d].first() + nghost;
     442              :                         }
     443            0 :                         apply(resultView, I_nd) =  apply(view, I_nd);
     444            0 :                         continue;
     445         1040 :                     } else if ((bcType == ZERO_FACE) && (this->isDOFOnBoundary(I_nd))) {
     446            0 :                         continue;
     447              :                     }
     448              : 
     449              :                     // get the appropriate index for the Kokkos view of the field
     450         1752 :                     for (unsigned d = 0; d < Dim; ++d) {
     451            0 :                         I_nd[d] = I_nd[d] - ldom[d].first() + nghost;
     452              :                     }
     453              : 
     454         3924 :                     for (j = 0; j < numElementDOFs; ++j) {
     455            0 :                         J_nd = global_dof_ndindices[j];
     456              : 
     457              :                         // Skip boundary DOFs (Zero & Constant Dirichlet BCs)
     458            0 :                         if (((bcType == ZERO_FACE) || (bcType == CONSTANT_FACE)) 
     459         3480 :                             && this->isDOFOnBoundary(J_nd)) {
     460            0 :                             continue;
     461              :                         }
     462              : 
     463              :                         // get the appropriate index for the Kokkos view of the field
     464         8040 :                         for (unsigned d = 0; d < Dim; ++d) {
     465            0 :                             J_nd[d] = J_nd[d] - ldom[d].first() + nghost;
     466              :                         }
     467              : 
     468         2020 :                         apply(resultView, I_nd) += A_K[i][j] * apply(view, J_nd);
     469              :                     }
     470              :                 }
     471            0 :             });
     472            8 :         IpplTimings::stopTimer(outer_loop);
     473              : 
     474            8 :         if (bcType == PERIODIC_FACE) {
     475            0 :             resultField.accumulateHalo();
     476            0 :             bcField.apply(resultField);
     477            0 :             bcField.assignGhostToPhysical(resultField);
     478              :         } else {
     479            8 :             resultField.accumulateHalo_noghost();
     480              :         }
     481              : 
     482            8 :         IpplTimings::stopTimer(evalAx);
     483              : 
     484            8 :         return resultField;
     485            8 :     }
     486              : 
     487              :     template <typename T, unsigned Dim, unsigned Order, typename ElementType,
     488              :               typename QuadratureType, typename FieldLHS, typename FieldRHS>
     489              :     template <typename F>
     490            0 :     FieldLHS LagrangeSpace<T, Dim, Order, ElementType, QuadratureType, FieldLHS,
     491              :                            FieldRHS>::evaluateAx_lower(FieldLHS& field, F& evalFunction) const {
     492            0 :         Inform m("");
     493              : 
     494              :         // start a timer
     495            0 :         static IpplTimings::TimerRef evalAx = IpplTimings::getTimer("evaluateAx");
     496            0 :         IpplTimings::startTimer(evalAx);
     497              : 
     498              :         // get number of ghost cells in field
     499            0 :         const int nghost = field.getNghost();
     500              : 
     501              :         // create a new field for result with view initialized to zero (views are initialized to
     502              :         // zero by default)
     503            0 :         FieldLHS resultField(field.get_mesh(), field.getLayout(), nghost);
     504              : 
     505              :         // List of quadrature weights
     506            0 :         const Vector<T, QuadratureType::numElementNodes> w =
     507            0 :             this->quadrature_m.getWeightsForRefElement();
     508              : 
     509              :         // List of quadrature nodes
     510            0 :         const Vector<point_t, QuadratureType::numElementNodes> q =
     511            0 :             this->quadrature_m.getIntegrationNodesForRefElement();
     512              : 
     513              :         // TODO move outside of evaluateAx (I think it is possible for other problems as well)
     514              :         // Gradients of the basis functions for the DOF at the quadrature nodes
     515            0 :         Vector<Vector<point_t, numElementDOFs>, QuadratureType::numElementNodes> grad_b_q;
     516            0 :         for (size_t k = 0; k < QuadratureType::numElementNodes; ++k) {
     517            0 :             for (size_t i = 0; i < numElementDOFs; ++i) {
     518            0 :                 grad_b_q[k][i] = this->evaluateRefElementShapeFunctionGradient(i, q[k]);
     519              :             }
     520              :         }
     521              : 
     522              :         // Make local element matrix -- does not change through the element mesh
     523              :         // Element matrix
     524            0 :         Vector<Vector<T, numElementDOFs>, numElementDOFs> A_K;
     525              : 
     526              :         // 1. Compute the Galerkin element matrix A_K
     527            0 :         for (size_t i = 0; i < numElementDOFs; ++i) {
     528            0 :             for (size_t j = 0; j < numElementDOFs; ++j) {
     529            0 :                 A_K[i][j] = 0.0;
     530            0 :                 for (size_t k = 0; k < QuadratureType::numElementNodes; ++k) {
     531            0 :                     A_K[i][j] += w[k] * evalFunction(i, j, grad_b_q[k]);
     532              :                 }
     533              :             }
     534              :         }
     535              : 
     536              :         // Get field data and atomic result data,
     537              :         // since it will be added to during the kokkos loop
     538            0 :         ViewType view             = field.getView();
     539            0 :         AtomicViewType resultView = resultField.getView();
     540              : 
     541              :         // Get boundary conditions from field
     542            0 :         BConds<FieldLHS, Dim>& bcField = field.getFieldBC();
     543            0 :         FieldBC bcType = bcField[0]->getBCType();
     544              : 
     545              :         // Get domain information
     546            0 :         auto ldom = (field.getLayout()).getLocalNDIndex();
     547              : 
     548              :         using exec_space  = typename Kokkos::View<const size_t*>::execution_space;
     549              :         using policy_type = Kokkos::RangePolicy<exec_space>;
     550              : 
     551              :         // start a timer
     552            0 :         static IpplTimings::TimerRef outer_loop = IpplTimings::getTimer("evaluateAx: outer loop");
     553            0 :         IpplTimings::startTimer(outer_loop);
     554              : 
     555              :         // Loop over elements to compute contributions
     556            0 :         Kokkos::parallel_for(
     557            0 :             "Loop over elements", policy_type(0, elementIndices.extent(0)),
     558            0 :             KOKKOS_CLASS_LAMBDA(const size_t index) {
     559            0 :                 const size_t elementIndex                            = elementIndices(index);
     560            0 :                 const Vector<size_t, numElementDOFs> local_dof = this->getLocalDOFIndices();
     561            0 :                 const Vector<size_t, numElementDOFs> global_dofs =
     562            0 :                     this->getGlobalDOFIndices(elementIndex);
     563            0 :                 Vector<indices_t, numElementDOFs> global_dof_ndindices;
     564              : 
     565            0 :                 for (size_t i = 0; i < numElementDOFs; ++i) {
     566            0 :                     global_dof_ndindices[i] = this->getMeshVertexNDIndex(global_dofs[i]);
     567              :                 }
     568              : 
     569              :                 // local DOF indices
     570              :                 size_t i, j;
     571              : 
     572              :                 // global DOF n-dimensional indices (Vector of N indices representing indices in
     573              :                 // each dimension)
     574            0 :                 indices_t I_nd, J_nd;
     575              : 
     576              :                 // 2. Compute the contribution to resultAx = A*x with A_K
     577            0 :                 for (i = 0; i < numElementDOFs; ++i) {
     578            0 :                     I_nd = global_dof_ndindices[i];
     579              : 
     580              :                     // Handle boundary DOFs
     581              :                     // If Zero Dirichlet BCs, skip this DOF
     582              :                     // If Constant Dirichlet BCs, identity
     583            0 :                     if ((bcType == CONSTANT_FACE) && (this->isDOFOnBoundary(I_nd))) {
     584            0 :                         for (unsigned d = 0; d < Dim; ++d) {
     585            0 :                             I_nd[d] = I_nd[d] - ldom[d].first() + nghost;
     586              :                         }
     587            0 :                         apply(resultView, I_nd) =  apply(view, I_nd);
     588            0 :                         continue;
     589            0 :                     } else if ((bcType == ZERO_FACE) && (this->isDOFOnBoundary(I_nd))) {
     590            0 :                         continue;
     591              :                     }
     592              : 
     593              :                     // get the appropriate index for the Kokkos view of the field
     594            0 :                     for (unsigned d = 0; d < Dim; ++d) {
     595            0 :                         I_nd[d] = I_nd[d] - ldom[d].first() + nghost;
     596              :                     }
     597              : 
     598            0 :                     for (j = 0; j < numElementDOFs; ++j) {
     599            0 :                         J_nd = global_dof_ndindices[j];
     600              : 
     601            0 :                         if (global_dofs[i] >= global_dofs[j]) {
     602            0 :                             continue;
     603              :                         }
     604              : 
     605              :                         // Skip boundary DOFs (Zero & Constant Dirichlet BCs)
     606            0 :                         if (((bcType == ZERO_FACE) || (bcType == CONSTANT_FACE)) 
     607            0 :                             && this->isDOFOnBoundary(J_nd)) {
     608            0 :                             continue;
     609              :                         }
     610              : 
     611              :                         // get the appropriate index for the Kokkos view of the field
     612            0 :                         for (unsigned d = 0; d < Dim; ++d) {
     613            0 :                             J_nd[d] = J_nd[d] - ldom[d].first() + nghost;
     614              :                         }
     615              : 
     616            0 :                         apply(resultView, I_nd) += A_K[i][j] * apply(view, J_nd);
     617              :                     }
     618              :                 }
     619            0 :             });
     620            0 :         IpplTimings::stopTimer(outer_loop);
     621              : 
     622            0 :         if (bcType == PERIODIC_FACE) {
     623            0 :             resultField.accumulateHalo();
     624            0 :             bcField.apply(resultField);
     625            0 :             bcField.assignGhostToPhysical(resultField);
     626              :         } else {
     627            0 :             resultField.accumulateHalo_noghost();
     628              :         }
     629              : 
     630            0 :         IpplTimings::stopTimer(evalAx);
     631              : 
     632            0 :         return resultField;
     633            0 :     }
     634              : 
     635              :     template <typename T, unsigned Dim, unsigned Order, typename ElementType,
     636              :               typename QuadratureType, typename FieldLHS, typename FieldRHS>
     637              :     template <typename F>
     638            0 :     FieldLHS LagrangeSpace<T, Dim, Order, ElementType, QuadratureType, FieldLHS,
     639              :                            FieldRHS>::evaluateAx_upper(FieldLHS& field, F& evalFunction) const {
     640            0 :         Inform m("");
     641              : 
     642              :         // start a timer
     643            0 :         static IpplTimings::TimerRef evalAx = IpplTimings::getTimer("evaluateAx");
     644            0 :         IpplTimings::startTimer(evalAx);
     645              : 
     646              :         // get number of ghost cells in field
     647            0 :         const int nghost = field.getNghost();
     648              : 
     649              :         // create a new field for result with view initialized to zero (views are initialized to
     650              :         // zero by default)
     651            0 :         FieldLHS resultField(field.get_mesh(), field.getLayout(), nghost);
     652              : 
     653              :         // List of quadrature weights
     654            0 :         const Vector<T, QuadratureType::numElementNodes> w =
     655            0 :             this->quadrature_m.getWeightsForRefElement();
     656              : 
     657              :         // List of quadrature nodes
     658            0 :         const Vector<point_t, QuadratureType::numElementNodes> q =
     659            0 :             this->quadrature_m.getIntegrationNodesForRefElement();
     660              : 
     661              :         // TODO move outside of evaluateAx (I think it is possible for other problems as well)
     662              :         // Gradients of the basis functions for the DOF at the quadrature nodes
     663            0 :         Vector<Vector<point_t, numElementDOFs>, QuadratureType::numElementNodes> grad_b_q;
     664            0 :         for (size_t k = 0; k < QuadratureType::numElementNodes; ++k) {
     665            0 :             for (size_t i = 0; i < numElementDOFs; ++i) {
     666            0 :                 grad_b_q[k][i] = this->evaluateRefElementShapeFunctionGradient(i, q[k]);
     667              :             }
     668              :         }
     669              : 
     670              :         // Make local element matrix -- does not change through the element mesh
     671              :         // Element matrix
     672            0 :         Vector<Vector<T, numElementDOFs>, numElementDOFs> A_K;
     673              : 
     674              :         // 1. Compute the Galerkin element matrix A_K
     675            0 :         for (size_t i = 0; i < numElementDOFs; ++i) {
     676            0 :             for (size_t j = 0; j < numElementDOFs; ++j) {
     677            0 :                 A_K[i][j] = 0.0;
     678            0 :                 for (size_t k = 0; k < QuadratureType::numElementNodes; ++k) {
     679            0 :                     A_K[i][j] += w[k] * evalFunction(i, j, grad_b_q[k]);
     680              :                 }
     681              :             }
     682              :         }
     683              : 
     684              :         // Get field data and atomic result data,
     685              :         // since it will be added to during the kokkos loop
     686            0 :         ViewType view             = field.getView();
     687            0 :         AtomicViewType resultView = resultField.getView();
     688              : 
     689              :         // Get boundary conditions from field
     690            0 :         BConds<FieldLHS, Dim>& bcField = field.getFieldBC();
     691            0 :         FieldBC bcType = bcField[0]->getBCType();
     692              : 
     693              :         // Get domain information
     694            0 :         auto ldom = (field.getLayout()).getLocalNDIndex();
     695              : 
     696              :         using exec_space  = typename Kokkos::View<const size_t*>::execution_space;
     697              :         using policy_type = Kokkos::RangePolicy<exec_space>;
     698              : 
     699              :         // start a timer
     700            0 :         static IpplTimings::TimerRef outer_loop = IpplTimings::getTimer("evaluateAx: outer loop");
     701            0 :         IpplTimings::startTimer(outer_loop);
     702              : 
     703              :         // Loop over elements to compute contributions
     704            0 :         Kokkos::parallel_for(
     705            0 :             "Loop over elements", policy_type(0, elementIndices.extent(0)),
     706            0 :             KOKKOS_CLASS_LAMBDA(const size_t index) {
     707            0 :                 const size_t elementIndex                            = elementIndices(index);
     708            0 :                 const Vector<size_t, numElementDOFs> local_dof = this->getLocalDOFIndices();
     709            0 :                 const Vector<size_t, numElementDOFs> global_dofs =
     710            0 :                     this->getGlobalDOFIndices(elementIndex);
     711            0 :                 Vector<indices_t, numElementDOFs> global_dof_ndindices;
     712              : 
     713            0 :                 for (size_t i = 0; i < numElementDOFs; ++i) {
     714            0 :                     global_dof_ndindices[i] = this->getMeshVertexNDIndex(global_dofs[i]);
     715              :                 }
     716              : 
     717              :                 // local DOF indices
     718              :                 size_t i, j;
     719              : 
     720              :                 // global DOF n-dimensional indices (Vector of N indices representing indices in
     721              :                 // each dimension)
     722            0 :                 indices_t I_nd, J_nd;
     723              : 
     724              :                 // 2. Compute the contribution to resultAx = A*x with A_K
     725            0 :                 for (i = 0; i < numElementDOFs; ++i) {
     726            0 :                     I_nd = global_dof_ndindices[i];
     727              : 
     728              :                     // Handle boundary DOFs
     729              :                     // If Zero Dirichlet BCs, skip this DOF
     730              :                     // If Constant Dirichlet BCs, identity
     731            0 :                     if ((bcType == CONSTANT_FACE) && (this->isDOFOnBoundary(I_nd))) {
     732            0 :                         for (unsigned d = 0; d < Dim; ++d) {
     733            0 :                             I_nd[d] = I_nd[d] - ldom[d].first() + nghost;
     734              :                         }
     735            0 :                         apply(resultView, I_nd) =  apply(view, I_nd);
     736            0 :                         continue;
     737            0 :                     } else if ((bcType == ZERO_FACE) && (this->isDOFOnBoundary(I_nd))) {
     738            0 :                         continue;
     739              :                     }
     740              : 
     741              :                     // get the appropriate index for the Kokkos view of the field
     742            0 :                     for (unsigned d = 0; d < Dim; ++d) {
     743            0 :                         I_nd[d] = I_nd[d] - ldom[d].first() + nghost;
     744              :                     }
     745              : 
     746            0 :                     for (j = 0; j < numElementDOFs; ++j) {
     747            0 :                         J_nd = global_dof_ndindices[j];
     748              : 
     749            0 :                         if (global_dofs[i] <= global_dofs[j]) {
     750            0 :                             continue;
     751              :                         }
     752              : 
     753              :                         // Skip boundary DOFs (Zero & Constant Dirichlet BCs)
     754            0 :                         if (((bcType == ZERO_FACE) || (bcType == CONSTANT_FACE)) 
     755            0 :                             && this->isDOFOnBoundary(J_nd)) {
     756            0 :                             continue;
     757              :                         }
     758              : 
     759              :                         // get the appropriate index for the Kokkos view of the field
     760            0 :                         for (unsigned d = 0; d < Dim; ++d) {
     761            0 :                             J_nd[d] = J_nd[d] - ldom[d].first() + nghost;
     762              :                         }
     763              : 
     764            0 :                         apply(resultView, I_nd) += A_K[i][j] * apply(view, J_nd);
     765              :                     }
     766              :                 }
     767            0 :             });
     768            0 :         IpplTimings::stopTimer(outer_loop);
     769              : 
     770            0 :         if (bcType == PERIODIC_FACE) {
     771            0 :             resultField.accumulateHalo();
     772            0 :             bcField.apply(resultField);
     773            0 :             bcField.assignGhostToPhysical(resultField);
     774              :         } else {
     775            0 :             resultField.accumulateHalo_noghost();
     776              :         }
     777              : 
     778            0 :         IpplTimings::stopTimer(evalAx);
     779              : 
     780            0 :         return resultField;
     781            0 :     }
     782              : 
     783              :     template <typename T, unsigned Dim, unsigned Order, typename ElementType,
     784              :               typename QuadratureType, typename FieldLHS, typename FieldRHS>
     785              :     template <typename F>
     786            0 :     FieldLHS LagrangeSpace<T, Dim, Order, ElementType, QuadratureType, FieldLHS,
     787              :                            FieldRHS>::evaluateAx_upperlower(FieldLHS& field, F& evalFunction) const {
     788            0 :         Inform m("");
     789              : 
     790              :         // start a timer
     791            0 :         static IpplTimings::TimerRef evalAx = IpplTimings::getTimer("evaluateAx");
     792            0 :         IpplTimings::startTimer(evalAx);
     793              : 
     794              :         // get number of ghost cells in field
     795            0 :         const int nghost = field.getNghost();
     796              : 
     797              :         // create a new field for result with view initialized to zero (views are initialized to
     798              :         // zero by default)
     799            0 :         FieldLHS resultField(field.get_mesh(), field.getLayout(), nghost);
     800              : 
     801              :         // List of quadrature weights
     802            0 :         const Vector<T, QuadratureType::numElementNodes> w =
     803            0 :             this->quadrature_m.getWeightsForRefElement();
     804              : 
     805              :         // List of quadrature nodes
     806            0 :         const Vector<point_t, QuadratureType::numElementNodes> q =
     807            0 :             this->quadrature_m.getIntegrationNodesForRefElement();
     808              : 
     809              :         // TODO move outside of evaluateAx (I think it is possible for other problems as well)
     810              :         // Gradients of the basis functions for the DOF at the quadrature nodes
     811            0 :         Vector<Vector<point_t, numElementDOFs>, QuadratureType::numElementNodes> grad_b_q;
     812            0 :         for (size_t k = 0; k < QuadratureType::numElementNodes; ++k) {
     813            0 :             for (size_t i = 0; i < numElementDOFs; ++i) {
     814            0 :                 grad_b_q[k][i] = this->evaluateRefElementShapeFunctionGradient(i, q[k]);
     815              :             }
     816              :         }
     817              : 
     818              :         // Make local element matrix -- does not change through the element mesh
     819              :         // Element matrix
     820            0 :         Vector<Vector<T, numElementDOFs>, numElementDOFs> A_K;
     821              : 
     822              :         // 1. Compute the Galerkin element matrix A_K
     823            0 :         for (size_t i = 0; i < numElementDOFs; ++i) {
     824            0 :             for (size_t j = 0; j < numElementDOFs; ++j) {
     825            0 :                 A_K[i][j] = 0.0;
     826            0 :                 for (size_t k = 0; k < QuadratureType::numElementNodes; ++k) {
     827            0 :                     A_K[i][j] += w[k] * evalFunction(i, j, grad_b_q[k]);
     828              :                 }
     829              :             }
     830              :         }
     831              : 
     832              :         // Get field data and atomic result data,
     833              :         // since it will be added to during the kokkos loop
     834            0 :         ViewType view             = field.getView();
     835            0 :         AtomicViewType resultView = resultField.getView();
     836              : 
     837              :         // Get boundary conditions from field
     838            0 :         BConds<FieldLHS, Dim>& bcField = field.getFieldBC();
     839            0 :         FieldBC bcType = bcField[0]->getBCType();
     840              : 
     841              :         // Get domain information
     842            0 :         auto ldom = (field.getLayout()).getLocalNDIndex();
     843              : 
     844              :         using exec_space  = typename Kokkos::View<const size_t*>::execution_space;
     845              :         using policy_type = Kokkos::RangePolicy<exec_space>;
     846              : 
     847              :         // start a timer
     848            0 :         static IpplTimings::TimerRef outer_loop = IpplTimings::getTimer("evaluateAx: outer loop");
     849            0 :         IpplTimings::startTimer(outer_loop);
     850              : 
     851              :         // Loop over elements to compute contributions
     852            0 :         Kokkos::parallel_for(
     853            0 :             "Loop over elements", policy_type(0, elementIndices.extent(0)),
     854            0 :             KOKKOS_CLASS_LAMBDA(const size_t index) {
     855            0 :                 const size_t elementIndex                            = elementIndices(index);
     856            0 :                 const Vector<size_t, numElementDOFs> local_dof = this->getLocalDOFIndices();
     857            0 :                 const Vector<size_t, numElementDOFs> global_dofs =
     858            0 :                     this->getGlobalDOFIndices(elementIndex);
     859            0 :                 Vector<indices_t, numElementDOFs> global_dof_ndindices;
     860              : 
     861            0 :                 for (size_t i = 0; i < numElementDOFs; ++i) {
     862            0 :                     global_dof_ndindices[i] = this->getMeshVertexNDIndex(global_dofs[i]);
     863              :                 }
     864              : 
     865              :                 // local DOF indices
     866              :                 size_t i, j;
     867              : 
     868              :                 // global DOF n-dimensional indices (Vector of N indices representing indices in
     869              :                 // each dimension)
     870            0 :                 indices_t I_nd, J_nd;
     871              : 
     872              :                 // 2. Compute the contribution to resultAx = A*x with A_K
     873            0 :                 for (i = 0; i < numElementDOFs; ++i) {
     874            0 :                     I_nd = global_dof_ndindices[i];
     875              : 
     876              :                     // Handle boundary DOFs
     877              :                     // If Zero Dirichlet BCs, skip this DOF
     878              :                     // If Constant Dirichlet BCs, identity
     879            0 :                     if ((bcType == CONSTANT_FACE) && (this->isDOFOnBoundary(I_nd))) {
     880            0 :                         for (unsigned d = 0; d < Dim; ++d) {
     881            0 :                             I_nd[d] = I_nd[d] - ldom[d].first() + nghost;
     882              :                         }
     883            0 :                         apply(resultView, I_nd) =  apply(view, I_nd);
     884            0 :                         continue;
     885            0 :                     } else if ((bcType == ZERO_FACE) && (this->isDOFOnBoundary(I_nd))) {
     886            0 :                         continue;
     887              :                     }
     888              : 
     889              :                     // get the appropriate index for the Kokkos view of the field
     890            0 :                     for (unsigned d = 0; d < Dim; ++d) {
     891            0 :                         I_nd[d] = I_nd[d] - ldom[d].first() + nghost;
     892              :                     }
     893              : 
     894            0 :                     for (j = 0; j < numElementDOFs; ++j) {
     895            0 :                         J_nd = global_dof_ndindices[j];
     896              : 
     897            0 :                         if (global_dofs[i] == global_dofs[j]) {
     898            0 :                             continue;
     899              :                         }
     900              : 
     901              :                         // Skip boundary DOFs (Zero & Constant Dirichlet BCs)
     902            0 :                         if (((bcType == ZERO_FACE) || (bcType == CONSTANT_FACE)) 
     903            0 :                             && this->isDOFOnBoundary(J_nd)) {
     904            0 :                             continue;
     905              :                         }
     906              : 
     907              :                         // get the appropriate index for the Kokkos view of the field
     908            0 :                         for (unsigned d = 0; d < Dim; ++d) {
     909            0 :                             J_nd[d] = J_nd[d] - ldom[d].first() + nghost;
     910              :                         }
     911              : 
     912            0 :                         apply(resultView, I_nd) += A_K[i][j] * apply(view, J_nd);
     913              :                     }
     914              :                 }
     915            0 :             });
     916            0 :         IpplTimings::stopTimer(outer_loop);
     917              : 
     918            0 :         if (bcType == PERIODIC_FACE) {
     919            0 :             resultField.accumulateHalo();
     920            0 :             bcField.apply(resultField);
     921            0 :             bcField.assignGhostToPhysical(resultField);
     922              :         } else {
     923            0 :             resultField.accumulateHalo_noghost();
     924              :         }
     925              : 
     926            0 :         IpplTimings::stopTimer(evalAx);
     927              : 
     928            0 :         return resultField;
     929            0 :     }
     930              : 
     931              :     template <typename T, unsigned Dim, unsigned Order, typename ElementType,
     932              :               typename QuadratureType, typename FieldLHS, typename FieldRHS>
     933              :     template <typename F>
     934            0 :     FieldLHS LagrangeSpace<T, Dim, Order, ElementType, QuadratureType, FieldLHS,
     935              :                            FieldRHS>::evaluateAx_inversediag(FieldLHS& field, F& evalFunction) const {
     936            0 :         Inform m("");
     937              : 
     938              :         // start a timer
     939            0 :         static IpplTimings::TimerRef evalAx = IpplTimings::getTimer("evaluateAx");
     940            0 :         IpplTimings::startTimer(evalAx);
     941              : 
     942              :         // get number of ghost cells in field
     943            0 :         const int nghost = field.getNghost();
     944              : 
     945              :         // create a new field for result with view initialized to zero (views are initialized to
     946              :         // zero by default)
     947            0 :         FieldLHS resultField(field.get_mesh(), field.getLayout(), nghost);
     948              : 
     949              :         // List of quadrature weights
     950            0 :         const Vector<T, QuadratureType::numElementNodes> w =
     951            0 :             this->quadrature_m.getWeightsForRefElement();
     952              : 
     953              :         // List of quadrature nodes
     954            0 :         const Vector<point_t, QuadratureType::numElementNodes> q =
     955            0 :             this->quadrature_m.getIntegrationNodesForRefElement();
     956              : 
     957              :         // TODO move outside of evaluateAx (I think it is possible for other problems as well)
     958              :         // Gradients of the basis functions for the DOF at the quadrature nodes
     959            0 :         Vector<Vector<point_t, numElementDOFs>, QuadratureType::numElementNodes> grad_b_q;
     960            0 :         for (size_t k = 0; k < QuadratureType::numElementNodes; ++k) {
     961            0 :             for (size_t i = 0; i < numElementDOFs; ++i) {
     962            0 :                 grad_b_q[k][i] = this->evaluateRefElementShapeFunctionGradient(i, q[k]);
     963              :             }
     964              :         }
     965              : 
     966              :         // Make local element matrix -- does not change through the element mesh
     967              :         // Element matrix
     968            0 :         Vector<Vector<T, numElementDOFs>, numElementDOFs> A_K;
     969              : 
     970              :         // 1. Compute the Galerkin element matrix A_K
     971            0 :         for (size_t i = 0; i < numElementDOFs; ++i) {
     972            0 :             for (size_t j = 0; j < numElementDOFs; ++j) {
     973            0 :                 A_K[i][j] = 0.0;
     974            0 :                 for (size_t k = 0; k < QuadratureType::numElementNodes; ++k) {
     975            0 :                     A_K[i][j] += w[k] * evalFunction(i, j, grad_b_q[k]);
     976              :                 }
     977              :             }
     978              :         }
     979              : 
     980              :         // Get field data and atomic result data,
     981              :         // since it will be added to during the kokkos loop
     982            0 :         ViewType view             = field.getView();
     983            0 :         AtomicViewType resultView = resultField.getView();
     984              : 
     985              :         // Get boundary conditions from field
     986            0 :         BConds<FieldLHS, Dim>& bcField = field.getFieldBC();
     987            0 :         FieldBC bcType = bcField[0]->getBCType();
     988              : 
     989              :         // Get domain information
     990            0 :         auto ldom = (field.getLayout()).getLocalNDIndex();
     991              : 
     992              :         using exec_space  = typename Kokkos::View<const size_t*>::execution_space;
     993              :         using policy_type = Kokkos::RangePolicy<exec_space>;
     994              : 
     995              :         // start a timer
     996            0 :         static IpplTimings::TimerRef outer_loop = IpplTimings::getTimer("evaluateAx: outer loop");
     997            0 :         IpplTimings::startTimer(outer_loop);
     998              : 
     999              :         // Loop over elements to compute contributions
    1000            0 :         Kokkos::parallel_for(
    1001            0 :             "Loop over elements", policy_type(0, elementIndices.extent(0)),
    1002            0 :             KOKKOS_CLASS_LAMBDA(const size_t index) {
    1003            0 :                 const size_t elementIndex                            = elementIndices(index);
    1004            0 :                 const Vector<size_t, numElementDOFs> local_dof = this->getLocalDOFIndices();
    1005            0 :                 const Vector<size_t, numElementDOFs> global_dofs =
    1006            0 :                     this->getGlobalDOFIndices(elementIndex);
    1007            0 :                 Vector<indices_t, numElementDOFs> global_dof_ndindices;
    1008              : 
    1009            0 :                 for (size_t i = 0; i < numElementDOFs; ++i) {
    1010            0 :                     global_dof_ndindices[i] = this->getMeshVertexNDIndex(global_dofs[i]);
    1011              :                 }
    1012              : 
    1013              :                 // local DOF indices
    1014              :                 size_t i, j;
    1015              : 
    1016              :                 // global DOF n-dimensional indices (Vector of N indices representing indices in
    1017              :                 // each dimension)
    1018            0 :                 indices_t I_nd, J_nd;
    1019              : 
    1020              :                 // 2. Compute the contribution to resultAx = A*x with A_K
    1021            0 :                 for (i = 0; i < numElementDOFs; ++i) {
    1022            0 :                     I_nd = global_dof_ndindices[i];
    1023              : 
    1024              :                     // Handle boundary DOFs
    1025              :                     // If Zero Dirichlet BCs, skip this DOF
    1026              :                     // If Constant Dirichlet BCs, identity
    1027            0 :                     if ((bcType == CONSTANT_FACE) && (this->isDOFOnBoundary(I_nd))) {
    1028            0 :                         for (unsigned d = 0; d < Dim; ++d) {
    1029            0 :                             I_nd[d] = I_nd[d] - ldom[d].first() + nghost;
    1030              :                         }
    1031            0 :                         apply(resultView, I_nd) = 1.0;
    1032            0 :                         continue;
    1033            0 :                     } else if ((bcType == ZERO_FACE) && (this->isDOFOnBoundary(I_nd))) {
    1034            0 :                         continue;
    1035              :                     }
    1036              : 
    1037              :                     // get the appropriate index for the Kokkos view of the field
    1038            0 :                     for (unsigned d = 0; d < Dim; ++d) {
    1039            0 :                         I_nd[d] = I_nd[d] - ldom[d].first() + nghost;
    1040              :                     }
    1041              : 
    1042            0 :                     for (j = 0; j < numElementDOFs; ++j) {
    1043            0 :                         if (global_dofs[i] == global_dofs[j]) {
    1044            0 :                             J_nd = global_dof_ndindices[j];
    1045              : 
    1046              :                             // Skip boundary DOFs (Zero & Constant Dirichlet BCs)
    1047            0 :                             if (((bcType == ZERO_FACE) || (bcType == CONSTANT_FACE)) 
    1048            0 :                                 && this->isDOFOnBoundary(J_nd)) {
    1049            0 :                                 continue;
    1050              :                             }
    1051              : 
    1052              :                             // get the appropriate index for the Kokkos view of the field
    1053            0 :                             for (unsigned d = 0; d < Dim; ++d) {
    1054            0 :                                 J_nd[d] = J_nd[d] - ldom[d].first() + nghost;
    1055              :                             }
    1056              : 
    1057              :                             // sum up all contributions of element matrix
    1058            0 :                             apply(resultView, I_nd) += A_K[i][j];
    1059              :                         }
    1060              :                     }
    1061              :                 }
    1062            0 :             });
    1063              : 
    1064            0 :         if (bcType == PERIODIC_FACE) {
    1065            0 :             resultField.accumulateHalo();
    1066            0 :             bcField.apply(resultField);
    1067            0 :             bcField.assignGhostToPhysical(resultField);
    1068              :         } else {
    1069            0 :             resultField.accumulateHalo_noghost();
    1070              :         }
    1071              : 
    1072              :         // apply the inverse diagonal after already summed all contributions from element matrices
    1073              :         using index_array_type = typename RangePolicy<Dim, exec_space>::index_array_type;
    1074            0 :         ippl::parallel_for("Loop over result view to apply inverse", field.getFieldRangePolicy(),
    1075            0 :             KOKKOS_LAMBDA(const index_array_type& args) {
    1076            0 :                 if (apply(resultView, args) != 0.0) {
    1077            0 :                     apply(resultView, args) = (1.0 / apply(resultView, args)) * apply(view, args);
    1078              :                 }
    1079              :             });
    1080            0 :         IpplTimings::stopTimer(outer_loop);
    1081              : 
    1082            0 :         IpplTimings::stopTimer(evalAx);
    1083              : 
    1084            0 :         return resultField;
    1085            0 :     }
    1086              : 
    1087              :     template <typename T, unsigned Dim, unsigned Order, typename ElementType,
    1088              :               typename QuadratureType, typename FieldLHS, typename FieldRHS>
    1089              :     template <typename F>
    1090            0 :     FieldLHS LagrangeSpace<T, Dim, Order, ElementType, QuadratureType, FieldLHS,
    1091              :                            FieldRHS>::evaluateAx_diag(FieldLHS& field, F& evalFunction) const {
    1092            0 :         Inform m("");
    1093              : 
    1094              :         // start a timer
    1095            0 :         static IpplTimings::TimerRef evalAx = IpplTimings::getTimer("evaluateAx");
    1096            0 :         IpplTimings::startTimer(evalAx);
    1097              : 
    1098              :         // get number of ghost cells in field
    1099            0 :         const int nghost = field.getNghost();
    1100              : 
    1101              :         // create a new field for result with view initialized to zero (views are initialized to
    1102              :         // zero by default)
    1103            0 :         FieldLHS resultField(field.get_mesh(), field.getLayout(), nghost);
    1104              : 
    1105              :         // List of quadrature weights
    1106            0 :         const Vector<T, QuadratureType::numElementNodes> w =
    1107            0 :             this->quadrature_m.getWeightsForRefElement();
    1108              : 
    1109              :         // List of quadrature nodes
    1110            0 :         const Vector<point_t, QuadratureType::numElementNodes> q =
    1111            0 :             this->quadrature_m.getIntegrationNodesForRefElement();
    1112              : 
    1113              :         // TODO move outside of evaluateAx (I think it is possible for other problems as well)
    1114              :         // Gradients of the basis functions for the DOF at the quadrature nodes
    1115            0 :         Vector<Vector<point_t, numElementDOFs>, QuadratureType::numElementNodes> grad_b_q;
    1116            0 :         for (size_t k = 0; k < QuadratureType::numElementNodes; ++k) {
    1117            0 :             for (size_t i = 0; i < numElementDOFs; ++i) {
    1118            0 :                 grad_b_q[k][i] = this->evaluateRefElementShapeFunctionGradient(i, q[k]);
    1119              :             }
    1120              :         }
    1121              : 
    1122              :         // Make local element matrix -- does not change through the element mesh
    1123              :         // Element matrix
    1124            0 :         Vector<Vector<T, numElementDOFs>, numElementDOFs> A_K;
    1125              : 
    1126              :         // 1. Compute the Galerkin element matrix A_K
    1127            0 :         for (size_t i = 0; i < numElementDOFs; ++i) {
    1128            0 :             for (size_t j = 0; j < numElementDOFs; ++j) {
    1129            0 :                 A_K[i][j] = 0.0;
    1130            0 :                 for (size_t k = 0; k < QuadratureType::numElementNodes; ++k) {
    1131            0 :                     A_K[i][j] += w[k] * evalFunction(i, j, grad_b_q[k]);
    1132              :                 }
    1133              :             }
    1134              :         }
    1135              : 
    1136              :         // Get field data and atomic result data,
    1137              :         // since it will be added to during the kokkos loop
    1138            0 :         ViewType view             = field.getView();
    1139            0 :         AtomicViewType resultView = resultField.getView();
    1140              : 
    1141              :         // Get boundary conditions from field
    1142            0 :         BConds<FieldLHS, Dim>& bcField = field.getFieldBC();
    1143            0 :         FieldBC bcType = bcField[0]->getBCType();
    1144              : 
    1145              :         // Get domain information
    1146            0 :         auto ldom = (field.getLayout()).getLocalNDIndex();
    1147              : 
    1148              :         using exec_space  = typename Kokkos::View<const size_t*>::execution_space;
    1149              :         using policy_type = Kokkos::RangePolicy<exec_space>;
    1150              : 
    1151              :         // start a timer
    1152            0 :         static IpplTimings::TimerRef outer_loop = IpplTimings::getTimer("evaluateAx: outer loop");
    1153            0 :         IpplTimings::startTimer(outer_loop);
    1154              : 
    1155              :         // Loop over elements to compute contributions
    1156            0 :         Kokkos::parallel_for(
    1157            0 :             "Loop over elements", policy_type(0, elementIndices.extent(0)),
    1158            0 :             KOKKOS_CLASS_LAMBDA(const size_t index) {
    1159            0 :                 const size_t elementIndex                            = elementIndices(index);
    1160            0 :                 const Vector<size_t, numElementDOFs> local_dof = this->getLocalDOFIndices();
    1161            0 :                 const Vector<size_t, numElementDOFs> global_dofs =
    1162            0 :                     this->getGlobalDOFIndices(elementIndex);
    1163            0 :                 Vector<indices_t, numElementDOFs> global_dof_ndindices;
    1164              : 
    1165            0 :                 for (size_t i = 0; i < numElementDOFs; ++i) {
    1166            0 :                     global_dof_ndindices[i] = this->getMeshVertexNDIndex(global_dofs[i]);
    1167              :                 }
    1168              : 
    1169              :                 // local DOF indices
    1170              :                 size_t i, j;
    1171              : 
    1172              :                 // global DOF n-dimensional indices (Vector of N indices representing indices in
    1173              :                 // each dimension)
    1174            0 :                 indices_t I_nd, J_nd;
    1175              : 
    1176              :                 // 2. Compute the contribution to resultAx = A*x with A_K
    1177            0 :                 for (i = 0; i < numElementDOFs; ++i) {
    1178            0 :                     I_nd = global_dof_ndindices[i];
    1179              : 
    1180              :                     // Handle boundary DOFs
    1181              :                     // If Zero Dirichlet BCs, skip this DOF
    1182              :                     // If Constant Dirichlet BCs, identity
    1183            0 :                     if ((bcType == CONSTANT_FACE) && (this->isDOFOnBoundary(I_nd))) {
    1184            0 :                         for (unsigned d = 0; d < Dim; ++d) {
    1185            0 :                             I_nd[d] = I_nd[d] - ldom[d].first() + nghost;
    1186              :                         }
    1187            0 :                         apply(resultView, I_nd) =  apply(view, I_nd);
    1188            0 :                         continue;
    1189            0 :                     } else if ((bcType == ZERO_FACE) && (this->isDOFOnBoundary(I_nd))) {
    1190            0 :                         continue;
    1191              :                     }
    1192              : 
    1193              :                     // get the appropriate index for the Kokkos view of the field
    1194            0 :                     for (unsigned d = 0; d < Dim; ++d) {
    1195            0 :                         I_nd[d] = I_nd[d] - ldom[d].first() + nghost;
    1196              :                     }
    1197              : 
    1198            0 :                     for (j = 0; j < numElementDOFs; ++j) {
    1199            0 :                         if (global_dofs[i] == global_dofs[j]) {
    1200            0 :                             J_nd = global_dof_ndindices[j];
    1201              : 
    1202              :                             // Skip boundary DOFs (Zero & Constant Dirichlet BCs)
    1203            0 :                             if (((bcType == ZERO_FACE) || (bcType == CONSTANT_FACE)) 
    1204            0 :                                 && this->isDOFOnBoundary(J_nd)) {
    1205            0 :                                 continue;
    1206              :                             }
    1207              : 
    1208              :                             // get the appropriate index for the Kokkos view of the field
    1209            0 :                             for (unsigned d = 0; d < Dim; ++d) {
    1210            0 :                                 J_nd[d] = J_nd[d] - ldom[d].first() + nghost;
    1211              :                             }
    1212              : 
    1213            0 :                             apply(resultView, I_nd) += A_K[i][j] * apply(view, J_nd);
    1214              :                         }
    1215              :                     }
    1216              :                 }
    1217            0 :             });
    1218            0 :         IpplTimings::stopTimer(outer_loop);
    1219              : 
    1220            0 :         if (bcType == PERIODIC_FACE) {
    1221            0 :             resultField.accumulateHalo();
    1222            0 :             bcField.apply(resultField);
    1223            0 :             bcField.assignGhostToPhysical(resultField);
    1224              :         } else {
    1225            0 :             resultField.accumulateHalo_noghost();
    1226              :         }
    1227              : 
    1228            0 :         IpplTimings::stopTimer(evalAx);
    1229              : 
    1230            0 :         return resultField;
    1231            0 :     }
    1232              : 
    1233              :     template <typename T, unsigned Dim, unsigned Order, typename ElementType,
    1234              :               typename QuadratureType, typename FieldLHS, typename FieldRHS>
    1235              :     template <typename F>
    1236            0 :     FieldLHS LagrangeSpace<T, Dim, Order, ElementType, QuadratureType, FieldLHS,
    1237              :                            FieldRHS>::evaluateAx_lift(FieldLHS& field, F& evalFunction) const {
    1238            0 :         Inform m("");
    1239              : 
    1240              :         // get number of ghost cells in field
    1241            0 :         const int nghost = field.getNghost();
    1242              : 
    1243              :         // create a new field for result with view initialized to zero (views are initialized to
    1244              :         // zero by default)
    1245            0 :         FieldLHS resultField(field.get_mesh(), field.getLayout(), nghost);
    1246              : 
    1247              :         // List of quadrature weights
    1248            0 :         const Vector<T, QuadratureType::numElementNodes> w =
    1249            0 :             this->quadrature_m.getWeightsForRefElement();
    1250              : 
    1251              :         // List of quadrature nodes
    1252            0 :         const Vector<point_t, QuadratureType::numElementNodes> q =
    1253            0 :             this->quadrature_m.getIntegrationNodesForRefElement();
    1254              : 
    1255              :         // TODO move outside of evaluateAx (I think it is possible for other problems as well)
    1256              :         // Gradients of the basis functions for the DOF at the quadrature nodes
    1257            0 :         Vector<Vector<point_t, numElementDOFs>, QuadratureType::numElementNodes> grad_b_q;
    1258            0 :         for (size_t k = 0; k < QuadratureType::numElementNodes; ++k) {
    1259            0 :             for (size_t i = 0; i < numElementDOFs; ++i) {
    1260            0 :                 grad_b_q[k][i] = this->evaluateRefElementShapeFunctionGradient(i, q[k]);
    1261              :             }
    1262              :         }
    1263              : 
    1264              :         // Make local element matrix -- does not change through the element mesh
    1265              :         // Element matrix
    1266            0 :         Vector<Vector<T, numElementDOFs>, numElementDOFs> A_K;
    1267              : 
    1268              :         // 1. Compute the Galerkin element matrix A_K
    1269            0 :         for (size_t i = 0; i < numElementDOFs; ++i) {
    1270            0 :             for (size_t j = 0; j < numElementDOFs; ++j) {
    1271            0 :                 A_K[i][j] = 0.0;
    1272            0 :                 for (size_t k = 0; k < QuadratureType::numElementNodes; ++k) {
    1273            0 :                     A_K[i][j] += w[k] * evalFunction(i, j, grad_b_q[k]);
    1274              :                 }
    1275              :             }
    1276              :         }
    1277              : 
    1278              :         // Get field data and atomic result data,
    1279              :         // since it will be added to during the kokkos loop
    1280            0 :         ViewType view             = field.getView();
    1281            0 :         AtomicViewType resultView = resultField.getView();
    1282              : 
    1283              :         // Get domain information
    1284            0 :         auto ldom = (field.getLayout()).getLocalNDIndex();
    1285              : 
    1286              :         using exec_space  = typename Kokkos::View<const size_t*>::execution_space;
    1287              :         using policy_type = Kokkos::RangePolicy<exec_space>;
    1288              : 
    1289              :         // Loop over elements to compute contributions
    1290            0 :         Kokkos::parallel_for(
    1291            0 :             "Loop over elements", policy_type(0, elementIndices.extent(0)),
    1292            0 :             KOKKOS_CLASS_LAMBDA(const size_t index) {
    1293            0 :                 const size_t elementIndex                            = elementIndices(index);
    1294            0 :                 const Vector<size_t, numElementDOFs> local_dof = this->getLocalDOFIndices();
    1295            0 :                 const Vector<size_t, numElementDOFs> global_dofs =
    1296            0 :                     this->getGlobalDOFIndices(elementIndex);
    1297            0 :                 Vector<indices_t, numElementDOFs> global_dof_ndindices;
    1298              : 
    1299            0 :                 for (size_t i = 0; i < numElementDOFs; ++i) {
    1300            0 :                     global_dof_ndindices[i] = this->getMeshVertexNDIndex(global_dofs[i]);
    1301              :                 }
    1302              : 
    1303              :                 // local DOF indices (both i and j go from 0 to numDOFs-1 in the element)
    1304              :                 size_t i, j;
    1305              : 
    1306              :                 // global DOF n-dimensional indices (Vector of N indices representing indices in
    1307              :                 // each dimension)
    1308            0 :                 indices_t I_nd, J_nd;
    1309              : 
    1310              :                 // 2. Compute the contribution to resultAx = A*x with A_K
    1311            0 :                 for (i = 0; i < numElementDOFs; ++i) {
    1312            0 :                     I_nd = global_dof_ndindices[i];
    1313              : 
    1314              :                     // Skip if on a row of the matrix
    1315            0 :                     if (this->isDOFOnBoundary(I_nd)) {
    1316            0 :                         continue;
    1317              :                     }
    1318              : 
    1319              :                     // get the appropriate index for the Kokkos view of the field
    1320            0 :                     for (unsigned d = 0; d < Dim; ++d) {
    1321            0 :                         I_nd[d] = I_nd[d] - ldom[d].first() + nghost;
    1322              :                     }
    1323              : 
    1324            0 :                     for (j = 0; j < numElementDOFs; ++j) {
    1325            0 :                         J_nd = global_dof_ndindices[j];
    1326              : 
    1327              :                         // Contribute to lifting only if on a boundary DOF
    1328            0 :                         if (this->isDOFOnBoundary(J_nd)) {
    1329              :                             // get the appropriate index for the Kokkos view of the field
    1330            0 :                             for (unsigned d = 0; d < Dim; ++d) {
    1331            0 :                                 J_nd[d] = J_nd[d] - ldom[d].first() + nghost;
    1332              :                             }
    1333            0 :                             apply(resultView, I_nd) += A_K[i][j] * apply(view, J_nd);
    1334            0 :                             continue;
    1335            0 :                         }
    1336              : 
    1337              :                     }
    1338              :                 }
    1339            0 :             });
    1340            0 :         resultField.accumulateHalo();
    1341              : 
    1342            0 :         return resultField;
    1343            0 :     }
    1344              : 
    1345              :     template <typename T, unsigned Dim, unsigned Order, typename ElementType,
    1346              :               typename QuadratureType, typename FieldLHS, typename FieldRHS>
    1347           20 :     void LagrangeSpace<T, Dim, Order, ElementType, QuadratureType, FieldLHS,
    1348              :                        FieldRHS>::evaluateLoadVector(FieldRHS& field) const {
    1349           20 :         Inform m("");
    1350              : 
    1351              :         // start a timer
    1352           20 :         static IpplTimings::TimerRef evalLoadV = IpplTimings::getTimer("evaluateLoadVector");
    1353           20 :         IpplTimings::startTimer(evalLoadV);
    1354              : 
    1355              :         // List of quadrature weights
    1356           20 :         const Vector<T, QuadratureType::numElementNodes> w =
    1357           20 :             this->quadrature_m.getWeightsForRefElement();
    1358              : 
    1359              :         // List of quadrature nodes
    1360           20 :         const Vector<point_t, QuadratureType::numElementNodes> q =
    1361           20 :             this->quadrature_m.getIntegrationNodesForRefElement();
    1362              : 
    1363           20 :         const indices_t zeroNdIndex = Vector<size_t, Dim>(0);
    1364              : 
    1365              :         // Evaluate the basis functions for the DOF at the quadrature nodes
    1366           20 :         Vector<Vector<T, numElementDOFs>, QuadratureType::numElementNodes> basis_q;
    1367          648 :         for (size_t k = 0; k < QuadratureType::numElementNodes; ++k) {
    1368         5108 :             for (size_t i = 0; i < numElementDOFs; ++i) {
    1369         4480 :                 basis_q[k][i] = this->evaluateRefElementShapeFunction(i, q[k]);
    1370              :             }
    1371              :         }
    1372              : 
    1373              :         // Absolute value of det Phi_K
    1374           20 :         const T absDetDPhi = Kokkos::abs(this->ref_element_m.getDeterminantOfTransformationJacobian(
    1375           40 :             this->getElementMeshVertexPoints(zeroNdIndex)));
    1376              : 
    1377              :         // Get domain information and ghost cells
    1378           20 :         auto ldom        = (field.getLayout()).getLocalNDIndex();
    1379           20 :         const int nghost = field.getNghost();
    1380              : 
    1381              :         // Get boundary conditions from field
    1382           20 :         BConds<FieldRHS, Dim>& bcField = field.getFieldBC();
    1383           20 :         FieldBC bcType = bcField[0]->getBCType();
    1384              : 
    1385           20 :         FieldRHS temp_field(field.get_mesh(), field.getLayout(), nghost);
    1386           20 :         temp_field.setFieldBC(bcField);
    1387              : 
    1388              :         // Get field data and make it atomic,
    1389              :         // since it will be added to during the kokkos loop
    1390              :         // We work with a temporary field since we need to use field
    1391              :         // to evaluate the load vector; then we assign temp to RHS field
    1392           20 :         AtomicViewType atomic_view = temp_field.getView();
    1393              : 
    1394              :         using exec_space  = typename Kokkos::View<const size_t*>::execution_space;
    1395              :         using policy_type = Kokkos::RangePolicy<exec_space>;
    1396              : 
    1397              :         // start a timer
    1398           20 :         static IpplTimings::TimerRef outer_loop =
    1399           20 :             IpplTimings::getTimer("evaluateLoadVec: outer loop");
    1400           20 :         IpplTimings::startTimer(outer_loop);
    1401              : 
    1402              :         // Loop over elements to compute contributions
    1403           20 :         Kokkos::parallel_for(
    1404           40 :             "Loop over elements", policy_type(0, elementIndices.extent(0)),
    1405          344 :             KOKKOS_CLASS_LAMBDA(size_t index) {
    1406            0 :                 const size_t elementIndex                              = elementIndices(index);
    1407          304 :                 const Vector<size_t, numElementDOFs> local_dofs  = this->getLocalDOFIndices();
    1408            0 :                 const Vector<size_t, numElementDOFs> global_dofs =
    1409          304 :                     this->getGlobalDOFIndices(elementIndex);
    1410              : 
    1411              :                 size_t i, I;
    1412              : 
    1413              :                 // 1. Compute b_K
    1414         4720 :                 for (i = 0; i < numElementDOFs; ++i) {
    1415            0 :                     I = global_dofs[i];
    1416              : 
    1417              :                     // TODO fix for higher order
    1418         2208 :                     auto dof_ndindex_I = this->getMeshVertexNDIndex(I);
    1419              : 
    1420              :                     // Skip boundary DOFs (Zero and Constant Dirichlet BCs)
    1421            0 :                     if (((bcType == ZERO_FACE) || (bcType == CONSTANT_FACE))
    1422         2208 :                         && (this->isDOFOnBoundary(dof_ndindex_I))) {
    1423            0 :                         continue;
    1424              :                     }
    1425              : 
    1426              :                     // calculate the contribution of this element
    1427            0 :                     T contrib = 0;
    1428        57264 :                     for (size_t k = 0; k < QuadratureType::numElementNodes; ++k) {
    1429            0 :                         T val = 0;
    1430       499104 :                         for (size_t j = 0; j < numElementDOFs; ++j) {
    1431              :                             // get field index corresponding to this DOF
    1432            0 :                             size_t J           = global_dofs[j];
    1433       442800 :                             auto dof_ndindex_J = this->getMeshVertexNDIndex(J);
    1434      1763712 :                             for (unsigned d = 0; d < Dim; ++d) {
    1435            0 :                                 dof_ndindex_J[d] = dof_ndindex_J[d] - ldom[d].first() + nghost;
    1436              :                             }
    1437              : 
    1438              :                             // get field value at DOF and interpolate to q_k
    1439       442800 :                             val += basis_q[k][j] * apply(field, dof_ndindex_J);
    1440              :                         }
    1441              : 
    1442            0 :                         contrib += w[k] * basis_q[k][i] * absDetDPhi * val;
    1443              :                     }
    1444              : 
    1445              :                     // get the appropriate index for the Kokkos view of the field
    1446         3720 :                     for (unsigned d = 0; d < Dim; ++d) {
    1447            0 :                         dof_ndindex_I[d] = dof_ndindex_I[d] - ldom[d].first() + nghost;
    1448              :                     }
    1449              : 
    1450              :                     // add the contribution of the element to the field
    1451          960 :                     apply(atomic_view, dof_ndindex_I) += contrib;
    1452              : 
    1453              :                 }
    1454            0 :             });
    1455           20 :         IpplTimings::stopTimer(outer_loop);
    1456              : 
    1457           20 :         temp_field.accumulateHalo();
    1458              : 
    1459           20 :         if ((bcType == PERIODIC_FACE) || (bcType == CONSTANT_FACE)) {
    1460            0 :             bcField.apply(temp_field);
    1461            0 :             bcField.assignGhostToPhysical(temp_field);
    1462              :         }
    1463              : 
    1464           20 :         field = temp_field;
    1465              : 
    1466           20 :         IpplTimings::stopTimer(evalLoadV);
    1467           20 :     }
    1468              : 
    1469              :     template <typename T, unsigned Dim, unsigned Order, typename ElementType,
    1470              :               typename QuadratureType, typename FieldLHS, typename FieldRHS>
    1471              :     template <typename F>
    1472            0 :     T LagrangeSpace<T, Dim, Order, ElementType, QuadratureType, FieldLHS, FieldRHS>::computeErrorL2(
    1473              :         const FieldLHS& u_h, const F& u_sol) const {
    1474            0 :         if (this->quadrature_m.getOrder() < (2 * Order + 1)) {
    1475              :             // throw exception
    1476            0 :             throw IpplException(
    1477              :                 "LagrangeSpace::computeErrorL2()",
    1478              :                 "Order of quadrature rule for error computation should be > 2*p + 1");
    1479              :         }
    1480              : 
    1481              :         // List of quadrature weights
    1482            0 :         const Vector<T, QuadratureType::numElementNodes> w =
    1483            0 :             this->quadrature_m.getWeightsForRefElement();
    1484              : 
    1485              :         // List of quadrature nodes
    1486            0 :         const Vector<point_t, QuadratureType::numElementNodes> q =
    1487            0 :             this->quadrature_m.getIntegrationNodesForRefElement();
    1488              : 
    1489              :         // Evaluate the basis functions for the DOF at the quadrature nodes
    1490            0 :         Vector<Vector<T, numElementDOFs>, QuadratureType::numElementNodes> basis_q;
    1491            0 :         for (size_t k = 0; k < QuadratureType::numElementNodes; ++k) {
    1492            0 :             for (size_t i = 0; i < numElementDOFs; ++i) {
    1493            0 :                 basis_q[k][i] = this->evaluateRefElementShapeFunction(i, q[k]);
    1494              :             }
    1495              :         }
    1496              : 
    1497            0 :         const indices_t zeroNdIndex = Vector<size_t, Dim>(0);
    1498              : 
    1499              :         // Absolute value of det Phi_K
    1500            0 :         const T absDetDPhi = Kokkos::abs(this->ref_element_m.getDeterminantOfTransformationJacobian(
    1501            0 :             this->getElementMeshVertexPoints(zeroNdIndex)));
    1502              : 
    1503              :         // Variable to sum the error to
    1504            0 :         T error = 0;
    1505              : 
    1506              :         // Get domain information and ghost cells
    1507            0 :         auto ldom        = (u_h.getLayout()).getLocalNDIndex();
    1508            0 :         const int nghost = u_h.getNghost();
    1509              : 
    1510              :         using exec_space  = typename Kokkos::View<const size_t*>::execution_space;
    1511              :         using policy_type = Kokkos::RangePolicy<exec_space>;
    1512              : 
    1513              :         // Loop over elements to compute contributions
    1514            0 :         Kokkos::parallel_reduce(
    1515            0 :             "Compute error over elements", policy_type(0, elementIndices.extent(0)),
    1516            0 :             KOKKOS_CLASS_LAMBDA(size_t index, double& local) {
    1517            0 :                 const size_t elementIndex = elementIndices(index);
    1518            0 :                 const Vector<size_t, numElementDOFs> global_dofs =
    1519            0 :                     this->getGlobalDOFIndices(elementIndex);
    1520              : 
    1521              :                 // contribution of this element to the error
    1522            0 :                 T contrib = 0;
    1523            0 :                 for (size_t k = 0; k < QuadratureType::numElementNodes; ++k) {
    1524            0 :                     T val_u_sol = u_sol(this->ref_element_m.localToGlobal(
    1525            0 :                         this->getElementMeshVertexPoints(this->getElementNDIndex(elementIndex)),
    1526            0 :                         q[k]));
    1527              : 
    1528            0 :                     T val_u_h = 0;
    1529            0 :                     for (size_t i = 0; i < numElementDOFs; ++i) {
    1530              :                         // get field index corresponding to this DOF
    1531            0 :                         size_t I           = global_dofs[i];
    1532            0 :                         auto dof_ndindex_I = this->getMeshVertexNDIndex(I);
    1533            0 :                         for (unsigned d = 0; d < Dim; ++d) {
    1534            0 :                             dof_ndindex_I[d] = dof_ndindex_I[d] - ldom[d].first() + nghost;
    1535              :                         }
    1536              : 
    1537              :                         // get field value at DOF and interpolate to q_k
    1538            0 :                         val_u_h += basis_q[k][i] * apply(u_h, dof_ndindex_I);
    1539              :                     }
    1540              : 
    1541            0 :                     contrib += w[k] * Kokkos::pow(val_u_sol - val_u_h, 2) * absDetDPhi;
    1542              :                 }
    1543            0 :                 local += contrib;
    1544            0 :             },
    1545            0 :             Kokkos::Sum<double>(error));
    1546              : 
    1547              :         // MPI reduce
    1548            0 :         T global_error = 0.0;
    1549            0 :         Comm->allreduce(error, global_error, 1, std::plus<T>());
    1550              : 
    1551            0 :         return Kokkos::sqrt(global_error);
    1552            0 :     }
    1553              : 
    1554              :     template <typename T, unsigned Dim, unsigned Order, typename ElementType,
    1555              :               typename QuadratureType, typename FieldLHS, typename FieldRHS>
    1556            0 :     T LagrangeSpace<T, Dim, Order, ElementType, QuadratureType, FieldLHS, FieldRHS>::computeAvg(
    1557              :         const FieldLHS& u_h) const {
    1558            0 :         if (this->quadrature_m.getOrder() < (2 * Order + 1)) {
    1559              :             // throw exception
    1560            0 :             throw IpplException(
    1561              :                 "LagrangeSpace::computeAvg()",
    1562              :                 "Order of quadrature rule for error computation should be > 2*p + 1");
    1563              :         }
    1564              : 
    1565              :         // List of quadrature weights
    1566            0 :         const Vector<T, QuadratureType::numElementNodes> w =
    1567            0 :             this->quadrature_m.getWeightsForRefElement();
    1568              : 
    1569              :         // List of quadrature nodes
    1570            0 :         const Vector<point_t, QuadratureType::numElementNodes> q =
    1571            0 :             this->quadrature_m.getIntegrationNodesForRefElement();
    1572              : 
    1573              :         // Evaluate the basis functions for the DOF at the quadrature nodes
    1574            0 :         Vector<Vector<T, numElementDOFs>, QuadratureType::numElementNodes> basis_q;
    1575            0 :         for (size_t k = 0; k < QuadratureType::numElementNodes; ++k) {
    1576            0 :             for (size_t i = 0; i < numElementDOFs; ++i) {
    1577            0 :                 basis_q[k][i] = this->evaluateRefElementShapeFunction(i, q[k]);
    1578              :             }
    1579              :         }
    1580              : 
    1581            0 :         const indices_t zeroNdIndex = Vector<size_t, Dim>(0);
    1582              : 
    1583              :         // Absolute value of det Phi_K
    1584            0 :         const T absDetDPhi = Kokkos::abs(this->ref_element_m.getDeterminantOfTransformationJacobian(
    1585            0 :             this->getElementMeshVertexPoints(zeroNdIndex)));
    1586              : 
    1587              :         // Variable to sum the error to
    1588            0 :         T avg = 0;
    1589              : 
    1590              :         // Get domain information and ghost cells
    1591            0 :         auto ldom        = (u_h.getLayout()).getLocalNDIndex();
    1592            0 :         const int nghost = u_h.getNghost();
    1593              : 
    1594              :         using exec_space  = typename Kokkos::View<const size_t*>::execution_space;
    1595              :         using policy_type = Kokkos::RangePolicy<exec_space>;
    1596              : 
    1597              :         // Loop over elements to compute contributions
    1598            0 :         Kokkos::parallel_reduce(
    1599            0 :             "Compute average over elements", policy_type(0, elementIndices.extent(0)),
    1600            0 :             KOKKOS_CLASS_LAMBDA(size_t index, double& local) {
    1601            0 :                 const size_t elementIndex = elementIndices(index);
    1602            0 :                 const Vector<size_t, numElementDOFs> global_dofs =
    1603            0 :                     this->getGlobalDOFIndices(elementIndex);
    1604              : 
    1605              :                 // contribution of this element to the error
    1606            0 :                 T contrib = 0;
    1607            0 :                 for (size_t k = 0; k < QuadratureType::numElementNodes; ++k) {
    1608            0 :                     T val_u_h = 0;
    1609            0 :                     for (size_t i = 0; i < numElementDOFs; ++i) {
    1610              :                         // get field index corresponding to this DOF
    1611            0 :                         size_t I           = global_dofs[i];
    1612            0 :                         auto dof_ndindex_I = this->getMeshVertexNDIndex(I);
    1613            0 :                         for (unsigned d = 0; d < Dim; ++d) {
    1614            0 :                             dof_ndindex_I[d] = dof_ndindex_I[d] - ldom[d].first() + nghost;
    1615              :                         }
    1616              : 
    1617              :                         // get field value at DOF and interpolate to q_k
    1618            0 :                         val_u_h += basis_q[k][i] * apply(u_h, dof_ndindex_I);
    1619              :                     }
    1620              : 
    1621            0 :                     contrib += w[k] * val_u_h * absDetDPhi;
    1622              :                 }
    1623            0 :                 local += contrib;
    1624            0 :             },
    1625            0 :             Kokkos::Sum<double>(avg));
    1626              : 
    1627              :         // MPI reduce
    1628            0 :         T global_avg = 0.0;
    1629            0 :         Comm->allreduce(avg, global_avg, 1, std::plus<T>());
    1630              : 
    1631            0 :         return global_avg;
    1632            0 :     }
    1633              : 
    1634              : }  // namespace ippl
        

Generated by: LCOV version 2.0-1